#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2018
# This file is part of Shinken Enterprise, all rights reserved.


import errno
import json
import os
import signal
import socket
import subprocess
import threading
import time
import traceback
import uuid
from collections import OrderedDict
from threading import Timer

try:
    from shlex import quote as shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as shell_quote  # Python 2

from shinken.log import logger
from shinkensolutions.api.synchronizer.source.abstract_module.analyzer_module import AnalyzerModule
from shinken.synchronizer.business.analyzer_controller import ANALYZER_JOB_STATUS
from shinken.synchronizer.dao.dataprovider.dataprovider_mongo import DataProviderMongo
from shinken.synchronizer.dao.helpers import split_and_strip_list, get_name_from_type
from shinken.synchronizer.dao.def_items import ITEM_STATE, ITEM_TYPE, METADATA, DEF_ITEMS
import shinken.synchronizer.dao.data_resolver as data_resolver

# Module descriptor: the daemons this module is meant for and its module type.
# NOTE(review): presumably read by the module loader to match the configuration -- confirm.
properties = {
    'daemons': ['synchronizer'],
    'type'   : 'server_analyzer',
}

# Global timeout (in seconds) for pinging elements and probing their TCP ports
STANDARD_PING_TIMEOUT = 3

# Timeout (in seconds) for uploads; it must be quite large: the current package is about 16MB,
# so a 5 minute timeout means a minimum upload rate of roughly 50KB/s
UPLOAD_TIMEOUT = 300

# Execution timeout (in seconds) of the local opsbro run, should not be very long
EXECUTION_TIMEOUT = 120

# Popen.returncode is -9 when the process was killed with SIGKILL (which is what we send on timeout)
KILL_RETURN_CODE = -9

# Files pushed to the analyzed server, and the local directory where the results are stored
SERVER_ANALYZER_PATH = "/var/lib/shinken/analyzer/server-analyzer.tar.gz"
EXECUTE_ANALYZER_ON_SERVER_PATH = "/var/lib/shinken/libexec/analyzer/execute_analyzer_on_server.sh"
RESULT_DIRECTORY = '/var/lib/shinken/tmp/'

# sshpass -e => the password is read from the SSHPASS environment variable
# LogLevel=ERROR  => only print errors
# UserKnownHostsFile=/dev/null  StrictHostKeyChecking=no => do not ask for distant host key checking, we are a daemon, not a tty
SCP_COMMAND = '/usr/bin/sshpass -e /usr/bin/scp -o LogLevel=ERROR -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no '
SSH_COMMAND = '/usr/bin/sshpass -e /usr/bin/ssh -o LogLevel=ERROR -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no '

# Windows analyze script (the trailing space separates it from its arguments)
WINDOWS_ANALYZE = '/var/lib/shinken/analyzer/launch_windows_analyze.sh '


def _sub_cmd_settings():
    """Prepare the child process just before the command is executed.

    Given as ``preexec_fn`` to ``subprocess.Popen``:

    * we prefer not to harm the server with the ssh CPU computation, so the
      niceness of the child is raised;
    * the child starts its own session, which makes it a process group leader:
      the whole group can then be killed at once if the command times out.
    """
    niceness_increment = 20
    os.nice(niceness_increment)
    # IMPORTANT: must be done at the first level, the timeout kill targets the whole group
    os.setsid()

def _kill_sub_process(process):
    """Kill the whole process group led by ``process`` with SIGKILL.

    ``process`` is a ``subprocess.Popen`` object whose pid is also its process
    group id (the child calls ``os.setsid()`` before exec). As SIGKILL is sent,
    ``Popen.returncode`` is expected to be -9 afterwards.

    A group that no longer exists (the command finished just before the kill)
    is not an error: there is simply nothing left to kill. Any other
    ``OSError`` is propagated.
    """
    try:
        os.killpg(process.pid, signal.SIGKILL)
    except OSError as exp:
        # ESRCH: no such process group, it already exited
        if exp.errno != errno.ESRCH:
            raise


def get_instance(plugin):
    """Build the server analyzer module for ``plugin`` (called by the plugin manager)."""
    return ServerAnalyzerModule(plugin)


class ServerAnalyzerModule(AnalyzerModule):
    analyzer_host_templates = None
    configuration_fields = None
    
    
    def get_analyzer_host_templates(self):
        if self.analyzer_host_templates is None:
            self.analyzer_host_templates = OrderedDict([
                ('Operating Systems',
                 {'display_name': self.syncdaemon._('analyzer.template_mapping_operating_systems'),
                  'values'      : ('windows', 'linux',)
                  }
                 ),
                ('Distributions',
                 {'display_name': self.syncdaemon._('analyzer.template_mapping_distributions'),
                  'values'      : ('debian', 'ubuntu', 'centos', 'redhat', 'fedora', 'oracle-linux', 'amazon-linux', 'windows', 'alpine', 'opensuse')
                  }
                 ),
                ('Hypervisors',
                 {'display_name': self.syncdaemon._('analyzer.template_mapping_hypervisors'),
                  'values'      : ('vmware', 'xen', 'uml', 'powervm_lx86', 'openvz', 'vserver', 'hyperv', 'docker-host', 'docker-container', 'virtualbox', 'qemu', 'kvm', 'systemz', 'ec2', 'aws',)
                  }
                 ),
                ('Misc',
                 {'display_name': self.syncdaemon._('analyzer.template_mapping_misc'),
                  'values'      : ('mysql', 'postfix', 'rabbitmq', 'mongodb', 'redis', 'nginx', 'iis', 'dns', 'domain-controller', 'asp--net', 'ftp')
                  }
                 ),
                ('Shinken',
                 {'display_name': self.syncdaemon._('analyzer.template_mapping_shinken'),
                  'values'      : ('shinken', 'shinken-enterprise', 'shinken-arbiter', 'shinken-broker', 'shinken-scheduler', 'shinken-receiver', 'shinken-synchronizer', 'shinken-reactionner', 'shinken-poller')
                  }
                 ),
            ])
        return self.analyzer_host_templates
    
    
    def get_configuration_fields(self):
        if self.configuration_fields is None:
            self.configuration_fields = OrderedDict([
                ('windows', OrderedDict([
                    ('login', {
                        'display_name': self.syncdaemon._('analyzer.conf_login'),
                        'default'     : 'administrator',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_login_windows'),
                        'type'        : 'text',
                    }),
                    ('password', {
                        'display_name': self.syncdaemon._('analyzer.conf_password'),
                        'default'     : 'password',
                        'protected'   : True,
                        'help'        : self.syncdaemon._('analyzer.conf_help_password_windows'),
                        'type'        : 'text',
                    }),
                    ('data_login', {
                        'display_name': self.syncdaemon._('analyzer.conf_data_login_windows'),
                        'default'     : 'DOMAINUSER',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_data_login'),
                        'type'        : 'text',
                    }),
                    ('data_password', {
                        'display_name': self.syncdaemon._('analyzer.conf_data_password_windows'),
                        'default'     : 'DOMAINPASSWORD',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_data_password'),
                        'type'        : 'text',
                    }
                     )
                ]
                )
                 ),
                ('linux', OrderedDict([
                    ('login', {
                        'display_name': self.syncdaemon._('analyzer.conf_login'),
                        'default'     : 'root',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_login_linux'),
                        'type'        : 'text',
                    }),
                    ('password', {
                        'display_name': self.syncdaemon._('analyzer.conf_password'),
                        'default'     : 'root',
                        'protected'   : True,
                        'help'        : self.syncdaemon._('analyzer.conf_help_password_linux'),
                        'type'        : 'text',
                    }),
                    ('ssh_key', {
                        'display_name': self.syncdaemon._('analyzer.conf_ssh_key'),
                        'default'     : '~shinken/.ssh/id_rsa',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_ssh_key'),
                        'type'        : 'text',
                    }),
                    ('data_login', {
                        'display_name': self.syncdaemon._('analyzer.conf_data_login_linux'),
                        'default'     : 'ANALYZER_USER',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_data_login'),
                        'type'        : 'text',
                    }),
                    ('data_password', {
                        'display_name': self.syncdaemon._('analyzer.conf_data_password_linux'),
                        'default'     : 'ANALYZER_PASSWORD',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_data_password'),
                        'type'        : 'text',
                        
                    }),
                    ('data_ssh_key', {
                        'display_name': self.syncdaemon._('analyzer.conf_data_ssh_key'),
                        'default'     : 'ANALYZER_SSH_KEY',
                        'protected'   : False,
                        'help'        : self.syncdaemon._('analyzer.conf_help_data_ssh_key'),
                        'type'        : 'text',
                    }),
                ]
                )
                 ),
            ])
        return self.configuration_fields
    
    
    def __init__(self, modconf):
        """Initialize the module state; the data provider is only created later (see get_dataprovider)."""
        super(ServerAnalyzerModule, self).__init__(modconf)
        
        # Created on the first get_dataprovider() call
        self.provider_mongo = None
        
        # Analyzed hosts, indexed by host uuid, and the lock taken when they are replaced or updated
        self.hosts_lock = threading.RLock()
        self.hosts = {}
        
        # The sub processes currently running: the list is read and updated under its own lock
        self.current_processes = []
        self.current_processes_lock = threading.RLock()
    
    
    def _exec_timeout(self, cmd, timeout, env=None):
        """Run the shell command ``cmd`` and kill it if it lasts more than ``timeout`` seconds.

        :param cmd: command line, executed through a shell
        :param timeout: maximum execution time in seconds
        :param env: environment of the command (None: inherit ours)
        :return: (returncode, stdout, stderr) with the outputs decoded as utf8.
                 If the command was killed, (2, '', error message) is returned instead.
        """
        process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=_sub_cmd_settings, env=env)
        
        # Registered so stop_all_analyzes() can kill the command
        with self.current_processes_lock:
            self.current_processes.append(process)
        
        kill_timer = Timer(timeout, _kill_sub_process, [process])
        try:
            kill_timer.start()
            stdout, stderr = process.communicate()
        finally:
            kill_timer.cancel()
            # Whatever happened, this process must not stay in the running list
            with self.current_processes_lock:
                self.current_processes.remove(process)
        
        returncode = process.returncode
        # NOTE: a kill from stop_all_analyzes() gives the same return code and is reported as a timeout too
        if returncode == KILL_RETURN_CODE:
            return 2, '', 'Error: the command did exceed %s seconds.' % timeout
        
        # Warn: windows can give us garbage encoding...
        # (bytes is str on Python 2, so this is the original check)
        if isinstance(stdout, bytes):
            stdout = stdout.decode('utf8', 'ignore')
        if isinstance(stderr, bytes):
            stderr = stderr.decode('utf8', 'ignore')
        return returncode, stdout, stderr
    
    
    # We want to get a value from our configuration, but it can be a global one, or a temporary one
    def _get_configuration_value(self, job, os_type, key):
        if job.has_job_parameter(os_type, key):
            return job.get_job_parameter(os_type, key)
        analyzer_configuration = self.get_my_configuration()
        value = analyzer_configuration[os_type].get(key, '')
        return value
    
    
    # Get info use by analyser form the orders :
    # *1) In host DATA
    # *2) In current job
    # *3) In default configuration
    def _get_host_value(self, os_name, value_name, job, host):
        _data_name = self._get_configuration_value(job, os_name, 'data_%s' % value_name)
        if _data_name:
            _data_name = _data_name.upper()
            _data_name = _data_name if _data_name.startswith('_') else '_%s' % _data_name
            if _data_name in host:
                _data_value = host[_data_name]
                if '$' in _data_value:
                    resolved_data_name = '@RESOLVED_%s' % _data_name
                    if resolved_data_name in host:
                        _data_value = host[resolved_data_name]
                    else:
                        # Here we haven't the tpl list
                        _resolve_macros = data_resolver.resolve_macros(self.syncdaemon, host, [], None, None, _data_value, None, ITEM_TYPE.HOSTS)
                        _data_value = data_resolver.expand_value(_data_value, _resolve_macros)
                        host[resolved_data_name] = _data_value
                return _data_value
        
        return self._get_configuration_value(job, os_name, value_name)
    
    
    def set_conf_enabled(self, conf_id, enabled):
        mongo_db = self.syncdaemon.mongodb_db
        dc = mongo_db.discovery_confs.find_one({'_id': conf_id})
        dc['enabled'] = (enabled == '1')
        mongo_db.discovery_confs.save(dc)
    
    
    def delete_conf(self, conf_id):
        mongo_db = self.syncdaemon.mongodb_db
        mongo_db.discovery_confs.remove({'_id': conf_id})
    
    
    def save_conf(self, conf_id, new_conf, sname):
        mongo_db = self.syncdaemon.mongodb_db
        conf = mongo_db.discovery_confs.find_one({'_id': conf_id})
        
        if conf is None:
            confs = [c['discovery_name'] for c in mongo_db.discovery_confs.find({'source_name': sname})]
            if new_conf['discovery_name'] in confs:
                return "name_already_exist"
                # return self.app._('validator.disco_already_exist') % new_conf['discovery_name']
            
            conf = {'_id'             : uuid.uuid4().hex,
                    'state'           : 'PENDING',
                    'last_scan'       : 0,
                    'synchronizer'    : '',
                    'synchronizer_tag': '',
                    'scan_number'     : 0,
                    'source_name'     : sname,
                    'last_heartbeat'  : 0}
        
        conf['discovery_name'] = new_conf['discovery_name']
        conf['iprange'] = new_conf['iprange']
        conf['scan_interval'] = new_conf['scan_interval']
        conf['notes'] = new_conf['notes']
        conf['enabled'] = new_conf['enabled']
        conf['port_range'] = new_conf['port_range']
        conf['extra_option'] = new_conf['extra_option']
        mongo_db.discovery_confs.save(conf)
        
        return "ok"
    
    
    def is_all_confs_disabled(self, sname):
        mongo_db = self.syncdaemon.mongodb_db
        all_conf = mongo_db.discovery_confs.find({'source_name': sname, 'enabled': True})
        return all_conf.count() == 0
    
    
    def get_trad_no_conf_enabled(self):
        return self.syncdaemon._('import-discovery.output_nc_no_scan_range')
    
    
    def _is_up(self, host):
        """Return True if the address of ``host`` answers to one ping.

        ping waits up to STANDARD_PING_TIMEOUT seconds for the answer, and the
        command itself is killed one second later if it is still running.
        """
        address = host.get('address', '')
        # The address comes from the host data and goes through a shell: it must be escaped
        ping_cmd = '/bin/ping -c 1 -W %d %s' % (STANDARD_PING_TIMEOUT, shell_quote(address))
        returncode, _stdout, _stderr = self._exec_timeout(ping_cmd, timeout=STANDARD_PING_TIMEOUT + 1)
        return returncode == 0
    
    
    def _is_linux(self, host):
        """Return True if ``host`` looks like a linux server.

        Currently a host is considered as a linux if its ssh port (TCP 22)
        accepts a connection within STANDARD_PING_TIMEOUT seconds.
        """
        address = host.get('address', '')
        
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(STANDARD_PING_TIMEOUT)
        try:
            sock.connect((address, 22))
        except socket.error:
            return False
        else:
            return True
        finally:
            # Closed on every path, even if connect() raises something else than socket.error
            sock.close()
    
    
    def _is_windows(self, host):
        """Return True if ``host`` looks like a windows server.

        Currently a host is considered as a windows if its TCP port 445 (the SMB
        port, not the RDP one) accepts a connection within STANDARD_PING_TIMEOUT
        seconds.
        """
        address = host.get('address', '')
        
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(STANDARD_PING_TIMEOUT)
        try:
            sock.connect((address, 445))
        except socket.error:
            return False
        else:
            return True
        finally:
            # Closed on every path, even if connect() raises something else than socket.error
            sock.close()
    
    
    def _init_datamanager(self):
        """Create the mongo data provider from the database and the cipher of the synchronizer daemon."""
        daemon = self.syncdaemon
        self.provider_mongo = DataProviderMongo(daemon.mongodb_db, daemon.database_cipher)
    
    
    def _load_from_datamanager(self):
        """Reload ``self.hosts`` (host _id -> host) with the raw source hosts this source has in database."""
        with self.hosts_lock:
            stored_hosts = self.provider_mongo.find_items(ITEM_TYPE.HOSTS, item_state=ITEM_STATE.RAW_SOURCES, item_source=self.get_name())
            # The previous content is fully replaced
            self.hosts = dict((stored_host['_id'], stored_host) for stored_host in stored_hosts)
            logger.info('[module-analyzer] loaded %d hosts from database' % len(self.hosts))
    
    
    def _get_result_path(self, host_uuid):
        """Return the local path of the analyze result file of the host ``host_uuid``."""
        file_name = 'result.%s.dat' % host_uuid
        return os.path.join(RESULT_DIRECTORY, file_name)
    
    
    def analyze_host(self, job, host):
        """MAIN ACCESS point to analyze a host.

        The host must have an address and answer to ping. It is then analyzed as
        a linux (ssh port open) or as a windows (port 445 open), and the result
        file is loaded with _load_host_from_json. Every failure is reported on
        the job with ``job.set_error`` and stops the analyze.

        :param job: analyzer controller job object, where we must set our current state & results
        :param host: real object from datamanager to analyze
        """
        logger.debug('[module-analyzer] Launching the server analyze of [%s] in a thread' % host.get('host_name'))
        host_uuid = host['_id']
        host_name = host['host_name']
        address = host.get('address', '')
        
        if not address:
            job.set_error(self.syncdaemon._('analyzer.no_address') % (host_uuid, host_name))
            return
        
        # How the host is named in the errors shown to the user
        if host_uuid != address:
            display_host = "%s(%s-%s)" % (host_uuid, host_name, address)
        else:
            display_host = "%s" % address
        
        # First try to ping the host. If do not answer, bail out
        if not self._is_up(host):
            job.set_error(self.syncdaemon._('analyzer.no_ping') % display_host, ANALYZER_JOB_STATUS.ERROR_TIMEOUT)
            return
        
        # The windows probe is only done if the host is not a linux
        is_linux = self._is_linux(host)
        is_windows = False if is_linux else self._is_windows(host)
        
        logger.debug('[module-analyzer] Host %s(%s-%s) linux:%s ' % (host_uuid, host_name, address, is_linux))
        
        if is_linux:
            start = time.time()
            try:
                if not self._linux_push_agent(job, host, address):
                    return
                before_analysing = time.time()
                if not self._linux_analysing_server(job, host, address):
                    return
                before_back_result = time.time()
                if not self._linux_get_back_result(job, host, host_uuid, address):
                    return
            except Exception:
                # format_exc() returns the traceback as a string (print_exc() returns None)
                error_trace = traceback.format_exc()
                logger.error(error_trace)
                job.set_error(error_trace)
                return
            result_path = self._get_result_path(host_uuid)
            before_load_result = time.time()
            self._load_host_from_json(job, result_path, host_uuid, host_name, address, host)
            end = time.time()
            logger.debug('[module-analyzer] LINUX Host %s(%s) is finish. Execution times: %.2f %.2f %.2f %.2f' % (
                host_uuid, host_name, before_analysing - start, before_back_result - before_analysing, before_load_result - before_back_result, end - before_load_result))
        elif is_windows:
            start = time.time()
            try:
                if not self._windows_analyze_host(job, host):
                    return
            except Exception:
                error_trace = traceback.format_exc()
                logger.error(error_trace)
                job.set_error(error_trace)
                return
            result_path = self._get_result_path(host_uuid)
            self._load_host_from_json(job, result_path, host_uuid, host_name, address, host)
            end = time.time()
            logger.debug('[module-analyzer] WINDOWS Host %s(%s) is finish. Execution times: %.2f ' % (host_uuid, host_name, end - start))
        else:
            job.set_error(self.syncdaemon._("analyzer.cannot_find_type") % display_host)
    
    
    # SSH key is a bit special as it can be a ~shinken value that must be expanded
    def __get_linux_ssh_key(self, job, host):
        ssh_key = self._get_host_value('linux', 'ssh_key', job, host)
        return os.path.expanduser(ssh_key)
    
    
    ##########################################################################################
    #                                Linux                                                   #
    ##########################################################################################
    def _linux_push_agent(self, job, host, address):
        """Copy the analyzer archive and its launch script into /tmp of the server, with scp.

        The password is given to sshpass through the SSHPASS environment variable.

        :return: True if the copy was done, False (with the error set on the job) otherwise
        """
        ssh_password = self._get_host_value('linux', 'password', job, host)
        ssh_user = self._get_host_value('linux', 'login', job, host)
        ssh_key = self.__get_linux_ssh_key(job, host)
        
        # Login, address and key path come from the configuration / host data and
        # go through a shell: they must be escaped
        ssh_opt = ""
        if ssh_key:
            ssh_opt = '-i %s' % shell_quote(ssh_key)
        destination = shell_quote('%s@%s:/tmp' % (ssh_user, address))
        
        scp_command = '%s %s %s %s %s 2>&1' % (SCP_COMMAND, ssh_opt, SERVER_ANALYZER_PATH, EXECUTE_ANALYZER_ON_SERVER_PATH, destination)
        returncode, stdout, stderr = self._exec_timeout(scp_command, timeout=UPLOAD_TIMEOUT, env={'SSHPASS': ssh_password})
        if returncode != 0:
            err = 'Cannot copy the server analyzer on the server %s: %s' % (address, stdout + stderr)
            logger.warning(err)
            job.set_error(self.syncdaemon._('analyzer.cannot_copy_analyzer') % (address, stdout + stderr), ANALYZER_JOB_STATUS.ERROR_AUTH)
            return False
        return True
    
    
    def _linux_analysing_server(self, job, host, address):
        """Run the analyzer script (previously pushed into /tmp) on the server, over ssh.

        The analyze is refused if the login is not root. The password is given
        to sshpass through the SSHPASS environment variable.

        :return: True if the analyze did run, False (with the error set on the job) otherwise
        """
        ssh_password = self._get_host_value('linux', 'password', job, host)
        ssh_user = self._get_host_value('linux', 'login', job, host)
        ssh_key = self.__get_linux_ssh_key(job, host)
        
        # Analyze must be launch as root user (for now) Bro cannot run properly as non root
        if ssh_user != "root":
            err = 'Cannot launch the server analyse with no root user (%s): %s' % (ssh_user, address)
            logger.warning(err)
            job.set_error(self.syncdaemon._('analyzer.cannot_launch_analyze_non_root'))
            return False
        
        # Address and key path come from the configuration / host data and go
        # through a shell: they must be escaped
        ssh_opt = ""
        if ssh_key:
            ssh_opt = '-i %s' % shell_quote(ssh_key)
        target = shell_quote('%s@%s' % (ssh_user, address))
        
        ssh_command = '%s %s %s "/bin/bash /tmp/execute_analyzer_on_server.sh" 2>&1' % (SSH_COMMAND, ssh_opt, target)
        returncode, stdout, stderr = self._exec_timeout(ssh_command, timeout=EXECUTION_TIMEOUT, env={'SSHPASS': ssh_password})
        if returncode != 0:
            err = 'Cannot launch the server analyse %s: %s' % (address, stdout + stderr)
            logger.warning(err)
            job.set_error(self.syncdaemon._('analyzer.cannot_launch_analyze') % (address, stdout + stderr))
            return False
        return True
    
    
    def _linux_get_back_result(self, job, host, host_uuid, address):
        """Copy the analyze result of the server into the local result file, with scp.

        The password is given to sshpass through the SSHPASS environment variable.

        :return: True if the result file is now available locally, False (with
                 the error set on the job) otherwise
        """
        ssh_password = self._get_host_value('linux', 'password', job, host)
        ssh_user = self._get_host_value('linux', 'login', job, host)
        ssh_key = self.__get_linux_ssh_key(job, host)
        
        # Like for the other ssh commands: the key is only given if there is one,
        # and everything that goes through the shell is escaped
        ssh_opt = ""
        if ssh_key:
            ssh_opt = '-i %s' % shell_quote(ssh_key)
        remote_file = shell_quote('%s@%s:/tmp/shinken-local-analyzer-payload.json' % (ssh_user, address))
        
        result_path = self._get_result_path(host_uuid)
        scp_command = '%s %s %s %s' % (SCP_COMMAND, ssh_opt, remote_file, shell_quote(result_path))
        returncode, stdout, stderr = self._exec_timeout(scp_command, timeout=STANDARD_PING_TIMEOUT, env={'SSHPASS': ssh_password})  # should be quick
        if returncode != 0:
            err = 'Cannot get back analyze results from the server %s: %s' % (address, stdout + stderr)
            logger.warning(err)
            job.set_error(self.syncdaemon._('analyzer.cannot_get_analyze_results') % (address, stdout + stderr))
            return False
        
        if not os.path.exists(result_path):
            err = 'The server analyzer result file is missing from server %s at %s' % (address, result_path)
            logger.warning(err)
            job.set_error(self.syncdaemon._('analyzer.cannot_result_missing') % (address, result_path))
            return False
        
        return True
    
    
    ##########################################################################################
    #                                Windows                                                 #
    ##########################################################################################
    
    def _windows_analyze_host(self, job, host):
        """Run the windows analyze script for ``host``; the script writes the local result file.

        :return: True if the script succeeded and the result file exists, False
                 (with the error set on the job) otherwise
        """
        logger.debug("[module-analyzer] HOST TO WINDOWS ANALYSE: [%s]" % host.get('host_name', 'no name'))
        host_uuid = host['_id']
        address = host.get('address', '')
        result_path = self._get_result_path(host_uuid)
        windows_password = self._get_host_value('windows', 'password', job, host)
        windows_login = self._get_host_value('windows', 'login', job, host)
        
        # The arguments go through a shell: they are escaped so a $, a quote or a
        # backslash in the login / password is given as is to the script.
        # NOTE(review): the password is still visible in the process list, the script
        # should read it from its environment instead.
        arguments = ' '.join(shell_quote(argument) for argument in (windows_login, windows_password, address, result_path))
        command = '%s %s' % (WINDOWS_ANALYZE, arguments)
        
        # The windows commands does all, so take the larger timeout
        returncode, stdout, stderr = self._exec_timeout(command, timeout=UPLOAD_TIMEOUT)
        
        if returncode != 0:
            logger.error('Cannot copy and execute the server analyzer on the server %s: %s' % (address, stdout + stderr))
            job.set_error(self.syncdaemon._('analyzer.cannot_copy_and_run') % (address, stdout + stderr), ANALYZER_JOB_STATUS.ERROR_AUTH)
            return False
        if not os.path.exists(result_path):
            job.set_error(self.syncdaemon._('analyzer.cannot_result_missing') % (address, result_path))
            return False
        return True
    
    
    def _load_host_from_json(self, job, pth, host_uuid, host_name, address, host_from_staging=None):
        """Load the analyze result file ``pth``, turn it into a source host and save it.

        The host read from the file gets the given name and address, its templates
        are translated with the template mapping of the source, its DATA (keys
        starting with '_') are validated and its sync keys are computed. It is then
        saved in the raw sources, set as the result of the job, and the
        synchronizer is called back about the new element.

        Every validation failure is reported with ``job.set_error`` and stops the load.

        :param job: analyzer job, where the result or the error is set
        :param pth: path of the json result file
        :param host_uuid: id of the analyzed host (key in self.hosts and sync key)
        :param host_name: name given to the loaded host (also used as its _id)
        :param address: address given to the loaded host
        :param host_from_staging: the analyzed host; only read for its _SE_UUID
                                  when the job is not a discovery one
        """
        with open(pth, 'r') as f:
            data = f.read()
        
        try:
            host = json.loads(data)
        except Exception, exp:
            job.set_error('Bad json object (%s)' % exp)
            return
        
        logger.debug('[module-analyzer] did receive host from API: %s' % host)
        
        # Whatever the agent did send us, the name and the address are the ones we did analyze
        host['host_name'] = host_name
        host['address'] = address
        if '_FQDN' in host:
            host['display_name'] = host['_FQDN']
        
        # apply template mapping for inheritance
        input_templates = set(split_and_strip_list(host.get('use', '')))
        if input_templates:
            my_source = self.get_my_source()
            # get only the values and not the template group
            templates_mapping_groups = self.syncdaemon.source_controller.get_analyzer_template_mapping(my_source).values()
            new_templates = set()
            for template in input_templates:
                # NOTE: template is rebound when a group maps it, so the next groups are
                # looked up with the mapped name
                for template_mapping_dict in templates_mapping_groups:
                    if template in template_mapping_dict['values']:
                        template = template_mapping_dict['values'][template]['value']
                new_templates.add(template)
            
            # Catch the specific country---- case that the analyser gives us: it must be transformed into a DATA instead
            to_del = []
            for template in new_templates:
                if template.startswith('country----'):
                    v = template.replace('country----', '')
                    host['_COUNTRY'] = v
                    to_del.append(template)
            for template in to_del:
                new_templates.remove(template)
            
            # We can reform the use parameter to its final value
            sorted_templates = list(new_templates)
            sorted_templates.sort()
            host['use'] = ",".join(sorted_templates)
            # NOTE(review): update_date is only set when the host has templates -- confirm this is wanted
            host['update_date'] = time.time()
        
        # Validate the DATA: upper case string keys, string values
        for (k, v) in host.iteritems():
            if not k.startswith('_'):
                continue
            # NOTE(review): k.startswith() above already needs a string key, so this check
            # cannot fail at this point
            if not isinstance(k, basestring):
                job.set_error('Data keys must be strings (%s found) key=%s' % (type(k), k))
                return
            if k != k.upper():
                job.set_error('Data keys must be upper case')
                return
            if not isinstance(v, basestring):
                job.set_error('Data values must be string (%s found) key=%s value=%s' % (type(v), k, v))
                return
        
        host['_id'] = host_name
        if job.object_state == 'discovery':
            # NOTE(review): raises KeyError if the agent did not send _AGENT_UUID
            host['_SYNC_KEYS'] = (','.join(set([host_uuid, host_name, host['_AGENT_UUID']]))).lower()
        else:
            # for analyze coming from Staging, don't add agent_id in sync_keys but get the SE_UUID from Staging or generate it if there is none
            fallback_se_uuid = "core-%s-%s" % (ITEM_TYPE.HOSTS, host_uuid)
            se_uuid = host_from_staging.get('_SE_UUID', fallback_se_uuid) if host_from_staging else fallback_se_uuid
            host['_SYNC_KEYS'] = se_uuid.lower()
            host['_SE_UUID'] = se_uuid
        
        logger.info('[module-analyzer] host %s(%s) is valid' % (host_name, host_uuid))
        logger.debug('[module-analyzer] host returned to the source data: %s' % host)
        # The memory copy and the database are updated under the same lock
        with self.hosts_lock:
            self.hosts[host_uuid] = host
            self.provider_mongo.save_item(host, item_type=ITEM_TYPE.HOSTS, item_state=ITEM_STATE.RAW_SOURCES, item_source=self.get_name())
        
        # The job is complete
        job.set_analyze_result(host)
        
        # And we must let the synchronizer call us back
        self.callback_synchronizer_about_new_elements(items_type=ITEM_TYPE.HOSTS, data=host)
        
        return
    
    
    def get_dataprovider(self):
        if self.provider_mongo is None:
            self._init_datamanager()
            self._load_from_datamanager()
        return self.provider_mongo
    
    
    def init_analyzer(self):
        self.get_dataprovider()
    
    
    def stop_analyzer(self):
        self.must_run = False
        
        # Already no more thread, we are great
        if self.main_thread is None:
            return
    
    
    def get_all_discovery_elements(self):
        """Return all the hosts this source has in the raw sources, in the source result format.

        The 'update_date' property of each host is removed and, when it has a
        value, set as metadata instead.

        :return: {'state': 'OK', 'objects': {'host': [hosts]}, 'errors': [], 'warnings': []}
        """
        # The hosts are read from the database under the lock
        # NOTE(review): they are used outside the lock afterwards, so find_items is
        # presumably giving us our own copies -- confirm
        with self.hosts_lock:
            my_hosts = self.get_dataprovider().find_items(ITEM_TYPE.HOSTS, item_state=ITEM_STATE.RAW_SOURCES, item_source=self.get_name())
        
        # remove unnecessary property for object after merge and set them as metadata
        for host in my_hosts:
            update_date = host.pop('update_date', None)
            if update_date:
                METADATA.update_metadata(host, METADATA.UPDATE_DATE, int(update_date))
        
        return {'state': 'OK', 'objects': {'host': my_hosts}, 'errors': [], 'warnings': []}
    
    
    def stop_all_analyzes(self):
        """Kill all the analyze commands that are currently running.

        A process that cannot be killed (for example because it did just finish)
        is logged and does not prevent the other ones from being killed.
        """
        logger.info('[module-analyzer] The analyser %s is disabling, we stop all the current running analyzes' % self.get_name())
        with self.current_processes_lock:
            for process in list(self.current_processes):
                try:
                    _kill_sub_process(process)
                except OSError as exp:
                    logger.warning('[module-analyzer] Cannot kill the analyze process %s: %s' % (process.pid, exp))
    
    
    def remove_source_item(self, item_type, source_item):
        """Delete from the raw sources the items of this source that have the name of ``source_item``.

        The synchronizer is called back for every deleted item. If nothing
        matches, nothing is deleted and nobody is called back.

        :param item_type: type of the items to delete
        :param source_item: the item to remove, only its name is used
        """
        with self.hosts_lock:
            provider = self.get_dataprovider()
            source_name = self.get_name()
            host_name = get_name_from_type(ITEM_TYPE.HOSTS, source_item)
            # Materialized first, as the items are deleted while we loop over them
            items = list(provider.find_items(item_type, item_state=ITEM_STATE.RAW_SOURCES, item_source=source_name, where={'host_name': host_name}))
            for item in items:
                provider.delete_item(item, item_type, item_state=ITEM_STATE.RAW_SOURCES, item_source=source_name)
                # import_needed must be false because the import will be launch after in api_remove_source_item
                self.callback_synchronizer_about_delete_elements(items_type=ITEM_TYPE.HOSTS, data={'_id': item['_id']}, import_needed=False)
