#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import signal
import time
import traceback
import cPickle
import zlib
import json
import base64
from datetime import datetime

from shinken.scheduler import Scheduler, _PROVIDERS_KEY_RAW_DATAS, _PROVIDERS_KEY_INITAL_DONE
from shinken.macroresolver import MacroResolver
from shinken.daemon import Daemon
from shinken.property import PathProp, IntegerProp
from shinken.log import logger
from shinken.satellite import BaseSatellite, IForArbiter as IArb, Interface
from shinken.objects.proxyitem import proxyitemsmgr, proxyitemsgraph
from shinken.http_client import HTTPExceptions
from shinken.schedulingmanager import scheduling_manager
from shinken.util import force_memory_trimming
from shinken.vmware_stats import vmware_stats_reader


# Interface for Workers

class IChecks(Interface):
    """
    Interface for the workers (pollers and reactionners):
    they fetch here the checks/actions they have to run (get_checks) and
    push back the results of their executions (put_results).
    """
    
    
    def get_checks(self, do_checks=False, do_actions=False, poller_tags=['None'], reactionner_tags=['None'], worker_name='none', module_types=['fork'], request_limit=-1, request_limit_cpu=-1):
        """
        Give to a poller or a reactionner the checks/actions it asks for.
        
        The parameters are normalized before being handed to self.app.get_to_run_checks():
        * do_checks / do_actions are True only when they are equal to the string 'true'
        * request_limit is casted to int, request_limit_cpu to float
        * poller_tags / reactionner_tags / module_types are JSON-decoded when received as strings
        
        Return the result pickled (protocol 2), zlib-compressed (level 1) and base64 encoded.
        A warning is logged when the whole call takes more than 0.1s or when one
        serialization step takes more than 0.05s.
        """
        started_at = time.time()
        
        # Flags are only enabled by the literal string 'true'
        want_checks = (do_checks == 'true')
        want_actions = (do_actions == 'true')
        request_limit = int(request_limit)
        request_limit_cpu = float(request_limit_cpu)
        
        # Lists can be given as JSON strings: decode them, and keep the other values untouched
        poller_tags, reactionner_tags, module_types = [
            json.loads(raw_value) if isinstance(raw_value, basestring) else raw_value
            for raw_value in (poller_tags, reactionner_tags, module_types)
        ]
        
        to_run = self.app.get_to_run_checks(want_checks, want_actions, poller_tags, reactionner_tags, worker_name, module_types, request_limit, request_limit_cpu)
        
        # Serialization pipeline: pickle -> zlib -> base64, with a time mark around each step
        pickle_start = time.time()
        pickled = cPickle.dumps(to_run, 2)
        compress_start = time.time()
        compressed = zlib.compress(pickled, 1)
        encode_start = time.time()
        encoded = base64.b64encode(compressed)
        encode_end = time.time()
        
        cpick_time = compress_start - pickle_start
        zcompress_time = encode_start - compress_start
        b64_time = encode_end - encode_start
        total_time = time.time() - started_at
        
        is_slow = total_time > 0.1 or cpick_time > 0.05 or zcompress_time > 0.05 or b64_time > 0.05
        if is_slow:
            logger.warning('get_checks:: total time:[%.3f] cpick:[%.3f] zcompres:[%.3f] b64encore:[%.3f] nb_check_send:[%d] b64_size:[%d]' % (total_time, cpick_time, zcompress_time, b64_time, len(to_run), len(encoded)))
        return encoded
    
    
    get_checks.encode = 'raw'
    get_checks.need_lock = False
    
    
    def put_results(self, results):
        """
        Receive the results sent by a poller or a reactionner.
        
        The results are appended to self.app.waiting_results while holding
        self.app.waiting_results_lock. Always return True.
        """
        count = len(results)
        if count:
            logger.debug("Received %d results" % count)
        
        app = self.app
        with app.waiting_results_lock:
            app.waiting_results.extend(results)
        return True
    
    
    put_results.method = 'post'
    put_results.need_lock = False


class IBroks(Interface):
    """
    Interface for the brokers:
    they connect here and get all their broks (data for brokers).
    Data must be ORDERED! (initial status BEFORE update...)
    """
    
    
    def __create_broker_entry(self, bname):
        """Register the broker bname on the application (maybe it was not registered as it should)."""
        self.app.create_broker_entry(bname)
    
    
    def get_broks(self, bname):
        """
        Give to the broker bname the broks that are waiting for it.
        
        The broker entry is created if missing, the global nb_broks_send counter is
        increased and the 'has_full_broks' flag of the broker entry is set to False.
        
        Return the broks pickled (protocol 2), zlib-compressed (level 1) and base64 encoded.
        A warning is logged when the whole call takes more than 0.1s or when one
        serialization step takes more than 0.05s.
        """
        get_broks_time = time.time()
        # IMPORTANT: always the satellite lock BEFORE brokers lock
        with self.app.sched_daemon.satellite_lock:  # protect against loading a new configuration
            with self.app.brokers_lock:  # protect against multiple brokers calls
                # Maybe it was not registered as it should, if so, do it for it
                if bname not in self.app.brokers:
                    self.__create_broker_entry(bname)
                # Now get the broks for this specific broker
                res = self.app.get_broks(bname)
                # got only one global counter for broks
                self.app.nb_broks_send += len(res)
                # we no longer have a full broks set in queue
                self.app.brokers[bname]['has_full_broks'] = False
        
        # Now we can prepare the result into a string (done outside the locks)
        t0 = time.time()
        _cpick = cPickle.dumps(res, 2)
        t1 = time.time()
        _z_compr = zlib.compress(_cpick, 1)
        t2 = time.time()
        _b64 = base64.b64encode(_z_compr)
        t3 = time.time()
        
        get_broks_time = time.time() - get_broks_time
        cpick_time = (t1 - t0)
        zcompress_time = (t2 - t1)
        b64_time = (t3 - t2)
        if get_broks_time > 0.1 or cpick_time > 0.05 or zcompress_time > 0.05 or b64_time > 0.05:
            logger.warning('[scheduler] get_broks:: total time:[%.3f] cpick:[%.3f] zcompres:[%.3f] b64encore:[%.3f] nb_check_send:[%d] b64_size:[%d]' % (get_broks_time, cpick_time, zcompress_time, b64_time, len(res), len(_b64)))
        return _b64
    
    
    get_broks.encode = 'raw'
    get_broks.need_lock = False
    
    
    def fill_initial_broks(self, bname):
        """
        A broker is a new one: clean its broks and fill them with all the initial values.
        
        Return:
        * None when the daemon is a spare (nothing is done)
        * 'true' when the initial broks were generated
        * 'false' when the generation failed, to let the broker know it must restart the call
        
        Raise IOError if the scheduler is still not ready after about 120s of waiting.
        """
        if self.app.sched_daemon.spare:
            return
        
        # Maybe the scheduler is NOT ready (it did not load a conf nor the retention), if so wait
        # IMPORTANT: wait OUTSIDE THE LOCK PART!  SEF-5027
        watch_dog = 0
        while not self.app.scheduler_is_ready:
            time.sleep(0.5)
            watch_dog += 0.5
            
            if watch_dog > 120:
                raise IOError('Timeout error : the scheduler has not started in less than 120s')
        
        # IMPORTANT: always the satellite lock BEFORE brokers lock
        with self.app.sched_daemon.satellite_lock:  # protect against loading a new configuration
            with self.app.brokers_lock:  # protect against multiple brokers calls
                if bname not in self.app.brokers:
                    self.__create_broker_entry(bname)
                
                broker_entry = self.app.brokers[bname]
                broker_entry['broks'].clear()
                try:
                    self.app.fill_initial_broks(bname, with_logs=True)
                except Exception as exp:
                    # BaseException.message is deprecated and is EMPTY when the exception was
                    # built with zero or several arguments (IOError(errno, msg) for example):
                    # fallback on repr() so the log always shows what happened.
                    reason = getattr(exp, 'message', '') or repr(exp)
                    err = "Fill initial broks : cannot generate the initial broks for the broker %s : %s " % (bname, reason)
                    logger.error(err)
                    return 'false'  # let the broker know we have a big problem and it must restart the call
        return 'true'
    
    
    fill_initial_broks.need_lock = False


class IProvider(Interface):
    """
    Interface for the providers: they get here the raw datas that are waiting for them.
    """
    
    
    def get_raw_datas(self, provider_name):
        """
        Give to the provider provider_name the raw datas that are waiting for it.
        
        The provider entry is created if missing, the nb_raw_datas_send counter is
        increased and the "initial done" flag of the provider entry is set to False.
        
        Return the raw datas pickled, zlib-compressed and base64 encoded.
        """
        if provider_name not in self.app.providers:
            self.app.create_providers_entry(provider_name)
        
        raw_datas = self.app.get_raw_datas(provider_name)
        self.app.nb_raw_datas_send += len(raw_datas)
        self.app.providers[provider_name][_PROVIDERS_KEY_INITAL_DONE] = False
        
        # Same serialization as get_checks / get_broks: binary pickle protocol 2 and zlib level 1.
        # The "2" was previously given to zlib.compress (as compression level) and the pickle was
        # done with the slow and verbose ASCII protocol 0. The receiver side is not impacted:
        # loads() detects the protocol and decompress() does not depend on the level.
        return base64.b64encode(zlib.compress(cPickle.dumps(raw_datas, 2), 1))
    
    
    get_raw_datas.encode = 'raw'
    
    
    def fill_initial_raw_datas(self, provider_name):
        """
        Clean the raw datas stored for the provider provider_name (the entry is created
        if missing) and ask the application to fill them with the initial ones.
        """
        if provider_name not in self.app.providers:
            self.app.create_providers_entry(provider_name)
        
        provider = self.app.providers[provider_name]
        provider[_PROVIDERS_KEY_RAW_DATAS].clear()
        self.app.fill_initial_raw_datas(provider_name)


class IStats(Interface):
    """
    Interface for various stats about scheduler activity
    """
    
    doc = '''Get raw stats from the daemon:
        see SEF-1143
    '''
    
    
    # SEF-1143
    def get_raw_stats(self, param=''):
        """
        Complete the raw stats given by the parent Interface with the scheduler ones.
        
        param: optional string. When it contains a '-' and the part between the first and
               the second '-' is only digits, this part is used as the timestamp from which
               the average latency is computed (default: the last 300 seconds).
        
        Return the stats dict. If anything fails while computing the scheduler part,
        'have_conf' is forced to False in the result, the error is logged and the
        traceback is printed.
        """
        sched = self.app.sched
        raw_stats = super(IStats, self).get_raw_stats(param)
        try:
            # The scheduler may have no hosts/services attributes yet, so always look with hasattr
            nb_hosts = 0
            nb_clusters = 0
            if hasattr(sched, 'hosts'):
                for host in sched.hosts:
                    # hosts with a business rule are counted as clusters
                    if host.got_business_rule:
                        nb_clusters += 1
                    else:
                        nb_hosts += 1
            
            # Get the average latency of check since last_check (5min or look in param)
            last_check = time.time() - 300
            if param and '-' in param:
                s_last_check = param.split('-')[1]
                if s_last_check.isdigit():
                    last_check = int(s_last_check)
            
            elts = []
            if hasattr(sched, 'services'):
                elts.extend(sched.services)
            if hasattr(sched, 'hosts'):
                elts.extend(sched.hosts)
            
            latencies = [s.latency for s in elts if s.last_chk > last_check]
            average_latency = 0
            if len(latencies) != 0:
                average_latency = float(sum(latencies)) / len(latencies)
            
            # Only the passive pollers are described here
            passive_pollers = []
            for poller in sched.pollers.itervalues():
                if poller['passive']:
                    d_poller = {}
                    d_poller['addr'] = "%s:%s" % (poller['address'], poller['port'])
                    d_poller['name'] = poller['name']
                    d_poller['con'] = (poller.get('con', None) is not None)
                    d_poller['info'] = poller.get('con_info', 'Scheduler did not try to connect to poller [%s]' % d_poller['addr'])
                    # the latency is only meaningful if we have a connection
                    d_poller['latency'] = poller.get('latency', -1) if d_poller['con'] else -1
                    passive_pollers.append(d_poller)
            
            info_pollers = []
            info_reactionners = []
            
            # Executors without an explicit type are considered as pollers
            for executor_name, executor_stat in sched.stat_by_executor.iteritems():
                info_executor = {
                    "name"       : executor_name,
                    "realm"      : executor_stat.get('realm', ''),
                    "tags"       : executor_stat.get('tag', ''),
                    "done_by_sec": executor_stat.get('avg_nb_checks_received', 0),
                }
                
                if executor_stat.get('type', 'Poller') == 'Poller':
                    info_pollers.append(info_executor)
                else:
                    info_reactionners.append(info_executor)
            
            # Must be called before reading sched.rogue_satellites below
            sched.cleanup_rogue_satellite()
            arbiter_uri = ''
            if self.app.last_arbiter:
                arbiter_uri = self.app.last_arbiter.get('uri', '')
            raw_stats.update({
                'arbiter_uri'                          : arbiter_uri,
                'realm'                                : self.app.realm,
                'have_conf'                            : self.app.already_have_conf,
                'activated'                            : self.app.activated,
                'spare'                                : self.app.spare,
                'nb_hosts'                             : nb_hosts,
                'nb_clusters'                          : nb_clusters,
                'nb_checks'                            : len(getattr(sched, 'services', ())),
                'late_checks'                          : getattr(sched, 'nb_late', 0),
                'late_checks_by_tags'                  : getattr(sched, 'late_checks_by_tags', 0),
                'average_latency'                      : average_latency,
                'checks_todo_by_sec'                   : sched.avg_checks_todo_by_sec.get_avg(0),
                'notifications_todo_by_sec'            : sched.avg_notification_todo_by_sec.get_avg(0),
                'passive_pollers'                      : passive_pollers,
                'info_pollers'                         : info_pollers,
                'info_reactionners'                    : info_reactionners,
                'rogue_pollers'                        : sched.rogue_satellites.get('Poller', {}),
                'rogue_reactionners'                   : sched.rogue_satellites.get('Reactionner', {}),
                'http_errors_count'                    : self.app.http_errors_count,
                'loop_turn_time_avg'                   : sched.loop_time_avg.get_avg(0),
                'checks_warning_threshold_cpu_usage'   : sched.checks_warning_threshold_cpu_usage[::-1][:5],  # last 5 elements in reverse order
                'checks_warning_threshold_cpu_usage_nb': len(sched.checks_warning_threshold_cpu_usage),
                'save_retention_time'                  : sched.scheduler_stat.get('save_retention_time', 0),
                'save_retention_error'                 : sched.scheduler_stat.get('save_retention_error', ''),
                'last_retention_save'                  : sched.scheduler_stat.get('last_retention_save', ''),
                'avg_checks_received_schedule_by_sec'  : sched.avg_checks_received_schedule_by_sec.get_avg(0),
                'avg_checks_received_force_by_sec'     : sched.avg_checks_received_force_by_sec.get_avg(0),
                'avg_checks_received_retry_by_sec'     : sched.avg_checks_received_retry_by_sec.get_avg(0),
                'avg_checks_received_dependency_by_sec': sched.avg_checks_received_dependency_by_sec.get_avg(0),
            })
            if self.app.have_modules and self.app.modules:
                raw_stats['module_stats'] = self._get_module_stats(getattr(self.app, 'modules_manager', None), param)
        except Exception as error:
            # Any failure here is reported as "no configuration" to the caller
            raw_stats['have_conf'] = False
            logger.info("[scheduler][%s] The scheduler is not initialized: [%s]" % (self.app.sched.instance_id, error))
            traceback.print_exc()
        
        return raw_stats
    
    
    get_raw_stats.doc = doc
    get_raw_stats.need_lock = False
    
    
    def get_item(self, uuid, type):
        """
        Get a flat view (see _get_flat_item) of a host or a check.
        
        uuid: the host uuid for type 'host', '<host_uuid>-<check_uuid>' for type 'check'
        type: 'host' or 'check'
        
        Return the flat dict of the item, or an explanation string when the scheduler
        is not ready, the uuid is malformed, the item is not found or the type is not
        supported.
        """
        # Like in get_raw_stats: the scheduler may not have its hosts yet, so do not
        # crash with an AttributeError but answer that we are not ready
        hosts = getattr(self.app.sched, 'hosts', None)
        if not hosts:
            return 'scheduler not ready'
        
        if type == 'host':
            host = hosts.find_by_uuid(uuid)
            if not host:
                return 'host !%s! not found' % uuid
            return self._get_flat_item(host)
        
        if type == 'check':
            uuid_parts = uuid.split('-')
            # A check uuid without '-' cannot be split into host and check parts:
            # give an explanation instead of raising an IndexError
            if len(uuid_parts) < 2:
                return 'malformed check uuid !%s!, expected <host_uuid>-<check_uuid>' % uuid
            host_uuid = uuid_parts[0]
            check_uuid = uuid_parts[1]
            host = hosts.find_by_uuid(host_uuid)
            if not host:
                return 'host !%s! not found' % host_uuid
            check = next((c for c in host.services if c.uuid == check_uuid), None)
            if not check:
                return 'check !%s! not found' % check_uuid
            
            return self._get_flat_item(check)
        
        return 'unsupported type !%s!' % type
    
    
    def _get_flat_item(self, item):
        """
        Build a dict with the name, the state information and the problem/impact
        information of item. source_problems and impacts are given as comma separated
        full names.
        """
        to_return = {
            'name'                 : item.get_full_name(),
            'state'                : item.state,
            'state_id'             : item.state_id,
            'state_type'           : item.state_type,
            'state_type_id'        : item.state_type_id,
            
            'is_problem'           : item.is_problem,
            'is_impact'            : item.is_impact,
            'source_problems'      : ','.join((i.get_full_name() for i in item.source_problems)),
            'impacts'              : ','.join((i.get_full_name() for i in item.impacts)),
            'state_validity_period': item.state_validity_period,
        }
        return to_return
    
    
    def call_for_retention(self):
        """
        Ask the scheduler to update its retention file. Only allowed when the daemon
        is in debug mode (return None then), otherwise return an explanation string.
        """
        if self.app.debug:
            self.app.sched.update_retention_file(True)
        else:
            return 'You cannot make this call if your are not in debug mode'
    
    
    call_for_retention.need_lock = True


class IForArbiter(IArb):
    """
    Interface for the Arbiter: the arbiter sends us our configuration, and after that
    we listen for its instructions (external commands, wait for a new configuration,
    satellites to remove...).
    """
    
    
    def run_external_commands(self, cmds):
        """
        Give the external commands cmds (global or specific ones) to the scheduler.
        If the scheduler has no 'external_command' attribute we are not ready:
        the call is aborted with a 503.
        """
        scheduler = self.app.sched
        if not hasattr(scheduler, 'external_command'):
            logger.info('Get external commands from a Receiver but i am not ready')
            self.app.abort(503, 'Scheduler do not receive configuration from Arbiter.')
            return
        scheduler.run_external_commands(cmds)
    
    
    run_external_commands.method = 'POST'
    
    
    def put_conf(self, conf):
        """Stop the current scheduler run (die), then let the parent interface store conf."""
        scheduler = self.app.sched
        scheduler.die()
        super(IForArbiter, self).put_conf(conf)
    
    
    put_conf.method = 'POST'
    put_conf.need_lock = False
    
    
    def wait_new_conf(self):
        """
        Called by the arbiter if it thinks we are running but we must not (like if I was
        a spare that took a conf but the master returns: I must die and wait for a new conf).
        The scheduler is stopped and the parent wait_new_conf is called, all under the satellite lock.
        """
        app = self.app
        with app.satellite_lock:
            logger.info("Arbiter wants me to wait for a new configuration")
            app.sched.die()
            super(IForArbiter, self).wait_new_conf()
    
    
    wait_new_conf.need_lock = False
    
    doc = 'Get the current running id of the daemon (scheduler)'
    
    
    def get_daemon_incarnation(self):
        """
        Return the push_flavor of the scheduler when it has a configuration with a
        push_flavor, 0 otherwise.
        """
        scheduler = self.app.sched
        conf = getattr(scheduler, 'conf', None)
        if conf is None or not hasattr(conf, 'push_flavor'):
            return 0
        return scheduler.push_flavor
    
    
    get_daemon_incarnation.need_lock = False
    get_daemon_incarnation.doc = doc
    
    
    def get_current_satellites(self):
        """
        Called by the arbiter to get the current satellites names (broker, pollers,
        receivers and reactionners), so it can detect an old one.
        The scheduler is asked under the satellite lock.
        """
        with self.app.satellite_lock:
            current_satellites = self.app.sched.get_current_satellites()
        return current_satellites
    
    
    get_current_satellites.need_lock = False
    
    
    def satellites_to_remove(self, to_remove):
        """Forward to the daemon the satellites the arbiter wants us to remove."""
        app = self.app
        app.satellites_to_remove(to_remove)
    
    
    satellites_to_remove.method = 'POST'
    satellites_to_remove.need_lock = False


class IProxyItems(Interface):
    """
    Interface that exports the states of the proxy items to the other daemons.
    """
    
    
    def get_proxy_states(self, since):
        """
        Export the proxy item states that changed since the given time.
        
        since: the 'diff_since' value WE (locally) gave at the previous call (castable to int)
        
        Return a dict {'diff_since': <our local time - 1s>, 'states': <exported states>}
        pickled, zlib-compressed and base64 encoded.
        """
        t0 = time.time()
        
        # We get a time since the last ask from the other one, this time is the last one WE (locally) gave
        # to the other daemon, because this last one can have a different time than ourselves
        since = int(since)
        proxies = proxyitemsmgr.get_export_state_since(since, self.app.sched.elements_uuids)
        
        # We give the time of our local time, because the other node can have a different time to ask
        nearly_now = int(time.time()) - 1  # -1 to manage round of time, to be sure to not lose a state
        
        to_return = {
            'diff_since': nearly_now,
            'states'    : proxies
        }
        # Same serialization as get_checks / get_broks: binary pickle protocol 2 and zlib level 1.
        # The "2" was previously given to zlib.compress (as compression level) and the pickle was
        # done with the slow and verbose ASCII protocol 0. The receiver side is not impacted:
        # loads() detects the protocol and decompress() does not depend on the level.
        to_return_encode = base64.b64encode(zlib.compress(cPickle.dumps(to_return, 2), 1))
        if len(proxies) > 0:
            logger.debug("EXPORTING [%d states] since [%s] in [%.3fs]" % (len(proxies), datetime.fromtimestamp(since).strftime('%d-%m-%Y %H:%M:%S'), time.time() - t0))
        return to_return_encode
    
    
    get_proxy_states.encode = 'raw'


# The main app class
class Shinken(BaseSatellite):
    """Main application class of the scheduler daemon."""
    # Should we look at passive property for external connection
    is_using_passive_connection_information = True  # the scheduler only connects to passive elements
    
    # Start from a copy of the BaseSatellite properties and set the scheduler
    # defaults for the pid file, the port and the local log file
    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile'  : PathProp(default='schedulerd.pid'),
        'port'     : IntegerProp(default='7768'),
        'local_log': PathProp(default='schedulerd.log'),
    })
    
    
    # Create the shinken class:
    # Create a Pyro server (port = argv 1)
    # then create the interface for the arbiter.
    # Then, it waits for a first configuration
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile='', daemon_id=0):
        """
        Build the scheduler daemon: init the BaseSatellite part as a 'scheduler',
        create the Scheduler object, register it in the scheduling manager and
        create the HTTP interfaces.
        
        Note: profile is accepted but not used in this constructor.
        """
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug, debug_file, daemon_id)
        
        scheduler = Scheduler(self)
        self.sched = scheduler
        scheduling_manager.load_scheduler(scheduler)
        
        # Interfaces bound to the daemon itself
        self.interface = IForArbiter(self)
        self.istats = IStats(self)
        # Interfaces bound to the scheduler object
        self.ichecks = IChecks(scheduler)
        self.ibroks = IBroks(scheduler)
        self.iprovider = IProvider(scheduler)
        # And again one bound to the daemon
        self.iproxyitems = IProxyItems(self)
        
        self.must_run = True
        
        # TODO to del
        self.uri = self.uri2 = None
        
        # Links to the satellites, only the pollers ones are used
        # TODO load reactionners and brokers in setup new conf
        self.pollers, self.reactionners, self.brokers = {}, {}, {}
        
        self.realm = ''
        self.last_arbiter = {}
        
        # To know if we must load or update the modules in the modules manager
        self.have_modules = False
    
    
    def do_stop(self):
        """Stop the daemon: nothing specific to the scheduler, only call the parent do_stop."""
        super(Shinken, self).do_stop()
    
    
    def satellites_to_remove(self, to_remove):
        """
        Remove from the scheduler the brokers the arbiter does not need anymore.
        
        to_remove: dict, only its 'broker' entry (the broker names) is read here.
        """
        obsolete_brokers = to_remove['broker']
        removed_names = ','.join(obsolete_brokers)
        logger.debug('[CONFIGURATION] The arbiter asks us to remove brokers that are no more need: %s' % removed_names)
        for obsolete_broker in obsolete_brokers:
            self.sched.remove_broker(obsolete_broker)
    
    
    def compensate_system_time_change(self, difference):
        """
        Compensate a system time change of difference for all hosts/services/checks/notifs.
        
        difference: the time shift to apply (added to every time value).
        
        * program_start is shifted (never below 0)
        * each host and service compensates itself
        * each check still 'scheduled' with a t_to_go is moved to its shifted time, matched
          against the check_period of its ref if there is one
        * each action still 'scheduled' with a t_to_go is moved to its shifted time; the
          notifications are matched against the notification_period of their ref and
          their creation_time is shifted too
        
        When a period has no more valid time, the check/action is flagged as an error
        waiting to be consumed.
        """
        logger.warning("[schedulerdeamon] A system time change of %d has been detected. Compensating..." % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)
        
        # Then we compensate all host/services
        for h in getattr(self.sched, 'hosts', []):
            h.compensate_system_time_change(difference)
        for s in getattr(self.sched, 'services', []):
            s.compensate_system_time_change(difference)
        
        
        def _set_as_error(action):
            # No more valid time after the change: say as error, with error output.
            # The life cycle field is 'status' (it is the one tested against 'scheduled'
            # in the loops below). It was previously 'state' that was set here, so the
            # item was still seen as scheduled.
            # NOTE(review): confirm that nothing else must be warned (scheduling manager)
            # when an item leaves the 'scheduled' status this way.
            action.status = 'waitconsume'
            action.exit_status = 2
            action.output = '(Error: there is no available check time after time change!)'
            action.check_time = time.time()
            action.execution_time = 0
        
        
        # Now all checks
        for c in self.sched.checks.values():
            # Already launched checks should not be touched
            if c.status != 'scheduled' or c.t_to_go is None:
                continue
            ref = c.ref
            new_t = max(0, c.t_to_go + difference)
            if ref.check_period is not None:
                # But it's not so simple, we must match the timeperiod
                new_t = ref.check_period.get_next_valid_time_from_t(new_t)
            
            # But maybe there is no more new value! Not good :(
            if new_t is None:
                _set_as_error(c)
                continue
            
            scheduling_manager.on_reschedule(c.t_to_go, new_t, c)
            c.t_to_go = new_t
            ref.next_chk = new_t
        
        # Now all actions
        for c in self.sched.actions.values():
            # Already launched actions should not be touched, and like for the checks
            # there is nothing to shift if there is no t_to_go
            if c.status != 'scheduled' or c.t_to_go is None:
                continue
            
            # Event handlers do not have ref
            ref = getattr(c, 'ref', None)
            new_t = max(0, c.t_to_go + difference)
            
            # Notification should be checked with notification_period
            if c.is_a == 'notification':
                if ref is not None and ref.notification_period:
                    # But it's not so simple, we must match the timeperiod
                    new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                # And got a creation_time variable too
                c.creation_time = c.creation_time + difference
            
            # But maybe there is no more new value! Not good :(
            if new_t is None:
                _set_as_error(c)
            else:
                c.t_to_go = new_t
    
    
    def manage_signal(self, sig, frame):
        """
        Signal handler of the daemon:
        * SIGUSR2: only ask the scheduler for an objects dump
        * SIGUSR1: only call the Daemon handler
        * any other signal: stop the scheduler, stop our loop, then call the Daemon handler
        """
        logger.info("[schedulerdeamon] Received a SIGNAL %s" % sig)
        
        if sig == signal.SIGUSR2:
            # usr2, dump objects: the Daemon handler is not called for this one
            self.sched.need_objects_dump = True
            return
        
        if sig != signal.SIGUSR1:
            # Not a dump request: die :)
            self.sched.die()
            self.must_run = False
        
        Daemon.manage_signal(self, sig, frame)
    
    
    def do_loop_turn(self):
        """
        One turn of the daemon loop: wait for a configuration and, if we got one,
        set it up, run the scheduler, then clean the zombie modules and the memory.
        """
        self.wait_for_initial_conf()
        if self.new_conf:
            # Ok, we have a conf: load it and run the scheduler
            self.setup_new_conf()
            self.sched.run()
            self.check_and_del_zombie_modules()
            # We may have dropped all our objects: be sure that the memory is cleaned from here
            force_memory_trimming()
    
    
    # Set up a new conf, but beware of the global lock management.
    # Note: don't do any locking here, as we have the satellite lock!
    def setup_new_conf(self):
        """Load the pending configuration: call really_setup_new_conf() while holding the satellite lock."""
        # The whole loading is done under self.satellite_lock
        with self.satellite_lock:
            self.really_setup_new_conf()
    
    
    def really_setup_new_conf(self):
        t0 = time.time()
        # first of all: warn the scheduler object that a configuration is being parse/load
        # so maybe there are strctures that must be avoid during this (like rogue satellites)
        self.sched.warn_about_a_new_configuration_load_in_progress()
        
        new_conf = self.new_conf
        conf_raw = new_conf['conf']
        override_conf = new_conf['override_conf']
        modules = new_conf['modules']
        satellites = new_conf['satellites']
        instance_name = new_conf['instance_name']
        push_flavor = new_conf['push_flavor']
        skip_initial_broks = new_conf['skip_initial_broks']
        realm = new_conf['realm']
        self.configuration_incarnation = new_conf['configuration_incarnation']
        logger.info("[schedulerdeamon] New configuration %s received" % self.configuration_incarnation)
        self.activated = new_conf.get('activated', True)
        self.spare = new_conf.get('spare', True)
        
        vmware_stats_reader.set_enabled(new_conf.get('vmware__statistics_compute_enable', True))
        self.last_arbiter = new_conf.get('arbiter_trace', {})
        
        del new_conf
        self.new_conf = None
        if not self.activated:
            logger.info('Stopping all modules')
            self.modules_manager.stop_all()
            self.have_modules = False
            
            # Let the scheduler clean it and know it's a spare
            self.sched.set_as_inactive(instance_name, push_flavor, self.configuration_incarnation, self.spare)
            self.modules = None
            self.realm = realm
            if self.spare:
                logger.info("[scheduler][configuration] Configuration received, I'm configured as Spare")
            else:
                logger.info("[scheduler][configuration] Configuration received, I'm an active server (not a Spare) waiting for a new shard in the future.")
            return
        
        conf_from_arbiter = cPickle.loads(conf_raw)
        del conf_raw
        
        logger.debug("[schedulerdeamon] Configuration received at [%d]. flavor: [%s]  deserialized in [%.2f] secs" % (t0, push_flavor, time.time() - t0))
        logger.set_name(instance_name)
        
        # Write back the instance_name into the configuration file so we can find it with
        # local tools
        self.save_daemon_name_into_configuration_file(instance_name)
        
        # Tag the conf with our data
        self.conf = conf_from_arbiter
        self.cur_conf = self.conf
        
        del conf_from_arbiter
        
        self.conf.push_flavor = push_flavor
        self.conf.instance_name = instance_name
        self.conf.skip_initial_broks = skip_initial_broks
        
        # maybe the arbiter is not up to date, and we don't want to crash the scheduler for this because
        # old arbiter did not send this
        self.conf.default_properties_values = getattr(self.conf, 'default_properties_values', {})
        
        self.realm = realm
        
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites
        
        # Set the enable/disable for human log format
        logger.set_human_format(on=self.conf.human_timestamp_log)
        
        # We need to clean the satellites we do not have, but do not void the one we want to keep, as they
        # have threads and connection we want to keep
        for (type_, our_links_dict) in [('pollers', self.pollers), ('reactionners', self.reactionners)]:
            new_satellites_ids = set(satellites[type_].keys())  # get the new ids
            old_satellites_ids = set(our_links_dict.keys())  # the current ids we have
            must_be_deleted = old_satellites_ids - new_satellites_ids
            for must_be_deleted_id in must_be_deleted:
                entry = our_links_dict[must_be_deleted_id]
                logger.info('Removing the satellite (%s:%s) %s as we do not need it anymore.' % (type_, must_be_deleted_id, entry['name']))
                del our_links_dict[must_be_deleted_id]
        
        # Refresh the old proxy items with the new ones. Here we need to :
        # * create if missing
        # * update some fields if already present
        # * remove if missing
        proxyitemsmgr.refresh_items(self.conf.item_proxies)
        
        proxyitemsgraph.reset_from_other(self.conf.proxy_items_graph)
        
        if self.conf.human_timestamp_log:
            logger.set_human_format()
        
        # Now for pollers
        for (pol_id, entry) in satellites['pollers'].iteritems():
            # Thread that manage this connection
            thread = None
            daemon_incarnation = 0
            
            # Must look if we already have it
            already_got = pol_id in self.pollers
            if already_got:
                daemon_incarnation = self.pollers[pol_id]['daemon_incarnation']
                thread = self.pollers[pol_id]['thread']
            
            self.pollers[pol_id] = entry
            
            # replacing poller address and port by those defined in satellitemap
            if entry['name'] in override_conf['satellitemap']:
                entry = dict(entry)  # make a copy
                entry.update(override_conf['satellitemap'][entry['name']])
            
            proto = 'http'
            if entry['use_ssl']:
                proto = 'https'
            
            uri = '%s://%s:%s/' % (proto, entry['address'], entry['port'])
            entry['uri'] = uri
            
            entry['instance_id'] = 0  # No use so all to 0
            entry['daemon_incarnation'] = daemon_incarnation
            entry['last_connection'] = 0
            entry['thread'] = thread
            entry['con'] = None
            entry['id'] = pol_id
            entry['type'] = 'poller'
        logger.info("We have our pollers: %s" % self.pollers)
        
        # Now reactionners
        for (rea_id, entry) in satellites['reactionners'].iteritems():
            thread = None
            daemon_incarnation = 0
            # Must look if we already have it
            already_got = rea_id in self.reactionners
            if already_got:
                daemon_incarnation = self.reactionners[rea_id]['daemon_incarnation']
                thread = self.reactionners[rea_id]['thread']
            
            self.reactionners[rea_id] = entry
            
            # replacing reactionner address and port by those defined in satellitemap
            if entry['name'] in override_conf['satellitemap']:
                entry = dict(entry)  # make a copy
                entry.update(override_conf['satellitemap'][entry['name']])
            
            proto = 'http'
            if entry['use_ssl']:
                proto = 'https'
            uri = '%s://%s:%s/' % (proto, entry['address'], entry['port'])
            entry['uri'] = uri
            entry['instance_id'] = 0  # No use so all to 0
            entry['daemon_incarnation'] = daemon_incarnation
            entry['last_connection'] = 0
            entry['thread'] = thread
            entry['con'] = None
            entry['id'] = rea_id
            entry['type'] = 'reactionner'
        logger.info("We have our reactionners: %s" % self.reactionners)
        
        # Now others schedulers
        for (sched_id, entry) in satellites['schedulers'].iteritems():
            # Must look if we already have it to do not overdie our broks
            already_got = False
            
            # Thread that manage this connection
            thread = None
            # We can already got this conf id, but with another address
            if sched_id in self.schedulers:
                prev_entry = self.schedulers[sched_id]
                new_addr = entry['address']
                old_addr = prev_entry['address']
                new_port = entry['port']
                old_port = prev_entry['port']
                thread = prev_entry['thread']  # always keep it, if not valid, will drop it self and will be restarted
                # Should got all the same to be ok :)
                if new_addr == old_addr and new_port == old_port:
                    already_got = True
            
            if already_got:
                daemon_incarnation = self.schedulers[sched_id]['daemon_incarnation']
            else:
                daemon_incarnation = 0
            
            self.schedulers[sched_id] = entry
            
            # replacing scheduler address and port by those defined in satellitemap
            if entry['name'] in override_conf['satellitemap']:
                entry = dict(entry)  # make a copy
                entry.update(override_conf['satellitemap'][entry['name']])
            proto = 'http'
            if entry['use_ssl']:
                proto = 'https'
            uri = '%s://%s:%s/' % (proto, entry['address'], entry['port'])
            entry['uri'] = uri
            
            entry['daemon_incarnation'] = daemon_incarnation
            entry['last_connection'] = 0
            entry['thread'] = thread
            entry['con'] = None
            entry['id'] = sched_id
            entry['type'] = 'scheduler'
            # IMPORTANT: force active in this element
            entry['active'] = True
            entry['last_proxy_sync'] = 0  # it means we will ask ALL states from the other node by default
            logger.debug("[%s] schedulers sched_id[%s] name[%s] instance_id[%s] push_flavor[%s]" % (self.name, sched_id, entry['name'], entry['instance_id'], entry['push_flavor']))
        
        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            # print "Overriding the property %s with value %s" % (prop, self.override_conf[prop])
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)
        
        self.set_tz(self.conf.use_timezone)
        
        logger.info("[schedulerdeamon][configuration] Receiving modules:[%s] i already load modules:[%s]" % (','.join([m.get_name() for m in self.modules]), self.have_modules))
        
        if not self.have_modules:
            # Ok now start, or restart them!
            # Set modules, init them and start external ones
            self.modules_manager.set_modules(self.modules)
            self.do_load_modules()
            self.modules_manager.start_external_instances()
            self.have_modules = True
        else:  # just update the one we need
            self.modules_manager.update_modules(self.modules)
        
        logger.info("[schedulerdeamon] Loading configuration realm[%s] instance_name[%s] push_flavor[%s] total_number_of_item_in_the_realm[%d]." % (realm, instance_name, push_flavor, len(self.conf.item_proxies)))
        self.conf.explode_global_conf()
        
        # Creating the Macroresolver Class & unique instance
        self.macro_resolver = MacroResolver()
        self.macro_resolver.init(self.conf)
        
        self.sched.load_configuration_from_arbiter(self.conf, push_flavor, self.pollers, self.reactionners, self.configuration_incarnation)
        
        # Start threads if need, not a problem as starting thread is cheap and not timeout prone
        self.assert_valid_satellite_threads()
        
        # The configuration load is done, let the scheduler know it so it can be sure about all data structures
        self.sched.warn_about_the_end_of_the_configuration_load()
        
        # Be sure that we did clean all the memory from here
        force_memory_trimming()
    
    
    # Check that the distant poller/reactionner can accept connections
    # Also check if I am known by that daemon
    def ping_and_check_distant_daemon(self, sat_entry):
        """Run the parent class check, then ask pollers/reactionners if they know this scheduler.

        :param sat_entry: link dict of the distant daemon (reads 'type' and 'con')
        :return: False if the parent class check fails; for a poller or a reactionner, True only
                 if the daemon answers the string "True" to 'is_scheduler_known'; True otherwise
        """
        parent_check_ok = super(Shinken, self).ping_and_check_distant_daemon(sat_entry)
        if not parent_check_ok:
            return False
        
        # Only pollers and reactionners are asked if they know us
        if sat_entry['type'] not in ('reactionner', 'poller'):
            return True
        
        answer = sat_entry['con'].get('is_scheduler_known', {'sched_id': self.conf.instance_id})
        # The answer is compared as a string, not as a boolean
        return answer == "True"
    
    
    # Give the arbiter the data about what I manage
    # for me it's just my instance_id and my push flavor
    def what_i_managed(self):
        """Tell what this daemon currently manages.

        :return: None while no configuration was received, an empty dict if there is no
                 'conf' attribute, else a one-entry dict {instance_id: push_flavor}
        """
        if not self.already_have_conf:
            return None
        
        if not hasattr(self, 'conf'):
            return {}
        
        current_conf = self.conf
        return {current_conf.instance_id: current_conf.push_flavor}
    
    
    # Get the right link table for the given daemon kind and return the link with the given id.
    # If the kind or the id is unknown, return None.
    # The scheduler only needs to connect to other schedulers, pollers and reactionners;
    # the arbiters are needed to keep track of the arbiters.
    def get_link_from_type(self, daemon_type, daemon_id):
        """Look up the link entry of a daemon by its type and id.

        :param daemon_type: one of 'scheduler', 'arbiter', 'poller' or 'reactionner'
        :param daemon_id: key of the link in the matching table
        :return: the link entry, or None if the type or the id is unknown
        """
        links_by_type = {
            'scheduler'  : self.schedulers,
            'arbiter'    : self.arbiters,
            'poller'     : self.pollers,
            'reactionner': self.reactionners,
        }
        # The lookup itself is done while holding the satellite lock
        with self.satellite_lock:
            links = links_by_type.get(daemon_type, {})
            return links.get(daemon_id, None)
    
    
    # On the scheduler we want to connect (ping+job) only for passive poller/reactionners
    def should_connect_to_distant_satellite(self, satellite_type, distant_link):
        """Tell if this daemon must open a connection to the given distant link.

        :param satellite_type: 'poller', 'reactionner' or 'scheduler'
        :param distant_link: link dict (reads 'passive' for workers, 'name' for schedulers)
        :return: the 'passive' value for a poller/reactionner; for a scheduler, False while
                 there is no 'sched' attribute, else True only if its name differs from ours
        :raise Exception: if the satellite type is not one of the three managed types
        """
        # Pollers and reactionners: the answer is their passive flag
        if satellite_type in ('poller', 'reactionner'):
            return distant_link['passive']
        
        if satellite_type != 'scheduler':
            raise Exception('Error: the satellite type %s is not managed by this daemon. Cannot connect to it.' % satellite_type)
        
        # Maybe we are not ready, so don't connect currently
        if not hasattr(self, 'sched'):
            return False
        
        # We only connect to OTHER schedulers, identified by their name
        return self.sched.instance_name != distant_link['name']
    
    
    # jobs to do in http distant thread
    def get_jobs_from_distant(self, distant_link):
        """Do the periodic work for one distant link.

        Does nothing while there is no 'sched' attribute. For a passive poller/reactionner
        with a connection, actions are pushed to it and then fetched back from it. For a
        scheduler with another name than ours, its proxy states are fetched.

        :param distant_link: link dict (reads 'type', 'passive', 'con' and 'name')
        :return: None
        """
        if not hasattr(self, 'sched'):
            return
        
        # Only passive pollers & reactionners with a live connection are handled here
        is_worker = distant_link['type'] in ('poller', 'reactionner')
        if is_worker and distant_link.get('passive', False) and distant_link['con'] is not None:
            self.sched.push_actions_to_passives_satellites(distant_link)
            self.sched.get_actions_from_passives_satellites(distant_link)
        
        if distant_link['type'] != 'scheduler':
            return
        
        # Only the other schedulers are asked for their proxy states
        if self.sched.instance_name != distant_link['name']:
            self.get_new_proxy_states(distant_link)
    
    
    # From others schedulers we update our states
    def get_new_proxy_states(self, sat_entry):
        """Fetch the proxy states of another scheduler and merge them into ours.

        The link connection is pinged, then 'get_proxy_states' is requested with the last
        sync marker stored in the link. The answer is base64-decoded, zlib-decompressed and
        unpickled; its 'states' are handed to proxyitemsmgr and its 'diff_since' becomes the
        new sync marker of the link. If the link has no connection yet, the connection is
        initialized instead and nothing is fetched during this call.

        :param sat_entry: link dict of the distant scheduler (reads 'last_proxy_sync', 'con'
                          and 'name'; writes 'con' and 'last_proxy_sync')
        :return: None
        """
        sat_type = 'scheduler'
        last_proxy_sync = sat_entry['last_proxy_sync']
        try:
            con = sat_entry['con']
            if con is None:  # None = not initialized, so make the connection
                logger.debug('get_new_proxy_states:: go to connecting to %s' % sat_entry)
                self.pynag_con_init(sat_entry)
                return
            
            t0 = time.time()
            # Before asking a call that can be long, do a simple ping to be sure it is alive
            con.get('ping')
            tmp_states = con.get('get_proxy_states', {'since': last_proxy_sync}, wait='long')
            try:
                _t = base64.b64decode(tmp_states)
                _t = zlib.decompress(_t)
                # NOTE(security): unpickling can execute arbitrary code, so this is only safe
                # as long as the distant scheduler is a trusted peer.
                tmp_result = cPickle.loads(_t)
            # A truncated or garbled payload can also raise EOFError, ValueError, IndexError or
            # ImportError while decoding/unpickling: handle them here like any other unreadable
            # payload, instead of letting them reach the catch-all handler that exits.
            except (TypeError, ValueError, EOFError, IndexError, ImportError, zlib.error, cPickle.PickleError) as exp:
                logger.error('Cannot load proxy states data from %s : %s' % (sat_entry['name'], exp))
                sat_entry['con'] = None
                return
            states = tmp_result['states']
            diff_since = tmp_result['diff_since']
            
            if len(states) != 0:
                logger.debug("[Scheduler] [%s] Proxy states get in [%s] from [%s-%s]" % (len(states), time.time() - t0, sat_type, sat_entry['name']))
            
            # Ok, we can merge these states into our own proxy items
            proxyitemsmgr.update_from_other_states(states)
            # The time we keep is the one given by the other node, because we are not sure
            # that our clock and its clock are in sync
            sat_entry['last_proxy_sync'] = diff_since
        # Ok, con is not known, so we create it
        except KeyError as exp:
            logger.debug("Key error for get_proxy_states : %s" % str(exp))
            self.pynag_con_init(sat_entry)
        except HTTPExceptions as exp:
            logger.warning("Connection problem to the %s %s: %s" % (sat_type, sat_entry['name'], str(exp)))
            sat_entry['con'] = None
        # The distant scheduler is probably not initialized
        except AttributeError as exp:
            logger.warning("The %s %s should not be initialized: %s" % (sat_type, sat_entry['name'], str(exp)))
        # We do not know what happened, so log everything we have and exit
        except Exception as x:
            logger.error(str(x))
            logger.error(traceback.format_exc())
            import sys
            sys.exit(1)
    
    
    # For a new distant scheduler, do nothing special
    def _manage_new_distant_daemon_incarnation(self, entry, old_incar, new_incar):
        pass
    
    
    # Our main function, launched after the init
    def main(self):
        """Entry point of the daemon.

        Loads the configuration file, initializes and starts the daemon, loads the modules
        manager, registers the interfaces on the http daemon and then enters the main loop.

        :return: None
        :raise Exception: any exception raised by these steps is logged as critical with its
                          traceback and then re-raised
        """
        try:
            self.load_config_file()
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()
            
            self.http_daemon.register(self.interface)
            self.http_daemon.register(self.istats)
            self.http_daemon.register(self.ichecks)
            self.http_daemon.register(self.ibroks)
            self.http_daemon.register(self.iprovider)
            self.http_daemon.register(self.iproxyitems)
            
            # Take the uri from the http daemon BEFORE logging it, so the logs below never
            # show a value from before this assignment
            self.uri = self.http_daemon.uri
            logger.debug("[schedulerdeamon] The Scheduler Interface uri is: %s" % self.uri)
            logger.info("[schedulerdeamon] General interface is at: %s" % self.uri)
            self.do_mainloop()
        except Exception:
            logger.critical("The daemon did have an unrecoverable error. It must exit.")
            logger.critical("You can log a bug to your Shinken integrator with the error message:")
            logger.critical("%s" % (traceback.format_exc()))
            # Re-raise with the original traceback so the caller still sees the failure
            raise
