#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import base64
import json
import logging
import pickle
import signal
import time
import traceback
import zlib
from collections import deque
from datetime import datetime
from hashlib import md5

from shinken.checks_container import checks_container
from shinken.compat import SHINKEN_PICKLE_PROTOCOL
from shinken.configuration_incarnation import PartConfigurationIncarnation
from shinken.daemon import Daemon, IStatsInterface
from shinken.http_client import HTTPExceptions
from shinken.log import logger, get_chapter_string, get_section_string, LoggerFactory
from shinken.macroresolver import MacroResolver
from shinken.misc.type_hint import TYPE_CHECKING
from shinken.network_exchange_sequencer import NetworkExchangeSequencer, BROKS_EXCHANGE_PROTOCOLS
from shinken.objects.proxyitem import proxyitemsmgr, proxyitemsgraph
from shinken.property import PathProp, IntegerProp
from shinken.runtime_stats.threads_dumper import WatchDogThreadDumper
from shinken.safepickle import SafeUnpickler
from shinken.satellite import BaseSatellite, IForArbiter as IArb, Interface
from shinken.scheduler import Scheduler
from shinken.schedulerlink import SchedulerDumpCheckAuthenticationVerifier
from shinken.util import force_memory_trimming, to_float_kb
from shinken.webui import bottlecore as bottle

if TYPE_CHECKING:
    from shinken.misc.type_hint import Optional, Dict, Any, List, Tuple, Union
    from shinken.log import PartLogger
    from shinken.objects.module import Modules

# zlib level used by IBroks.get_broks when it compresses a new packet of broks
ZLIB_COMPRESSION_LEVEL = 1  # level 1 is used as it is fast and still compresses quite well
# Factor applied to the packet size asked by a broker to compute the size hint given to self.app.get_broks
BASE64_INFLATE_RATE = 4. / 3  # base64 encoding increases data size by 4/3

# Module level loggers and pre-computed log prefixes
logger_raw = LoggerFactory.get_logger()
logger_configuration = logger_raw.get_sub_part('CONFIGURATION')
CHAPTER_GIVE_BROKS = get_chapter_string('GIVE BROKS')  # chapter of the per-broker logger built in IBroks.get_broks
_BUS_COMMANDS_STR = get_section_string('BUS COMMANDS')  # prefix of the log lines about external commands


# Interface for pollers/reactionners
class IChecks(Interface):
    """
    Interface for Workers:
    They connect here and see if they are still OK with our running_id, if not, they must drop their checks
    """
    app: 'Scheduler'
    
    
    # A poller or a reactionner asks us for the checks / actions it must run
    def get_checks(self, do_checks=False, do_actions=False, poller_tags=None, reactionner_tags=None, worker_name='none', module_types=None, request_limit=-1, request_limit_cpu=-1):
        """
        Give to a worker what self.app.get_to_run_checks selects for it.

        :param do_checks: checks are asked only when this is the string 'true'
        :param do_actions: actions are asked only when this is the string 'true'
        :param poller_tags: list of tags, or its json encoded string. Defaults to ['None']
        :param reactionner_tags: list of tags, or its json encoded string. Defaults to ['None']
        :param worker_name: name of the worker, given as is to get_to_run_checks
        :param module_types: list of module types, or its json encoded string. Defaults to ['fork']
        :param request_limit: converted with int() before being given to get_to_run_checks
        :param request_limit_cpu: converted with float() before being given to get_to_run_checks
        :return: the selected elements, pickled, zlib compressed (level 1) then base64 encoded
        """
        started_at = time.time()
        do_checks = do_checks == 'true'
        do_actions = do_actions == 'true'
        request_limit = int(request_limit)
        request_limit_cpu = float(request_limit_cpu)
        
        def _as_list(raw_value, default_value):
            # A missing value takes its default, a string is a json encoded list
            if raw_value is None:
                return default_value
            if isinstance(raw_value, str):
                return json.loads(raw_value)
            return raw_value
        
        poller_tags = _as_list(poller_tags, ['None'])
        reactionner_tags = _as_list(reactionner_tags, ['None'])
        module_types = _as_list(module_types, ['fork'])
        
        to_run = self.app.get_to_run_checks(do_checks, do_actions, poller_tags, reactionner_tags, worker_name, module_types, request_limit, request_limit_cpu)
        
        # Serialize the result, each step is timed to spot the slow one
        serialization_started_at = time.time()
        pickled = pickle.dumps(to_run, SHINKEN_PICKLE_PROTOCOL)
        pickled_at = time.time()
        compressed = zlib.compress(pickled, 1)
        compressed_at = time.time()
        encoded = base64.b64encode(compressed)
        encoded_at = time.time()
        
        pickle_time = pickled_at - serialization_started_at
        compress_time = compressed_at - pickled_at
        encode_time = encoded_at - compressed_at
        total_time = time.time() - started_at
        
        # Warn when the whole call took more than 0.1s or any serialization step took more than 0.05s
        is_slow = total_time > 0.1 or max(pickle_time, compress_time, encode_time) > 0.05
        if is_slow:
            logger.warning('[ PERF ] get_checks:: total time:[%.3f] cpick:[%.3f] compress:[%.3f] b64encode:[%.3f] nb_check_send:[%d] b64_size:[%d]' % (total_time, pickle_time, compress_time, encode_time, len(to_run), len(encoded)))
        return encoded
    
    
    get_checks.encode = 'raw'
    get_checks.need_lock = False
    
    
    # A poller or a reactionner gives us the results of what it executed
    def put_results(self, results):
        if not self.app.is_activated():  # maybe we are a sleeping spare, we do not want to
            return False
        nb_received = len(results)
        if nb_received != 0:
            logger.debug('Received %d results' % nb_received)
        with self.app.waiting_results_lock:
            self.app.waiting_results.extend(results)
        return True
    
    
    put_results.method = 'POST'
    put_results.need_lock = False
    put_results.display_name = 'Execution results send from a poller or reactionner'


class IBroks(Interface):
    """ Interface for Brokers:
They connect here and get all broks (data for brokers). Data must be ORDERED! (initial status BEFORE update...) """
    app: 'Scheduler'
    
    
    # Maybe it was not registered as it should, if so, do it for it
    def __create_broker_entry(self, broker_name):
        """Ask the application (self.app) to create the entry of the broker named broker_name."""
        self.app.create_broker_entry(broker_name)
    
    
    def _get_broks_packet_for_old_protocol(self, broker_name, give_brok_logger):
        # type: (str, PartLogger) -> bytes
        """
        Build the packet for the pre-02.08.02 protocol: all the broks of the broker in one shot.

        :param broker_name: name of the broker, given to self.app.get_broks
        :param give_brok_logger: logger used for the debug line
        :return: the broks, pickled then zlib compressed (level 1). Not base64 encoded here.
        """
        scheduler = self.app
        all_broks = scheduler.get_broks(broker_name)
        nb_broks = len(all_broks)
        packet = zlib.compress(pickle.dumps(all_broks, SHINKEN_PICKLE_PROTOCOL), 1)
        
        # NOTE(review): the rest of this file gets sub loggers with get_sub_part(), confirm that sub_part_logger() exists
        old_version_logger = give_brok_logger.sub_part_logger('OLD VERSION')
        old_version_logger.debug('Sending %d broks (%.1fkB) to broker "%s"' % (nb_broks, to_float_kb(len(packet)), broker_name))
        
        # there is only one global counter for the sent broks
        scheduler.nb_broks_send += nb_broks
        return packet
    
    
    @staticmethod
    def _get_chunks_from_data_if_needed(debug_is_on, give_brok_logger, protocol, packet_size, data_to_send, brok_count):
        # type: (bool, PartLogger, int, int, bytes, int) -> Union[deque,bytes]
        """
        Split data_to_send in chunks of at most packet_size bytes when the split protocol asks for it.

        :param debug_is_on: when True, a debug line with the md5 of the full data is logged after a split
        :param give_brok_logger: logger used for the debug line
        :param protocol: the data is split only for BROKS_EXCHANGE_PROTOCOLS.SPLIT_FULL_DATA_PROTOCOL
        :param packet_size: maximum size of a chunk, no split is done if it is not > 0
        :param data_to_send: the full data
        :param brok_count: number of broks in the data, only used in the debug line
        :return: a deque of bytes chunks if the data was split, data_to_send itself otherwise
        """
        must_split = protocol == BROKS_EXCHANGE_PROTOCOLS.SPLIT_FULL_DATA_PROTOCOL and len(data_to_send) > packet_size > 0
        if not must_split:
            return data_to_send
        
        full_size = len(data_to_send)
        # Slices are taken on a memoryview, so each chunk is copied only once (by tobytes)
        payload_view = memoryview(data_to_send)
        chunks = deque()
        for chunk_start in range(0, full_size, packet_size):
            chunks.append(payload_view[chunk_start:chunk_start + packet_size].tobytes())
        
        if debug_is_on:
            give_brok_logger.debug('Will send split packet of %s broks with %s chunks, full packet size %s md5sum:[ %s ]' % (brok_count, len(chunks), full_size, md5(payload_view).hexdigest()))
        del payload_view
        return chunks
    
    
    @staticmethod
    def _manage_remaining_data(debug_is_on, give_brok_logger, requested_protocol, remaining_chunks, pending_data_protocol, pending_data_to_send, pending_data_brok_count):
        # type: (bool, PartLogger, int, int, int, Union[bytes,deque], int) -> Tuple[int, Optional[deque]]
        remaining_pending_chunks = len(pending_data_to_send) - 1
        protocol_is_ok = pending_data_protocol == requested_protocol == BROKS_EXCHANGE_PROTOCOLS.SPLIT_FULL_DATA_PROTOCOL
        pending_data_is_ok = isinstance(pending_data_to_send, deque)
        chunk_number_is_ok = remaining_pending_chunks == remaining_chunks
        if protocol_is_ok and pending_data_is_ok and chunk_number_is_ok and remaining_chunks > 0:
            pending_data_to_send.popleft()
            if debug_is_on:
                give_brok_logger.debug('Will continue sending split packet of %s broks with %s remaining chunks' % (pending_data_brok_count, remaining_pending_chunks))
        else:
            if remaining_chunks > 0 or (pending_data_is_ok and remaining_pending_chunks > 0):
                give_brok_logger.warning('Discarding packet of %s broks because of [%s%s%s ]' % (
                    pending_data_brok_count,
                    ' wrong protocol' if not protocol_is_ok else '',
                    ' wrong stored data type' if not pending_data_is_ok else '',
                    (' wrong chunk number asked (%s instead of %s)' % (remaining_chunks, remaining_pending_chunks)) if not chunk_number_is_ok else ''
                ))
            pending_data_to_send = None
            pending_data_brok_count = 0
        return pending_data_brok_count, pending_data_to_send
    
    
    @staticmethod
    def _detect_protocol_and_configuration_according_to_available_parameters(give_brok_logger, packet_size, seq_number, protocol_version, remaining_chunks):
        # type: (PartLogger, Optional[str], str, str, str) -> Tuple[int,int,float,int]
        """
        Compute the exchange protocol and its settings from the raw parameters of a get_broks call.

        :param give_brok_logger: logger used to report a wrong protocol number
        :param packet_size: maximum packet size asked by the caller. None, empty, invalid or negative values give 0
        :param seq_number: sequence number given by the caller, only used to detect the old calls
        :param protocol_version: protocol number asked by the caller. An empty, invalid or unknown value
                                 gives BY_BROK_SIZE_ESTIMATION_LIMITED_DATA_SIZE_PROTOCOL
        :param remaining_chunks: number of chunks the caller still waits for. Empty or invalid values give 0
        :return: (protocol, packet_size, size_hint, remaining_chunks). The size_hint is the packet size
                 multiplied by BASE64_INFLATE_RATE, or 0 for the old calls and for SPLIT_FULL_DATA_PROTOCOL
        """
        # Callers using the old protocol ( NO_LIMIT_AND_NO_ACK_PROTOCOL ) only give the "bname" parameter
        if packet_size is None and not seq_number:
            return BROKS_EXCHANGE_PROTOCOLS.NO_LIMIT_AND_NO_ACK_PROTOCOL, 0, 0, 0
        
        def _to_int(raw_value):
            # Empty or unparsable values are read as 0. Only the errors int() raises on a bad
            # value are caught, so an interruption of the daemon is not swallowed here
            if not raw_value:
                return 0
            try:
                return int(raw_value)
            except (TypeError, ValueError):
                return 0
        
        packet_size = max(_to_int(packet_size), 0)
        remaining_chunks = _to_int(remaining_chunks)
        size_hint = float(packet_size) * BASE64_INFLATE_RATE  # desired size must be aware of overload from base64
        protocol = BROKS_EXCHANGE_PROTOCOLS.BY_BROK_SIZE_ESTIMATION_LIMITED_DATA_SIZE_PROTOCOL
        
        if protocol_version:
            try:
                asked_protocol = int(protocol_version)
            except (TypeError, ValueError):
                asked_protocol = None
            
            if asked_protocol == BROKS_EXCHANGE_PROTOCOLS.SPLIT_FULL_DATA_PROTOCOL:
                protocol = asked_protocol
                size_hint = 0  # no size hint is given for this protocol
            elif asked_protocol in (BROKS_EXCHANGE_PROTOCOLS.NO_LIMIT_AND_NO_ACK_PROTOCOL, BROKS_EXCHANGE_PROTOCOLS.BY_BROK_SIZE_ESTIMATION_LIMITED_DATA_SIZE_PROTOCOL):
                protocol = asked_protocol
            else:
                # Not a number, or not a protocol we know: the default protocol is kept
                give_brok_logger.error('Wrong protocol number [ %s ] defaulting to [ %s ]' % (protocol_version, protocol))
        
        return protocol, packet_size, size_hint, remaining_chunks
    
    
    @staticmethod
    def _prepare_response(debug_is_on, give_brok_logger, requested_protocol, seq_number, data_to_send, brok_count, nb_remaining_broks):
        # type: (bool, PartLogger, int, str, Union[bytes,deque], int, int) -> bytes
        """
        Pickle the answer of a get_broks call.

        * SPLIT_FULL_DATA_PROTOCOL: (protocol, seq_number, current chunk, nb_remaining_broks, remaining chunks number)
        * other protocols: (protocol, seq_number, data_to_send, nb_remaining_broks)

        :param data_to_send: bytes, or a deque of chunks. With the split protocol only the first chunk of a deque is sent
        :param brok_count: only used in the debug line of the split protocol
        :return: the pickled tuple
        """
        if requested_protocol != BROKS_EXCHANGE_PROTOCOLS.SPLIT_FULL_DATA_PROTOCOL:
            return pickle.dumps((requested_protocol, seq_number, data_to_send, nb_remaining_broks), SHINKEN_PICKLE_PROTOCOL)
        
        # A deque means the split mode is activated: we send its current chunk.
        # Otherwise there is only one chunk, the data is ready to go (bytes)
        is_split = isinstance(data_to_send, deque)
        remaining_chunks_nb = len(data_to_send) - 1 if is_split else 0
        current_chunk = data_to_send[0] if is_split else data_to_send
        
        if debug_is_on:
            give_brok_logger.debug('Sending split packet of %s broks, remaining chunks: %s current chunk size: %s' % (brok_count, remaining_chunks_nb, len(current_chunk)))
        
        return pickle.dumps((requested_protocol, seq_number, current_chunk, nb_remaining_broks, remaining_chunks_nb), SHINKEN_PICKLE_PROTOCOL)
    
    
    # A broker asks us for its broks
    def get_broks(self, bname, packet_size=None, seq_number='', shard_id='', configuration_incarnation_uuid='', protocol_version='', remaining_chunks=''):
        # type: (str, Optional[str], str, str, str, str, str) -> bytes
        """
        Give to the broker bname the broks that are queued for it.

        * Old protocol (NO_LIMIT_AND_NO_ACK_PROTOCOL): all the broks are sent in one shot, without acknowledgement.
        * Other protocols: the sent packet is stored in the network sequencer of the broker. On the next call,
          a wrong seq_number makes us send the stored packet again. A valid one acknowledges it, then the next
          chunk of a split packet, or new broks, are sent.

        :param bname: name of the broker. Its entry is created if it does not exist
        :param packet_size: maximum packet size asked by the broker
        :param seq_number: sequence number of the last packet the broker received
        :param shard_id: with configuration_incarnation_uuid, what the broker already loaded. If they match our
                         daemon incarnation, the broker is added to already_generated_initial_broks_for_this_configuration
        :param configuration_incarnation_uuid: see shard_id
        :param protocol_version: protocol number asked by the broker
        :param remaining_chunks: number of chunks the broker still waits for (split protocol)
        :return: the base64 encoded answer
        """
        debug_is_on = logger.is_debug()
        give_brok_logger = LoggerFactory.get_logger(CHAPTER_GIVE_BROKS).get_sub_part(bname)
        
        requested_protocol, packet_size, size_hint, requested_remaining_chunks = self._detect_protocol_and_configuration_according_to_available_parameters(give_brok_logger, packet_size, seq_number, protocol_version, remaining_chunks)
        
        # IMPORTANT: always the satellite lock BEFORE brokers lock
        with self.app.sched_daemon.satellite_lock:  # protect against loading a new configuration
            
            # SEF-10118: Broker has already loaded our version, and does not need initial broks => activate its brok's queue
            if shard_id and configuration_incarnation_uuid and bname not in self.app.already_generated_initial_broks_for_this_configuration:
                daemon_incarnation = self.app.get_daemon_incarnation()
                if shard_id == str(daemon_incarnation.get('shard_id', '')) and configuration_incarnation_uuid == daemon_incarnation.get('configuration_incarnation_uuid', ''):
                    if debug_is_on:
                        give_brok_logger.debug('Activate queue for Broker %s' % bname)
                    self.app.already_generated_initial_broks_for_this_configuration.add(bname)
            
            with self.app.brokers_lock:  # protect against multiple brokers calls
                # Maybe it was not registered as it should, if so, do it for it
                if bname not in self.app.brokers:
                    self.__create_broker_entry(bname)
                
                broker_entry = self.app.brokers[bname]
                
                # Old protocol asked? give ALL broks in the old protocol (all data, and no ack)
                if requested_protocol == BROKS_EXCHANGE_PROTOCOLS.NO_LIMIT_AND_NO_ACK_PROTOCOL:
                    _compressed = self._get_broks_packet_for_old_protocol(bname, give_brok_logger)
                    _b64 = base64.b64encode(_compressed)
                    return _b64
                
                # Here are protocols requiring acknowledgement (with seq number)
                data_to_send = None
                
                # check previous chunk was received, resend it as needed
                exchange_sequencer = broker_entry['network_sequencer']  # type: NetworkExchangeSequencer
                if exchange_sequencer.have_data():  # do not look for valid seq number if it's our first send
                    if not exchange_sequencer.is_valid_sequence_number(seq_number):
                        # we have an un-acknowledged packet. resend it. Restore protocol version in order to have data correctly interpreted
                        requested_protocol, data_to_send, brok_count = exchange_sequencer.get_unacknowledged_data()
                        # A split packet is stored as a deque of chunks and only its first chunk is sent again:
                        # log the size of this chunk, and not the number of chunks of the deque
                        if isinstance(data_to_send, deque) and data_to_send:
                            resent_size = len(data_to_send[0])
                        else:
                            resent_size = len(data_to_send)
                        give_brok_logger.warning('Packet number mismatch "%s" != "%s" : Re-sending broks (%.1fkB)' % (exchange_sequencer.get_sequence_number(), seq_number, to_float_kb(resent_size)))
                    else:
                        # last chunk has been received, is there remaining data (compatible with requested protocol) ?
                        _protocol, data_to_send, brok_count = exchange_sequencer.ack(seq_number)
                        brok_count, data_to_send = self._manage_remaining_data(debug_is_on, give_brok_logger, requested_protocol, requested_remaining_chunks, _protocol, data_to_send, brok_count)
                
                # We do not have previous data to resent, so we can get new broks
                if data_to_send is None:
                    # get broks until we reach max_size
                    res = self.app.get_broks(bname, size_hint=size_hint)
                    brok_count = len(res)
                    _cpick = pickle.dumps(res, SHINKEN_PICKLE_PROTOCOL)
                    data_to_send = zlib.compress(_cpick, ZLIB_COMPRESSION_LEVEL)
                    # the intermediate objects can be big, release them as soon as possible
                    del _cpick
                    del res
                    
                    nb_remaining_broks = len(broker_entry['broks'])
                    still_to_send_string = '' if nb_remaining_broks == 0 else '[chunk, still %4d to send] ' % nb_remaining_broks
                    give_brok_logger.info('Sending %4d broks (%.1fkB) %s' % (brok_count, to_float_kb(len(data_to_send)), still_to_send_string))
                    
                    # got only one global counter for broks
                    self.app.nb_broks_send += brok_count
                    
                    data_to_send = self._get_chunks_from_data_if_needed(debug_is_on, give_brok_logger, requested_protocol, packet_size, data_to_send, brok_count)
                nb_remaining_broks = len(broker_entry['broks'])  # give to the broker how much broks we are in queue for it
                
                # Store data for acknowledge
                seq_number = exchange_sequencer.push(data_to_send, requested_protocol, brok_count)
                
                # Now we can prepare the result into a string
                response = self._prepare_response(debug_is_on, give_brok_logger, requested_protocol, seq_number, data_to_send, brok_count, nb_remaining_broks)
                _b64 = base64.b64encode(response)
            
            return _b64
    
    
    get_broks.encode = 'raw'
    get_broks.need_lock = False
    
    
    # Called by a new broker: if we do not have
    # a full set of broks for it, we clean our broks
    # and fill them again with all the new values
    def fill_initial_broks(self, bname):
        # type: (str) -> str
        if not self.app.is_activated():  # maybe we are a sleeping spare, we do not want to generate it
            return 'false'
        
        # Maybe the scheduler is NOT ready (did not load a conf nor load the retention, if so wait)
        # IMPORTANT: wait OUTSIDE THE LOCK PART!  SEF-5027
        if not self.app.wait_main_loop_has_started(120):
            logger.error('The broker %s ask for initial broks, but the scheduler has not started in less than 120s. Retrying.' % bname)
            return 'false'
        
        # Register the broker, so we are asking for an initial broks generations
        self.app.initial_broks_factory.register_for_generation(bname)
        
        # During this time, the InitialBroksFactory is generating broks for us, and
        # also others brokers that are in the same step
        
        # Now wait until it's done
        while True:  # Will exit because requestor will time out
            is_completed = self.app.initial_broks_factory.is_completed(bname)
            if is_completed:
                return 'true'  # this broker can get back and have it's broks
            time.sleep(0.1)  # wait until it's done
    
    
    fill_initial_broks.need_lock = False


class IStats(IStatsInterface):
    # Interface for various stats about scheduler activity
    app: 'Shinken'
    
    
    def get_raw_stats(self, param='', module=''):
        # type: (str, str) -> Dict[str, Any]
        """Return what the parent class get_raw_stats gives for the same param and module."""
        parent_interface = super()
        return parent_interface.get_raw_stats(param=param, module=module)
    
    
    get_raw_stats.doc = 'get stats of the daemon'
    get_raw_stats.need_lock = False
    
    
    # SEF-1143
    def _daemon_get_raw_stats(self, param='', module_wanted=None):
        # type: (str, List) -> Dict[str, Any]
        if module_wanted is None:
            module_wanted = []
        sched = self.app.sched
        
        if not self.app.have_configuration or sched.new_configuration_load_in_progress:
            logger.info('[scheduler][%s] Someone asks to get the raw stats (deamon Health) but the scheduler is not initialized' % sched.instance_id)
            return {
                'have_conf': False
            }
        nb_hosts = 0
        nb_clusters = 0
        if hasattr(sched, 'hosts'):
            for host in sched.hosts:
                if host.got_business_rule:
                    nb_clusters += 1
                else:
                    nb_hosts += 1
        
        # Get the average latency of check since last_check (5min or look in param)
        last_check = time.time() - 300
        if param and '-' in param:
            s_last_check = param.split('-')[1]
            if s_last_check.isdigit():
                last_check = int(s_last_check)
        
        elements = []
        if hasattr(sched, 'services'):
            elements.extend(sched.services)
        if hasattr(sched, 'hosts'):
            elements.extend(sched.hosts)
        
        latencies = [s.latency for s in elements if s.last_chk > last_check]
        notifications_latencies = [s.notification_latency for s in elements if s.last_notification > last_check]
        events_latencies = [s.eventhandler_latency for s in elements if s.last_event_handler > last_check]
        
        average_latency = 0
        if len(latencies) != 0:
            average_latency = float(sum(latencies)) / len(latencies)
        average_notifications_latency = 0
        if len(notifications_latencies) != 0:
            average_notifications_latency = float(sum(notifications_latencies)) / len(notifications_latencies)
        average_events_latency = 0
        if len(events_latencies) != 0:
            average_events_latency = float(sum(events_latencies)) / len(events_latencies)
        
        passive_pollers = []
        for poller in sched.pollers.values():
            if poller['passive']:
                d_poller = {
                    'addr': '%s:%s' % (poller['address'], poller['port']),
                    'name': poller['name'],
                    'con' : (poller.get('con', None) is not None),
                }
                d_poller['info'] = poller.get('con_info', 'Scheduler did not try to connect to poller [%s]' % d_poller['addr'])
                d_poller['latency'] = poller.get('latency', -1) if d_poller['con'] else -1
                
                passive_pollers.append(d_poller)
        
        info_pollers = []
        info_reactionners = []
        
        for executor_name, executor_stat in sched.stat_by_executor.items():
            info_executor = {
                'name'       : executor_name,
                'realm'      : executor_stat.get('realm', ''),
                'tags'       : executor_stat.get('tag', ''),
                'done_by_sec': executor_stat.get('avg_nb_checks_received', 0),
            }
            
            if executor_stat.get('type', 'Poller') == 'Poller':
                info_pollers.append(info_executor)
            else:
                info_reactionners.append(info_executor)
        
        sched.cleanup_rogue_satellite()
        arbiter_uri = ''
        last_arbiter_trace = self.app.get_arbiter_trace()
        if last_arbiter_trace:
            arbiter_uri = last_arbiter_trace.get('uri', '')
        
        checks_n_actions_stats = getattr(sched, 'checks_n_actions_stats', {})
        # DEPRECATED : keep late_checks and late_checks_by_tags for check_shinken compatibility
        late_checks = checks_n_actions_stats.get('check', {}).get('nb_late', 0)
        late_checks_by_tags = checks_n_actions_stats.get('check', {}).get('late_by_tags', {})
        checks_n_actions_stats['check']['latency'] = average_latency
        checks_n_actions_stats['notification']['latency'] = average_notifications_latency
        checks_n_actions_stats['eventhandler']['latency'] = average_events_latency
        
        raw_stats = {
            'arbiter_uri'                          : arbiter_uri,
            'realm'                                : self.app.realm,
            'have_conf'                            : self.app.already_have_conf,
            'activated'                            : self.app.is_activated(),
            'spare'                                : self.app.spare,
            'nb_hosts'                             : nb_hosts,
            'nb_clusters'                          : nb_clusters,
            'nb_checks'                            : len(getattr(sched, 'services', ())),
            'late_checks'                          : late_checks,  # DEPRECATED since 02.07.06-Patched-07 #SEF-7573
            'late_checks_by_tags'                  : late_checks_by_tags,  # DEPRECATED since 02.07.06-Patched-07 #SEF-7573
            'average_latency'                      : average_latency,  # DEPRECATED since 02.07.06-Patched-07 #SEF-7573
            'checks_todo_by_sec'                   : sched.avg_checks_todo_by_sec.get_avg(0),
            'notifications_todo_by_sec'            : sched.avg_notification_todo_by_sec.get_avg(0),
            'event_handlers_todo_by_sec'           : sched.avg_event_handler_todo_by_sec.get_avg(0),
            'passive_pollers'                      : passive_pollers,
            'info_pollers'                         : info_pollers,
            'info_reactionners'                    : info_reactionners,
            'rogue_pollers'                        : sched.rogue_satellites.get('Poller', {}),
            'rogue_reactionners'                   : sched.rogue_satellites.get('Reactionner', {}),
            'http_errors_count'                    : self.app.http_errors_count,
            'loop_turn_time_avg'                   : sched.loop_time_avg.get_avg(0),
            'checks_warning_threshold_cpu_usage'   : sched.checks_warning_threshold_cpu_usage[::-1][:5],  # last 5 elements in reverse order
            'checks_warning_threshold_cpu_usage_nb': len(sched.checks_warning_threshold_cpu_usage),
            'save_retention_time'                  : sched.scheduler_stat.get('save_retention_time', 0),
            'save_retention_error'                 : sched.scheduler_stat.get('save_retention_error', ''),
            'retention_save_interval'              : sched.get_retention_save_interval(),
            'last_retention_save'                  : sched.scheduler_stat.get('last_retention_save', ''),
            'last_retention_save_try'              : sched.scheduler_stat.get('last_retention_save_try', ''),
            'last_retention_load_epoch'            : sched.scheduler_stat.get('last_retention_load_epoch', ''),
            'last_retention_load_duration'         : sched.scheduler_stat.get('last_retention_load_duration', ''),
            'avg_checks_received_schedule_by_sec'  : sched.avg_checks_received_schedule_by_sec.get_avg(0),
            'avg_checks_received_force_by_sec'     : sched.avg_checks_received_force_by_sec.get_avg(0),
            'avg_checks_received_retry_by_sec'     : sched.avg_checks_received_retry_by_sec.get_avg(0),
            'avg_checks_received_dependency_by_sec': sched.avg_checks_received_dependency_by_sec.get_avg(0),
            'checks_n_actions_stats'               : checks_n_actions_stats,
        }
        if self.app.modules:
            raw_stats['module_stats'] = self._get_module_stats(getattr(self.app, 'modules_manager', None), param)
        
        return raw_stats
    
    
    def get_item(self, uuid, item_type):
        if not self.app.sched.hosts:
            return 'scheduler not ready'
        if item_type == 'host':
            host = self.app.sched.hosts.find_by_uuid(uuid)
            if not host:
                return 'host !%s! not found' % uuid
            return self._get_flat_item(host)
        if item_type == 'check':
            host_uuid = uuid.split('-')[0]
            check_uuid = uuid.split('-')[1]
            host = self.app.sched.hosts.find_by_uuid(host_uuid)
            if not host:
                return 'host !%s! not found' % host_uuid
            check = next((c for c in host.services if c.uuid == check_uuid), None)
            if not check:
                return 'check !%s! not found' % check_uuid
            
            return self._get_flat_item(check)
        
        return 'unsupported type !%s!' % item_type
    
    
    @staticmethod
    def _get_flat_item(item):
        to_return = {
            'name'                 : item.get_full_name(),
            'state'                : item.state,
            'state_id'             : item.state_id,
            'state_type'           : item.state_type,
            'state_type_id'        : item.state_type_id,
            
            'is_problem'           : item.is_problem,
            'is_impact'            : item.is_impact,
            'source_problems'      : ','.join((i.get_full_name() for i in item.source_problems)),
            'impacts'              : ','.join((i.get_full_name() for i in item.impacts)),
            'state_validity_period': item.state_validity_period,
        }
        return to_return
    
    
    def call_for_retention(self):
        if self.app.debug:
            self.app.sched.update_retention_file(True)
        else:
            return 'You cannot make this call if your are not in debug mode'
    
    
    call_for_retention.need_lock = True
    
    
    # A user can ask (with the help of the support) to dump in a /tmp csv file about
    # * this scheduler (name, dump time, realm)
    # * all checks with epoch_to_go, ids, check/retry interval, cpu time
    # NOTE: there is a protection to not allow anyone to call it (fill disk, etc)
    #       the password is ....
    #       .... really? You did think I'll put it in the source code? Nope
    def export_data(self, authentication_token):
        # type: (str) -> Dict[str,Any]
        """
        Export the data of the scheduler.

        The checks are done in this order:
        * the export must be enabled (scheduler__export_data__enabled), otherwise ERROR
        * the scheduler must have an instance_name, otherwise ERROR
        * if we are not activated, answer OK with 'skip' and the void export struct
        * a non void authentication_token must be accepted by the verifier, otherwise ERROR
        * the daemon must have a configuration, otherwise ERROR

        :param authentication_token: '' for an anonymous export, otherwise a token that allows (if valid)
                                     an export with the private information
        :return: {'state': 'OK', 'data': ...} (with 'skip': True when not activated)
                 or {'state': 'ERROR', 'error': ...}
        """
        export_logger = LoggerFactory.get_logger('EXPORT DATA')
        scheduler_daemon = self.app
        
        def _caller_ip():
            # type: () -> str
            # Only evaluated when a line is actually logged
            return bottle.request.environ.get('REMOTE_ADDR', '(unknown)')
        
        if not scheduler_daemon.scheduler__export_data__enabled:
            export_logger.warning('Request from Arbiter IP〖%(ip)s〗fails. Export is disabled by configuration parameter 〖scheduler__export_data__enabled〗' % {'ip': _caller_ip()})
            return {'state': 'ERROR', 'error': 'The Scheduler configuration disabled data export with the scheduler__export_data__enabled parameter.'}
        
        # Too early, the scheduler is just booting
        if not hasattr(scheduler_daemon.sched, 'instance_name'):
            export_logger.warning('Request from Arbiter IP〖%(ip)s〗fails. Scheduler is not ready (initialisation ongoing)' % {'ip': _caller_ip()})
            return {'state': 'ERROR', 'error': 'The Scheduler does not have any configuration'}
        
        # Spare: nothing to export, it's not an error case
        if not scheduler_daemon.is_activated():
            return {'state': 'OK', 'skip': True, 'data': scheduler_daemon.sched.get_export_data_struct()}
        
        # A void authentication_token means that the caller is asking for an anonymous dump, it's always ok.
        # With a valid token we can dump with private info, like names
        dump_with_private_information = authentication_token != ''
        if dump_with_private_information:
            # Give the token to the verifier. If the caller did ask for auth but fails, just give up and give it the error
            is_ok, err = SchedulerDumpCheckAuthenticationVerifier().is_authentication_token_authorized(
                authentication_token,
                scheduler_daemon.sched.instance_name,
                scheduler_daemon.daemon__export_data__password
            )
            if not is_ok:
                export_logger.warning('Request from Arbiter IP〖%(ip)s〗fails. %(error)s' % {
                    'ip'   : _caller_ip(),
                    'error': err
                })
                return {'state': 'ERROR', 'error': err}
        
        # Maybe the scheduler is NOT initialized, then error, because we don't know if we will have jobs or not
        if not scheduler_daemon.have_configuration:
            export_logger.warning('Request from Arbiter IP〖%(ip)s〗fails. Scheduler is not ready (waiting for configuration)' % {'ip': _caller_ip()})
            return {'state': 'ERROR', 'error': 'The Scheduler does not have any configuration'}
        
        export_started_at = time.time()
        data = scheduler_daemon.sched.export_data(scheduler_daemon.realm, dump_with_private_information)
        export_logger.info('%(type)s export of %(nb_elements)s elements in %(duration).3fs from Arbiter IP〖%(ip)s〗' % {
            'type'       : 'full' if dump_with_private_information else 'anonymous',
            'nb_elements': len(data.get('entries', [])),
            'duration'   : time.time() - export_started_at,
            'ip'         : _caller_ip()
        })
        return {'state': 'OK', 'data': data}
    
    
    export_data.need_lock = False  # already use a lock on checks, do not need the big one


class IForArbiter(IArb):
    """ HTTP interface offered to the Arbiter: it sends us a configuration, then instructions.
        Every call is forwarded to the scheduler daemon (`app`) or to its scheduler object (`app.sched`). """
    
    # Last answer given by run_external_commands; also used to log the 'not ready' case only once
    run_external_commands_ack = 'OK'
    
    app = None  # type: Shinken
    
    
    def run_external_commands(self, cmds, receiver_name=''):
        """ Hand external commands (global or specific ones) over to the scheduler.
        
        Returns 'OK' when the commands were given to the scheduler, 'scheduler is not ready'
        when the scheduler object does not have its `external_command` attribute yet. """
        scheduler = self.app.sched
        if not hasattr(scheduler, 'external_command'):
            # Log only on the transition to the 'not ready' state, not on every call
            if self.run_external_commands_ack != 'scheduler is not ready':
                logger.info('%s Get shinken external commands (like recheck, set acknowledge, etc) from the Receiver %s but i am not ready. Waiting for configuration from Arbiter.' % (_BUS_COMMANDS_STR, receiver_name))
                self.run_external_commands_ack = 'scheduler is not ready'
            return self.run_external_commands_ack
        
        logger.info('%s Running %s shinken external commands (like recheck, set acknowledge, etc) received from the Receiver %s' % (_BUS_COMMANDS_STR, len(cmds), receiver_name))
        scheduler.run_external_commands(cmds)
        self.run_external_commands_ack = 'OK'
        return self.run_external_commands_ack
    
    
    run_external_commands.method = 'POST'
    run_external_commands.display_name = 'Shinken external commands (like recheck, set acknowledge, etc) received from Arbiter or Receiver server'
    
    
    def put_conf(self, conf):
        """ Receive a configuration: stop the scheduler, store the configuration through the
        parent interface, then let the daemon prepare its early (pending) configuration information. """
        daemon = self.app
        daemon.sched.die()
        super().put_conf(conf)
        daemon.early_scheduler_configuration_load()
    
    
    put_conf.method = 'POST'
    put_conf.need_lock = False
    put_conf.display_name = 'Configuration reception from an Arbiter server'
    
    
    def wait_new_conf(self):
        """ Called by the Arbiter when it thinks we are running, but we must not (for example a spare
        that took a configuration while the master came back): drop everything and wait for a new one. """
        daemon = self.app
        with daemon.satellite_lock:
            super().wait_new_conf()
            daemon.wait_new_conf()
    
    
    wait_new_conf.need_lock = False
    
    doc = 'Get the current running id of the daemon (scheduler)'
    
    
    def get_daemon_incarnation(self):
        """ Return the daemon incarnation as given by the scheduler object. """
        scheduler = self.app.sched
        return scheduler.get_daemon_incarnation()
    
    
    get_daemon_incarnation.need_lock = False
    get_daemon_incarnation.doc = doc
    
    
    def get_current_satellites(self):
        """ Called by the Arbiter to get the current satellites names (broker, pollers, receivers
        and reactionners), so it can detect an old one. Read under the satellite lock. """
        daemon = self.app
        with daemon.satellite_lock:
            return daemon.sched.get_current_satellites()
    
    
    get_current_satellites.need_lock = False
    
    
    def satellites_to_remove(self, to_remove):
        """ Forward to the daemon the Arbiter order to remove satellites. """
        daemon = self.app
        daemon.satellites_to_remove(to_remove)
    
    
    satellites_to_remove.method = 'POST'
    satellites_to_remove.need_lock = False
    satellites_to_remove.display_name = 'Order from Arbiter server to remove dead daemon connections'
    
    doc = 'disabled'
    
    
    def what_i_managed(self):
        """ Old call for scheduler, kept until we are no more installed as a patch. Always answers None. """
        return None
    
    
    what_i_managed.need_lock = False
    what_i_managed.doc = doc
    
    doc = 'Return the managed configuration ids (internal)'
    
    
    def get_currently_managed_configuration(self):
        """ The Arbiter asks which shard we do manage: answer with what the daemon reports. """
        answer = self.app.get_currently_managed_configuration()
        logger.debug("The arbiter asked me what I manage. It's %s" % answer)
        return answer
    
    
    get_currently_managed_configuration.need_lock = False
    get_currently_managed_configuration.doc = doc


class IProxyItems(Interface):
    """ HTTP interface that exports our proxy item states (called with a `since` time). """
    app: 'Shinken'
    
    
    def get_proxy_states(self, since):
        """ Return the proxy states exported since `since`, as a base64(zlib(pickle)) payload.
        
        The payload is a dict with 'states' (the exported states) and 'diff_since' (the local
        time the caller must give back as `since` on its next call). """
        started_at = time.time()
        
        # `since` is a time WE gave to the caller on its previous call: the other daemon can have
        # a different clock than ours, so only our local times are used
        since = int(since)
        exported_states = proxyitemsmgr.get_export_state_since(since, self.app.sched.elements_uuids)
        
        payload = {
            # -1 to manage round of time, to be sure to do not lost a state
            'diff_since': int(time.time()) - 1,
            'states'    : exported_states,
        }
        pickled_payload = pickle.dumps(payload, SHINKEN_PICKLE_PROTOCOL)
        encoded_payload = base64.b64encode(zlib.compress(pickled_payload))
        if len(exported_states):
            logger.debug("EXPORTING [%d states] since [%s] in [%.3fs]" % (len(exported_states), datetime.fromtimestamp(since).strftime('%d-%m-%Y %H:%M:%S'), time.time() - started_at))
        return encoded_payload
    
    
    get_proxy_states.encode = 'raw'


# The main app class
class Shinken(BaseSatellite):
    # Should we look at the passive property for external connections?
    # For pollers/reactionners, should_connect_to_distant_satellite() only accepts the passive ones.
    is_using_passive_connection_information = True  # the scheduler only connects to passive pollers/reactionners
    
    # Work on a copy of the BaseSatellite properties, so the scheduler specific entries
    # below do not modify the parent class table
    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile'  : PathProp(default='schedulerd.pid'),
        'port'     : IntegerProp(default='7768'),
        'local_log': PathProp(default='schedulerd.log'),
    })
    
    
    # Create the shinken class:
    # Create a Pyro server (port = arvg 1)
    # then create the interface for arbiter
    # Then, it waits for a first configuration
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile='', daemon_id=0, force_check_spread_out=False):
        """ Build the scheduler daemon: the Scheduler object, then the HTTP interfaces, then
        the empty state that is filled when a configuration is received.
        
        `profile` is accepted but not used here. """
        super().__init__('scheduler', config_file, is_daemon, do_replace, debug, debug_file, daemon_id)
        self.force_check_spread_out = force_check_spread_out
        
        # Nothing is known until a configuration is received
        self.conf = self.override_conf = None
        self.modules = self.satellites = None
        self.macro_resolver = None
        self._pending_shard_id = None
        
        self.sched = Scheduler(self)
        
        # Now the interfaces, registered in this order (each one is built just before its registration)
        self.ibroks = IBroks(self.sched)
        for build_interface, owner in ((IForArbiter, self), (IStats, self), (IChecks, self.sched)):
            self._add_http_interface(build_interface(owner))
        self._add_http_interface(self.ibroks)
        self._add_http_interface(IProxyItems(self))
        
        self.must_run = True
        
        # TODO to del
        self.uri = self.uri2 = None
        
        # Links to the other satellites (one distinct dict per type)
        self.pollers, self.reactionners, self.brokers = {}, {}, {}
        
        self.realm = ''
        # This password is used for checks dumps: we only add names if the password is OK
        self.daemon__export_data__password = ''
        # If the dump is allowed, even in anonymous mode
        self.scheduler__export_data__enabled = True
        
        # For early configuration load: we need to have this information if the arbiter
        # ask us what we are managing, but we should not let the Broks() be updated by this new thing
        # until the self.sched updated is fully load with the new information
        self._have_pending_configuration = False
        self.partial_configuration_incarnation = None  # type: Optional[PartConfigurationIncarnation]
        self._partial_is_activated = False
    
    
    def _reset_managed_configuration(self):
        self._have_pending_configuration = False
        self.partial_configuration_incarnation = None
        self._partial_is_activated = False
    
    
    def do_stop(self):
        """ Stop the daemon: first go to idle mode with our current modules, then run the parent stop. """
        loaded_modules = self.modules
        self.go_to_idle_mode(modules=loaded_modules)
        super().do_stop()
    
    
    def wait_new_conf(self):
        """ Drop the current configuration: parent cleanup, scheduler stop and reset of the
        managed configuration information, all under the satellite lock. """
        with self.satellite_lock:
            super().wait_new_conf()
            scheduler = self.sched
            scheduler.die()
            self._reset_managed_configuration()
    
    
    def satellites_to_remove(self, to_remove):
        """ Remove from the scheduler the brokers listed by name in to_remove['broker']. """
        obsolete_brokers = to_remove['broker']
        logger.debug('[CONFIGURATION] The arbiter asks us to remove brokers that are no more need: %s' % (','.join(obsolete_brokers)))
        scheduler = self.sched
        for obsolete_broker_name in obsolete_brokers:
            scheduler.remove_broker(obsolete_broker_name)
    
    
    def compensate_system_time_change(self, difference):
        """ Shift our time references by `difference` seconds after a system time change:
        program start, hosts, services, then the scheduled checks and actions. """
        super(Shinken, self).compensate_system_time_change(difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)
        
        # Then we compensate all host/services (the scheduler may not have them)
        for element_type in ('hosts', 'services'):
            for element in getattr(self.sched, element_type, []):
                element.compensate_system_time_change(difference)
        
        # Now all checks
        for check in list(self.sched.checks.values()):
            # Already launch checks should not be touch
            if check.status != 'scheduled' or check.t_to_go is None:
                continue
            planned_time = check.t_to_go
            element = check.ref
            shifted_time = max(0.0, planned_time + difference)
            if element.check_period is not None:
                # But it's no so simple, we must match the timeperiod
                shifted_time = element.check_period.get_next_valid_time_from_t(shifted_time)
            
            if shifted_time is None:
                # No more valid time: say as error, with error output
                check.simulate_execution_return(exit_status=2, output='(Error: there is no available check time after time change!)', check_time=time.time(), execution_time=0)
                # No more need in index for poller
                checks_container.delete_job_from_index_seconds(check)
                continue
            
            check.force_time_change(shifted_time)
            checks_container.do_index_job_execution(check)
            element.next_chk = shifted_time
        
        # Now all actions
        for action in list(self.sched.actions.values()):
            # Already launch actions should not be touch
            if action.status != 'scheduled':
                continue
            planned_time = action.t_to_go
            
            #  Event handler do not have a ref
            element = getattr(action, 'ref', None)
            shifted_time = max(0, planned_time + difference)
            
            # Notification should be checked with notification_period
            if action.is_a == 'notification':
                if element.notification_period:
                    # But it's no so simple, we must match the timeperiod
                    shifted_time = element.notification_period.get_next_valid_time_from_t(shifted_time)
                # And got a creation_time variable too
                action.creation_time = action.creation_time + difference
            
            if shifted_time is None:
                # No more valid time: say as error, with error output
                action.simulate_execution_return(exit_status=2, output='(Error: there is no available check time after time change!)', check_time=time.time(), execution_time=0)
                continue
            action.t_to_go = shifted_time
    
    
    def manage_signal(self, sig, frame):
        """ Signal handler: SIGUSR1/SIGPWR are given to the Daemon handler, SIGUSR2 asks for an
        objects dump, every other signal stops the scheduler and then goes to the Daemon handler. """
        logger.info("[schedulerdeamon] Received a SIGNAL %s" % sig)
        
        # Both signals are for the scheduler directly
        if sig == signal.SIGUSR1 or sig == signal.SIGPWR:
            Daemon.manage_signal(self, sig, frame)
            return
        
        # usr2, dump objects
        if sig == signal.SIGUSR2:
            self.sched.need_objects_dump = True
            return
        
        # if not known, die :)
        logger.info("[schedulerdeamon] The SIGNAL %s is interpreted as a stop daemon." % sig)
        self.sched.die()
        self.must_run = False
        Daemon.manage_signal(self, sig, frame)
    
    
    def do_loop_turn(self):
        """ One turn of the daemon loop: wait for a configuration and, if there is one, load it
        and run the scheduler until it stops. """
        self.wait_for_initial_conf()
        if self.new_conf:
            self.setup_new_conf()
            self.sched.run()
            self.check_and_del_zombie_modules(restart_dead=False)
            # Be sure that we did clean all the memory from here, as we can have dropped all our objects
            force_memory_trimming(context='because the Arbiter asked the Scheduler to wait a new configuration')
    
    
    def early_scheduler_configuration_load(self):
        """ Prepare the pending configuration information right after a configuration reception.
        
        Builds `partial_configuration_incarnation` from the received configuration and raises the
        pending flag, so get_currently_managed_configuration() can answer to the Arbiter before
        the configuration is really loaded. An invalid configuration is logged and skipped. """
        # The configuration is received without the big lock (put_conf.need_lock is False) and
        # really_setup_new_conf() resets self.new_conf to None when it consumes it: read the
        # attribute only once, so every check below is done on the same object.
        received_conf = self.new_conf
        
        # Maybe it's an old arbiter (<02.08.02) that try to give us the conf
        if received_conf is None:
            logger.error('The configuration received is invalid, skipping it')
            return
        
        is_activated = received_conf['activated']
        self._partial_is_activated = is_activated
        if is_activated:
            # Protection against old configuration version
            if 'shard_id' not in received_conf:
                logger.error('The configuration received is invalid, from an old arbiter. Skipping it')
                return
            pending_shard_id = received_conf['shard_id']
        else:  # spare not active
            pending_shard_id = 0
        pending_instance_name = received_conf['instance_name']
        
        # The incarnation is set BEFORE the flag, as the flag is what readers look at first
        self.partial_configuration_incarnation = PartConfigurationIncarnation(self.configuration_incarnation, pending_shard_id, pending_instance_name, is_active=is_activated)
        self._have_pending_configuration = True
        if self.partial_configuration_incarnation.get_part_id() != 0:
            logger_configuration.info('Ready to load the configuration %s' % self.partial_configuration_incarnation)
    
    
    def after_scheduler_configuration_load(self, conf_information):
        # type: (Dict) -> None
        """ Align the configuration identifiers with the configuration that was really applied.
        
        `conf_information` must have the 'configuration_incarnation', 'activated' and
        'instance_name' keys; 'shard_id' is mandatory only for an activated configuration
        (an inactive one falls back to shard 0, like in early_scheduler_configuration_load).
        Nothing is updated if the Arbiter already sent another configuration. """
        # #SEF-9780 ensure configuration identifiers are up-to-date with applied configuration, as put_conf from Arbiter cannot be done with a lock ...
        # Read self.new_conf only once: it can change between the test and the log
        newer_conf = self.new_conf
        if newer_conf:
            logger_configuration.info(
                'Arbiter sent a new configuration [%s] while we where loading configuration [%s], skipping configuration finalisation' % (newer_conf.get('configuration_incarnation', None), conf_information['configuration_incarnation']))
            return
        
        conf_incarnation = conf_information['configuration_incarnation']
        if not conf_incarnation:
            logger_configuration.error('Applied a configuration without a valid configuration_incarnation, keeping old value %s' % self.configuration_incarnation)
            return
        self.configuration_incarnation = conf_incarnation
        
        is_activated = conf_information['activated']
        self._partial_is_activated = is_activated
        if is_activated:
            if 'shard_id' not in conf_information:
                logger_configuration.error('Could not find shard_id of applied configuration, unable to update partial_configuration_incarnation value, keeping old value %s' % self.partial_configuration_incarnation)
                return
            shard_id = conf_information['shard_id']
        else:
            # An inactive configuration is allowed to come without shard_id: do not crash on it
            shard_id = conf_information.get('shard_id', 0)
        self.partial_configuration_incarnation = PartConfigurationIncarnation(self.configuration_incarnation, shard_id, conf_information['instance_name'], is_active=is_activated)
    
    
    # Set properties we want to set in our new schedulers
    def _set_default_values_to_scheduler_entry(self, entry):
        # IMPORTANT: mut be LOCAL, so each scheduler have their own {} and []
        default_scheduler_properties = {
            'con'               : None,
            'type'              : 'scheduler',
            'thread'            : None,
            'daemon_incarnation': {},
            'active'            : True,  # IMPORTANT: force active in this element to allow connection to it
            'last_proxy_sync'   : 0,
            'last_connection'   : 0,
        }
        
        entry.update(default_scheduler_properties)
    
    
    def _set_daemon_id_of_scheduler(self, daemon, daemon_id):
        """ Store `daemon_id` under the 'id' key of the `daemon` entry. """
        daemon['id'] = daemon_id
    
    
    def _load_poller_or_reactionner_from_configuration(self, daemon, daemon_id, override_conf, _type, into, new_list, _logger):
        # Thread that manage this connection
        thread = None
        
        # replacing poller address and port by those defined in satellite map
        self._update_daemon_addr_port_with_satellitemap(daemon, override_conf['satellitemap'], _logger)
        daemon['uri'] = self._get_daemon_uri(daemon)
        
        # Must look if we already have it
        already_got = daemon_id in into
        if already_got:
            thread = into[daemon_id]['thread']
        
        daemon['instance_id'] = 0  # No use so all to 0
        daemon['daemon_incarnation'] = {}  # no use
        daemon['last_connection'] = 0
        daemon['thread'] = thread
        daemon['con'] = None
        daemon['id'] = daemon_id
        daemon['type'] = _type
        if not already_got:
            new_list.append((daemon['name'], daemon['uri']))
        into[daemon_id] = daemon
    
    
    # Set up a new conf, but beware of global lock management.
    # Note: the whole load is done under the satellite lock, so do not add any other locking in the loading code!
    def setup_new_conf(self):
        """ Load the received configuration, under the satellite lock and a thread dump watchdog. """
        # Note: we will dump threads if the configuration is not finish to be loaded
        # * after 5 minutes
        # * dump every minute
        first_dump_after = 5 * 60
        dump_every = 60
        with WatchDogThreadDumper('CONFIGURATION LOADING', first_dump_after, dump_every), self.satellite_lock:
            self.really_setup_new_conf()
    
    
    def really_setup_new_conf(self):
        """ Load the configuration stored in self.new_conf (called by setup_new_conf under the satellite lock).
        
        Main steps, in this order:
        * extract what we need from self.new_conf, then reset self.new_conf to None
        * if we are not activated: go to idle mode and stop here
        * deserialize the configuration and tag it with our instance information
        * drop the pollers/reactionners that are not in the new configuration, register the new ones and the other schedulers
        * apply override_conf, timezone, modules, macro resolver
        * give the configuration to the scheduler object
        * switch from the pending configuration information to the applied ones
        """
        t0 = time.time()
        
        _logger = self._print_new_update_conf_received()
        # If the configuration was giving us a new configuration incarnation, show it
        self.print_configuration_incarnation_log_entry_if_need(_logger)
        
        # first: warn the scheduler object that a configuration is being parsed/loaded
        # so maybe there are structures that must be avoided during this (like rogue satellites)
        self.sched.warn_about_a_new_configuration_load_in_progress()
        
        # Take everything we need from the received configuration now: self.new_conf is reset just after
        new_conf = self.new_conf
        conf_raw = new_conf['conf']
        configuration_incarnation = new_conf.get('configuration_incarnation', None)
        # Identifiers of the configuration we are applying, given to after_scheduler_configuration_load() at the end
        applied_conf = {'activated': new_conf['activated'], 'instance_name': new_conf['instance_name'], 'configuration_incarnation': configuration_incarnation}
        if 'shard_id' in new_conf:
            applied_conf.update({'shard_id': new_conf['shard_id']})
        override_conf = new_conf['override_conf']
        modules = new_conf['modules']
        satellites = new_conf['satellites']
        instance_name = new_conf['instance_name']
        skip_initial_broks = new_conf['skip_initial_broks']
        # NOTE: scheduler__export_data__enabled was added in a fix, so maybe the arbiter is not up-to-date
        self.scheduler__export_data__enabled = new_conf.get('scheduler__export_data__enabled', True)
        realm = new_conf['realm']
        _logger.info('Configuration %s is being parsed and loaded' % configuration_incarnation)
        
        self._set_spare(new_conf.get('spare', True), _logger)
        self._set_is_activated(new_conf.get('activated', True), _logger)
        
        # From here, a not None self.new_conf means that the Arbiter sent ANOTHER configuration during this load
        del new_conf
        self.new_conf = None
        if not self.is_activated():
            # Not activated: nothing to load, just clean what we have and stop here
            self.go_to_idle_mode(instance_name, modules, realm)
            return
        
        conf_from_arbiter = SafeUnpickler.loads(conf_raw, 'Configuration received')
        del conf_raw
        
        _logger.debug('Configuration received at [%d]. Deserialized in [%.2f] secs' % (t0, time.time() - t0))
        
        # Wait for retention to be saved before changing conf !! (Could be optimized by "wait for forked process to be started")
        self.sched.wait_for_retention_update_thread()
        
        # Write back the instance_name into the configuration file, so we can find it with
        # local tools
        self.save_daemon_name_into_configuration_file(instance_name)
        self.update_daemon_name(instance_name)
        
        # Tag the conf with our data
        self.conf = conf_from_arbiter
        self.cur_conf = self.conf
        
        del conf_from_arbiter
        
        self.conf.instance_name = instance_name
        self.conf.skip_initial_broks = skip_initial_broks
        
        # maybe the arbiter is not up-to-date, and we don't want to crash the scheduler for this because
        # old arbiter did not send this
        self.conf.default_properties_values = getattr(self.conf, 'default_properties_values', {})
        
        # NOTE: daemon__export_data__password was added in a fix, so maybe the arbiter is not up-to-date
        self.daemon__export_data__password = getattr(self.conf, 'daemon__export_data__password', '')
        
        self.realm = realm
        
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites
        
        # Set the enable/disable for human log format
        logger.set_human_format(on=self.conf.human_timestamp_log)
        
        new_schedulers = []  # for logging
        deleted_schedulers = []
        new_pollers = []
        deleted_pollers = []
        new_reactionners = []
        deleted_reactionners = []
        
        # We need to clean the satellites we do not have, but do not void the one we want to keep, as they
        # have threads and connection we want to keep
        for (type_, our_links_dict, delete_list) in [('pollers', self.pollers, deleted_pollers), ('reactionners', self.reactionners, deleted_reactionners)]:
            new_satellites_ids = set(satellites[type_].keys())  # get the new ids
            old_satellites_ids = set(our_links_dict.keys())  # the current ids we have
            must_be_deleted = old_satellites_ids - new_satellites_ids
            for must_be_deleted_id in must_be_deleted:
                entry = our_links_dict[must_be_deleted_id]
                logger.info('Removing the satellite (%s:%s) %s as we do not need it anymore.' % (type_, must_be_deleted_id, entry['name']))
                delete_list.append((entry['name'], entry['uri']))
                del our_links_dict[must_be_deleted_id]
        
        # Refresh the old proxy items with the new ones. Here we need to :
        # * create if missing
        # * update some fields if already present
        # * remove if missing
        proxyitemsmgr.refresh_items(self.conf.item_proxies)
        
        proxyitemsgraph.reset_from_other(self.conf.proxy_items_graph)
        
        # NOTE(review): looks redundant with the set_human_format(on=...) call done above — confirm before removing
        if self.conf.human_timestamp_log:
            logger.set_human_format()
        
        # Now for pollers
        for (daemon_id, daemon) in satellites['pollers'].items():
            self._load_poller_or_reactionner_from_configuration(daemon, daemon_id, override_conf, 'poller', self.pollers, new_pollers, _logger)
            continue
        
        # Now reactionners
        for (daemon_id, daemon) in satellites['reactionners'].items():
            self._load_poller_or_reactionner_from_configuration(daemon, daemon_id, override_conf, 'reactionner', self.reactionners, new_reactionners, _logger)
            continue
        
        # Now others schedulers
        for (daemon_id, daemon) in satellites['schedulers'].items():
            # Here for schedulers we do NOT want to log the "id" as it's the scheduler .cfg id, and not the shard id
            self._set_or_update_scheduler_from_configuration(daemon, daemon_id, override_conf, new_schedulers, deleted_schedulers, _logger, do_stack_daemon_id=False)
            continue
        
        # We can now show new/deleted elements
        self._print_new_and_deleted_daemons(new_schedulers=new_schedulers, deleted_schedulers=deleted_schedulers,
                                            new_pollers=new_pollers, deleted_pollers=deleted_pollers,
                                            new_reactionners=new_reactionners, deleted_reactionners=deleted_reactionners,
                                            _logger=_logger)
        
        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            # print "Overriding the property %s with value %s" % (prop, self.override_conf[prop])
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)
        
        self.set_tz(self.conf.use_timezone)
        
        _logger.debug('Receiving modules:[%s]' % (','.join([m.get_name() for m in self.modules])))
        self.modules_manager.update_modules(self.modules)
        
        _logger.debug("Loading configuration realm[%s] instance_name[%s] total_number_of_item_in_the_realm[%d]." % (realm, instance_name, len(self.conf.item_proxies)))
        self.conf.explode_global_conf()
        
        # Creating the Macroresolver Class & unique instance
        self.macro_resolver = MacroResolver()
        self.macro_resolver.init(self.conf)
        
        self.sched.load_configuration_from_arbiter(self.conf, self.pollers, self.reactionners, configuration_incarnation)
        
        # Start threads if needed, not a problem as starting thread is cheap and not timeout prone
        self.assert_valid_satellite_threads()
        
        # Ok we have finished loading the configuration, we can switch to answer to the Arbiter with our real
        # configuration_id and shard_id, instead of the pending one
        # (only if no other configuration was received during the load: self.new_conf was reset to None at the beginning)
        if self.new_conf is None:
            self._pending_shard_id = None
            self._have_pending_configuration = False
            self.have_configuration = True
        
        # The configuration load is done, let the scheduler know it, so it can be sure about all data structures
        self.sched.warn_about_the_end_of_the_configuration_load()
        
        # #SEF-9780 avoid mismatch between freshly applied conf and conf identifiers
        self.after_scheduler_configuration_load(applied_conf)
        
        # Be sure that we did clean all the memory from here
        force_memory_trimming(context='because the Scheduler loaded a new configuration')
    
    
    def go_to_idle_mode(self, instance_name: 'str|None' = None, modules: 'Modules|None' = None, realm: 'str|None' = None):
        """ Stop the scheduler and drop everything that comes from the configuration.
        
        Without `instance_name` our current name is kept, without `realm` our current realm is kept. """
        scheduler = self.sched
        scheduler.die()
        scheduler.wait_main_loop_has_exited()
        scheduler.wait_for_retention_update_thread()
        self.modules_manager.go_to_idle_mode(modules)
        
        with self.satellite_lock:
            self._clean_known_daemons()
            if instance_name:
                self.update_daemon_name(instance_name)
            else:
                instance_name = self.name
            # Let the scheduler clean it and know it's a spare
            scheduler.set_as_inactive(instance_name, self.configuration_incarnation, self.spare)
            
            # No more configuration, nor anything that was given with it
            self.conf = self.cur_conf = None
            self.daemon__export_data__password = ''
            self.override_conf = None
            proxyitemsmgr.refresh_items([])
            proxyitemsgraph.reset()
            self.modules = None
            if realm:
                self.realm = realm
            
            self.have_configuration = True
            self._have_pending_configuration = False
            scheduler.warn_about_the_end_of_the_configuration_load()
    
    
    # Check that the distant poller/reactionner can accept connections
    # Also check if I am known by that daemon
    def ping_and_check_distant_daemon(self, sat_entry):
        """ Check that the distant daemon can accept connections and, for a poller or a
        reactionner, that it answers "True" when asked if it knows us. """
        if not super(Shinken, self).ping_and_check_distant_daemon(sat_entry):
            return False
        # Without configuration we do not have any instance_id to ask about
        if not self.conf:
            return False
        
        if sat_entry['type'] not in ('reactionner', 'poller'):
            return True
        
        connection = sat_entry['con']
        answer = connection.get('is_scheduler_known', {'sched_id': self.conf.instance_id})
        return answer == "True"
    
    
    # Give the arbiter the data about what I manage
    # for me, it's just my instance_id and my push flavor
    def get_currently_managed_configuration(self):
        # If the arbiter did give us a pending configuration, give back information about it, so he knows that we did receive and will load it
        if self._have_pending_configuration:
            logger.debug('get_currently_managed_configuration:: GIVE EARLY CONF INFO %s' % self.partial_configuration_incarnation)
            return self.partial_configuration_incarnation.dump_as_json()
        
        if not self.already_have_conf:
            return {}
        
        if not hasattr(self, 'conf'):
            return {}
        
        # Maybe we are currently reloading it, and the arbiter asked us to wait a new conf
        if self.partial_configuration_incarnation is None:
            return {}
        
        return self.partial_configuration_incarnation.dump_as_json()
    
    
    # Get the good tabs for links by the kind. If unknown, return None
    # The scheduler need to connect to other schedulers, pollers and reactionner
    # only, and arbiter is need to keep the arbiter trace
    def get_link_from_type(self, daemon_type, daemon_id):
        t = {
            'scheduler'  : self.schedulers,
            'arbiter'    : self.arbiters,
            'poller'     : self.pollers,
            'reactionner': self.reactionners,
        }
        with self.satellite_lock:
            return t.get(daemon_type, {}).get(daemon_id, None)
    
    
    # On the scheduler we want to connect (ping+job) only for passive poller/reactionners
    def should_connect_to_distant_satellite(self, satellite_type, distant_link):
        if not self.is_activated():
            return False
        if satellite_type == 'poller' or satellite_type == 'reactionner':
            is_passive = distant_link['passive']
            return is_passive
        # For scheduler, we only connect to OTHER schedulers. We can identify by names as they are uniq by the arbiter
        elif satellite_type == 'scheduler':
            # Maybe we are not ready, so don't connect currently
            if not hasattr(self, 'sched'):
                return False
            is_another_scheduler = self.sched.instance_name != distant_link['name']
            return is_another_scheduler
        else:
            raise Exception('Error: the satellite type %s is not managed by this daemon. Cannot connect to it.' % satellite_type)
    
    
    # jobs to do in http distant thread
    def get_jobs_from_distant(self, distant_link):
        if not hasattr(self, 'sched'):
            return
        
        # We handle only poller & reactionner passif
        # If the connection is invalid the scheduler daemon will try to reconnect
        if distant_link['type'] in ('poller', 'reactionner') and distant_link.get('passive', False) and distant_link['con'] is not None:
            self.sched.push_actions_to_passives_satellites(distant_link)
            self.sched.get_actions_from_passives_satellites(distant_link)
        
        if distant_link['type'] == 'scheduler' and self.sched.instance_name != distant_link['name']:
            self.get_new_proxy_states(distant_link)
    
    
    # From others schedulers we update our states
    def get_new_proxy_states(self, sat_entry):
        """Fetch the proxy states that changed on another scheduler and merge them into ours.

        `sat_entry` is the link dict of the distant scheduler. The keys used here are 'name', 'con' (the connection
        object, None when not initialized) and 'last_proxy_sync' (the `since` value sent to the other scheduler).
        'con' and 'last_proxy_sync' are updated in place. Nothing is returned.

        'last_proxy_sync' is read before the guarded section, so a missing key raises KeyError to the caller.
        Errors raised during the exchange itself are handled here:
          * payload that cannot be decoded / decompressed / unpickled, or HTTP error: the connection is closed
            and reset to None;
          * KeyError: the connection is (re)initialized with pynag_con_init;
          * AttributeError: only a warning is logged;
          * any other exception: it is logged with its traceback and sys.exit(1) is called.
        """
        # Local import (same style as the `sys` import below). On Python 3 base64.b64decode reports a corrupted payload
        # with binascii.Error instead of TypeError; without it, such a payload would fall into the catch-all handler
        # and exit instead of just resetting the connection.
        import binascii
        
        sat_type = 'scheduler'
        last_proxy_sync = sat_entry['last_proxy_sync']
        try:
            con = sat_entry['con']
            if con is None:  # None = not initialized: make the connection
                logger.debug('get_new_proxy_states:: go to connecting to %s' % sat_entry)
                self.pynag_con_init(sat_entry)
                return
            
            t0 = time.time()
            # Before asking for a call that can be long, do a simple ping to be sure the other side is alive
            con.get('ping')
            tmp_states = con.get('get_proxy_states', {'since': last_proxy_sync}, wait='long')
            try:
                _t = base64.b64decode(tmp_states)
                _t = zlib.decompress(_t)
                tmp_result = SafeUnpickler.loads(_t, 'Proxy exchange between schedulers')
            except (TypeError, binascii.Error, zlib.error, pickle.PickleError) as exp:
                logger.error('Cannot load proxy states data from %s : %s' % (sat_entry['name'], exp))
                self._close_proxy_states_connection(sat_entry)
                return
            states = tmp_result['states']
            diff_since = tmp_result['diff_since']
            
            if len(states) != 0:
                logger.debug("[Scheduler] [%s] Proxy states get in [%.3f]s from [%s], start at time [ %s ]" % (len(states), time.time() - t0, sat_entry['name'], last_proxy_sync))
            
            # Ok, we can merge these states into our own proxy items
            proxyitemsmgr.update_from_other_states(states)
            # The time we keep is the one given by the other node, because we are not sure our clock and
            # theirs are in sync
            sat_entry['last_proxy_sync'] = diff_since
        # A key is missing (typically 'con' is not known yet), so we create the connection
        except KeyError as exp:
            logger.debug("Key error for get_proxy_states : %s" % str(exp))
            self.pynag_con_init(sat_entry)
        except HTTPExceptions as exp:
            logger.warning("Connection problem to the %s %s: %s" % (sat_type, sat_entry['name'], str(exp)))
            self._close_proxy_states_connection(sat_entry)
        # An attribute was missing during the exchange: warn only, the connection is kept as is
        except AttributeError as exp:
            logger.warning("The %s %s should not be initialized: %s" % (sat_type, sat_entry['name'], str(exp)))
        # We do not know what happened, so log everything we can and exit
        except Exception as x:
            logger.error(str(x))
            logger.error(traceback.format_exc())
            import sys
            sys.exit(1)
    
    
    @staticmethod
    def _close_proxy_states_connection(sat_entry):
        """Close the connection of `sat_entry` (best effort) and mark it as not initialized (None)."""
        try:
            sat_entry['con'].con.close()
        except Exception:
            # Best effort only: the connection can be missing, None or already broken.
            # `Exception` (not a bare except) so SystemExit / KeyboardInterrupt are not swallowed.
            pass
        sat_entry['con'] = None
    
    
    # For a new distant scheduler incarnation, we should reset its last_proxy_sync, so we will ask for ALL proxy states
    # and have an up-to-date view of our clusters
    def _manage_new_distant_daemon_incarnation(self, entry, old_incarnation, new_incarnation):
        if entry['type'] == 'scheduler':
            entry['last_proxy_sync'] = 0
            old_incarnation_uuid = old_incarnation.get('configuration_incarnation_uuid', '-no incarnation-')
            new_incarnation_uuid = new_incarnation.get('configuration_incarnation_uuid', '-no incarnation-')
            logger.info('The scheduler %s did change incarnation ( %s => %s ) so we will ask for all proxy states in the next turn.' % (entry['name'], old_incarnation_uuid, new_incarnation_uuid))
    
    
    # Our main function, launched after the init
    def main(self):
        """Entry point of the daemon: run the startup sequence, then the main loop.

        The startup steps are called in order (daily version log, configuration file loading, early exit check,
        daemon init and start, modules manager loading), then `self.uri` is taken from the HTTP daemon and the
        listening interface is logged before entering `do_mainloop`.

        Any exception escaping these steps is logged at CRITICAL level with its stack and re-raised.
        """
        try:
            self.daily_log_version()
            self.load_config_file()
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()
            
            self.uri = self.http_daemon.uri
            
            if '0.0.0.0' in self.uri:
                logger.info('[schedulerdeamon] The daemon listens on all network interfaces.')
            else:
                # Drop the scheme whatever its length: a fixed [7:] slice only fits 'http://' and would leave
                # a stray '/' in front of the address with 'https://'
                listening_address = self.uri.split('://', 1)[-1]
                logger.info('[schedulerdeamon] The daemon listens on the %s network interface.' % listening_address)
            
            self.do_mainloop()
        except Exception:
            logger.critical('The daemon did have an unrecoverable error. It must exit.')
            logger.critical('You can log a bug to your Shinken integrator with the error message:')
            logger.print_stack(level=logging.CRITICAL)
            raise
