#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import itertools
import json
import logging
import os
import pickle
import shutil
import socket
import sys
import threading
import time
import traceback
from multiprocessing import Process
from queue import Empty

import shinken.misc.configuration_error_log as arbiter_configuration_messages
from shinken.brok import Brok, PersistantBrok
from shinken.component.arbiter.monitoring_start_time_component import MonitoringStartTimeComponent
from shinken.configuration_incarnation import ConfigurationIncarnation
from shinken.daemon import Daemon, Interface, IStatsInterface
from shinken.dispatcher import Dispatcher
from shinken.external_command import ExternalCommand
from shinken.external_command import ExternalCommandManager
from shinken.inter_daemon_message import InterDaemonMessage
from shinken.log import logger, get_chapter_string, LoggerFactory
from shinken.misc.type_hint import TYPE_CHECKING
from shinken.objects.config import Config, OLD_ARBITER_RETENTION_JSON, REALM_RETENTION_JSON
from shinken.runtime_stats.cpu_stats import cpu_stats_helper
from shinken.runtime_stats.threads_dumper import WatchDogThreadDumper
from shinken.safepickle import SafeUnpickler
from shinken.util import jsonify_r, set_process_name
from shinkensolutions.arbiter_configuration_messages import start_write_arbiter_messages, flush_arbiter_message_buffer, COMMON_FILE_NAME, end_write_arbiter_messages, write_final_json
from shinkensolutions.system_tools import set_ownership

if TYPE_CHECKING:
    from shinken.misc.type_hint import Optional, Dict, Any, List
    from shinken.objects.realm import Realm
    from shinken.satellitelink import SatelliteLink

raw_logger = LoggerFactory.get_logger()
logger_perf = raw_logger.get_sub_part('PERF', part_name_size=4)
logger_configuration = raw_logger.get_sub_part('CONFIGURATION')
logger_latest_monitoring_configuration = logger_configuration.get_sub_part('LATEST-MONITORING-CONFIGURATION')
logger_wait_for_master_death = raw_logger.get_sub_part('WAIT-FOR-MASTER-DEATH')

_PATH_ALIVE_DAEMONS = '/var/lib/shinken/alive_daemons_retention.json'
_PATH_LAST_SPARE_CONF = '/var/lib/shinken/arbiter_spare_last_conf.dat'

CHAPTER_CONFIGURATION = get_chapter_string('CONFIGURATION')


# Interface for the other Arbiter
# It connects, and together we decide who's the Master and who's the Slave, etc.
# Here is also a function to get a new conf from the master
class IForArbiter(Interface):
    """HTTP interface exposed by the Arbiter daemon to the other arbiter and to check scripts.
    
    After each method, function attributes (``doc``, ``need_lock``, ``method``, ``display_name``) are set on it;
    they are presumably consumed by the HTTP layer of the parent ``Interface`` class (not visible here).
    The class level ``doc`` name is only a scratch variable reused to fill these attributes.
    """
    app: 'Arbiter'
    
    
    def get_daemon_infos(self):
        """Return a dict describing this arbiter and, when a dispatcher exists, every other daemon it knows."""
        satellites = []
        to_return = {
            'arbiter'   : {
                'activated'  : self.app.must_run,
                'spare'      : self.app.me.spare,
                'version'    : self.get_context()['current_version'],
                'api_version': Interface.RAW_STATS_API_VERSION
            },
            'satellites': satellites
        }
        dispatcher = self.app.dispatcher
        if dispatcher:
            for daemon in dispatcher.get_all_other_daemons():
                satellites.append({
                    'diff_time_with_arbiter': daemon.diff_time_with_arbiter,
                    'display_name'          : daemon.get_name(),
                    'alive'                 : daemon.alive,
                    'reachable'             : daemon.reachable,
                    'type'                  : daemon.my_type,
                    'daemon_version'        : daemon.daemon_version,
                })
        return to_return
    
    
    get_daemon_infos.doc = "read the sources"
    get_daemon_infos.need_lock = False
    
    
    # DISABLED
    def have_conf(self):
        """Disabled call: always answers False."""
        return False
    
    
    have_conf.doc = 'DISABLED'
    have_conf.need_lock = False
    
    doc = 'Put a new configuration to the daemon'
    
    
    # The master Arbiter is sending us a new conf in a pickle way. Ok, we take it
    def put_conf(self, conf):
        """Accept a configuration pushed by another arbiter, unless the application refuses it.
        
        NOTE(review): per the comment above, ``conf`` is a pickled payload received over HTTP; the decoding is
        done by the parent class (not visible here). Confirm that it goes through a safe unpickler.
        """
        if not self.app.accept_configuration():
            logger_configuration.error('Another Arbiter try to send us a configuration, but we are a MASTER so we are refusing it.')
            return
        super(IForArbiter, self).put_conf(conf)
        self.app.arbiter_master_send_us_a_conf()
    
    
    put_conf.method = 'POST'
    put_conf.doc = doc
    put_conf.display_name = 'Configuration reception from an Arbiter server'
    
    doc = 'Get the managed configuration (internal)'
    
    
    def get_config(self):
        """Return the configuration object currently held by the application."""
        return self.app.conf
    
    
    get_config.doc = doc
    get_config.need_lock = False
    
    doc = 'Ask the daemon to do not run'
    
    
    # The master arbiter asks me not to run!
    def do_not_run(self):
        """Forward the 'do not run' order to the application."""
        self.app.do_not_run()
    
    
    do_not_run.need_lock = False
    do_not_run.doc = doc
    
    # This call used to inherit the 'do not run' description by accident, so give it its own one
    doc = 'Ask the daemon to drop its configuration and wait for a new one'
    
    
    def wait_new_conf(self):
        """Drop the current configuration, stop running and forget the last spare configuration saved on disk."""
        super(IForArbiter, self).wait_new_conf()
        self.app.must_run = False
        self.app.conf = None
        self.app.cur_conf = None
        
        logger.debug('Received wait_new_conf')
        try:
            os.remove(_PATH_LAST_SPARE_CONF)
        except OSError:
            # Best effort: the file may simply not exist
            pass
    
    
    wait_new_conf.need_lock = False
    wait_new_conf.doc = doc
    
    doc = 'Get the satellite names sort by type'
    
    
    # Here a function called by check_shinken to get daemons list
    def get_satellite_list(self, daemon_type=''):
        """Return ``{daemon type: [daemon names]}``, restricted to ``daemon_type`` when it is given."""
        res = {}
        for known_type in ['arbiter', 'scheduler', 'poller', 'reactionner', 'receiver', 'broker']:
            if daemon_type and daemon_type != known_type:
                continue
            daemon_name_attr = known_type + "_name"
            res[known_type] = [getattr(dae, daemon_name_attr) for dae in self.app.get_daemons(known_type) if hasattr(dae, daemon_name_attr)]
        return res
    
    
    get_satellite_list.doc = doc
    get_satellite_list.need_lock = False
    
    doc = 'Dummy call for the arbiter'
    
    
    # Dummy call. We are the master, we manage what we want
    def what_i_managed(self):
        """Dummy call: always returns an empty dict."""
        return {}
    
    
    what_i_managed.need_lock = False
    what_i_managed.doc = doc
    
    
    # Arbiter ask me which shard I do manage
    def get_currently_managed_configuration(self):
        """Return what the application reports as its currently managed configuration."""
        managed_configuration = self.app.get_currently_managed_configuration()
        logger.debug("%s Anoter arbiter asked me what I manage. It's %s" % (CHAPTER_CONFIGURATION, managed_configuration))
        return managed_configuration
    
    
    get_currently_managed_configuration.need_lock = False
    get_currently_managed_configuration.doc = 'Return the managed configuration ids (internal)'
    
    doc = 'Return all the data of the satellites'
    
    
    # We will try to export all data from our satellites, but only the json-able fields
    def get_all_states(self):
        """Return ``{daemon type: [state dict, ...]}`` with the json-able properties of every configured daemon.
        
        Each daemon gives exactly one state dict, built from its class ``properties`` and ``running_properties``;
        values that ``json.dumps`` cannot serialize are skipped.
        """
        res = {}
        for daemon_type in ('arbiter', 'scheduler', 'poller', 'reactionner', 'receiver', 'broker'):
            states = []
            res[daemon_type] = states
            for daemon in getattr(self.app.conf, daemon_type + 's'):
                cls = daemon.__class__
                state = {}
                for properties in (cls.properties, cls.running_properties):
                    for prop in properties:
                        if not hasattr(daemon, prop):
                            continue
                        value = getattr(daemon, prop)
                        # give a try to a json able object
                        try:
                            json.dumps(value)
                        except Exception as exp:
                            logger.debug('get_all_states: property [%s] is not json-able, skipping it: %s' % (prop, exp))
                            continue
                        state[prop] = value
                # One entry per daemon, added once both property tables have been read
                states.append(state)
        
        return res
    
    
    get_all_states.doc = doc
    get_all_states.need_lock = False
    
    # Try to give some properties of our objects
    doc = 'Dump all objects of the type in [hosts, services, contacts, commands, hostgroups, servicegroups]'
    
    
    def get_objects_properties(self, table, fields=None):
        """Return the jsonified objects of the configuration attribute ``table``.
        
        ``fields`` is an optional comma separated list of attribute names; when given (and not empty), each
        object is reduced to a dict of these attributes (None for a missing attribute).
        Returns an empty list when the table does not exist or is empty.
        """
        logger.debug('get_objects_properties for table:[%s] and fields:[%s]' % (str(table), fields))
        objs = getattr(self.app.conf, table, None)
        if not objs:
            return []
        
        if fields is not None:
            fields = fields.split(',')
        
        res = []
        for obj in objs:
            if fields:
                dst_obj = {field: getattr(obj, field, None) for field in fields}
            else:
                dst_obj = obj
            res.append(jsonify_r(dst_obj))
        return res
    
    
    get_objects_properties.doc = doc
    get_objects_properties.need_lock = False
    
    
    # For the healthcheck, we can give info about who is master/spare about others
    def get_satellite_spare_info(self, daemon_type, daemon_name):
        """Return the spare and master daemon names linked to the daemon ``daemon_name`` of type ``daemon_type``.
        
        The result always has a 'spare' and a 'master' entry, each with a 'founded' flag set to True only when
        the daemon has the matching attribute.
        """
        logger.debug('get_satellite_spare_daemon:: asking for %s / %s' % (daemon_type, daemon_name))
        lst = getattr(self.app.conf, daemon_type + 's')
        daemon = lst.find_by_name(daemon_name)
        r = {'spare': {'founded': False}, 'master': {'founded': False}}
        if daemon is None:
            logger.info('HTTP API: get_satellite_spare_daemon:: asking for %s / %s => found nothing' % (daemon_type, daemon_name))
            return r
        
        if hasattr(daemon, 'spare_daemon'):
            spare_info = r['spare']
            spare_info['founded'] = True
            spare_daemon = getattr(daemon, 'spare_daemon', None)
            spare_info['daemon'] = '' if spare_daemon is None else spare_daemon.get_name()
            spare_info['require_same_modules'] = getattr(daemon, '%s__manage_spare__spare_must_have_the_same_list_of_module_type' % daemon_type, '1')
        
        if hasattr(daemon, 'master_daemon'):
            master_info = r['master']
            master_info['founded'] = True
            master_daemon = getattr(daemon, 'master_daemon', None)
            master_info['daemon'] = '' if master_daemon is None else master_daemon.get_name()
        
        logger.debug('get_satellite_spare_daemon:: asking for %s / %s => found %s' % (daemon_type, daemon_name, r))
        return r
    
    
    get_satellite_spare_info.doc = 'Give for a daemon its spare_daemon'
    get_satellite_spare_info.need_lock = False


class IStats(IStatsInterface):
    """Statistics HTTP interface of the Arbiter daemon."""
    app: 'Arbiter'
    
    
    def get_raw_stats(self, param='', module=''):
        # type: (str, str) -> Dict[str, Any]
        """Delegate to the parent implementation, forwarding ``param`` and ``module`` unchanged."""
        parent = super(IStats, self)
        return parent.get_raw_stats(param=param, module=module)
    
    
    get_raw_stats.doc = 'get stats of the daemon'
    get_raw_stats.need_lock = False
    
    
    def _daemon_get_raw_stats(self, param: str = '', module_wanted: 'list[str]|None' = None) -> 'dict[str, Any]':
        """Build the arbiter specific part of the raw stats.
        
        ``param`` and ``module_wanted`` are accepted but not used to build the result.
        """
        module_wanted = [] if module_wanted is None else module_wanted
        app = self.app
        return {
            'http_errors_count': app.http_errors_count,
            # A master always counts as having a configuration; a spare only once it received one
            'have_conf'        : app.is_master or app.cur_conf is not None,
            'activated'        : app.must_run,
            'api_version'      : Interface.RAW_STATS_API_VERSION,
            'arbiter_version'  : self.get_context()['current_version'],
            'spare'            : app.me.spare,
        }


# Main Arbiter Class
class Arbiter(Daemon):
    def __init__(self, config_files, is_daemon, do_replace, verify_only, debug, debug_file, profile=None, analyse=None, migrate=None, arb_name='', daemon_id=0):
        """Set up the arbiter state; the configuration itself is not read here.
        
        :param config_files: paths of the configuration files. The first one is given to the parent Daemon
            constructor, the '.cfg' ones are kept in ``self.config_files`` and the last '.ini' one (if any)
            becomes ``self.config_file``.
        :param is_daemon, do_replace, debug, debug_file, daemon_id: forwarded as is to the parent Daemon constructor.
        :param verify_only, analyse, migrate, arb_name: stored as attributes of the same name.
        :param profile: not used in this constructor.
        """
        # Start the arbiter configuration messages log before anything else (even the Daemon init)
        arbiter_configuration_messages.enable_message_arbiter()
        start_write_arbiter_messages(COMMON_FILE_NAME)
        super(Arbiter, self).__init__('arbiter', config_files[0], is_daemon, do_replace, debug, debug_file, daemon_id)
        
        # Keep only the cfg files, not the ini
        self.config_files = [c for c in config_files if c.endswith('.cfg')]
        
        # The ini specific configuration file, should be only one file
        self.config_file = None
        # Read from disk: raises if the server uuid file is missing
        self.server_uuid = self._get_server_uuid()
        
        # If several ini files are given, the last one wins
        for c in config_files:
            if c.endswith('.ini'):
                self.config_file = c
        
        self.verify_only = verify_only
        self.analyse = analyse
        self.migrate = migrate
        self.arb_name = arb_name
        
        # Broks waiting to be pushed to the brokers, indexed by brok id
        self.broks = {}
        self.persistant_broks = {}
        
        self.is_master = False
        self.dispatcher = None  # type: Optional[Dispatcher]
        # Our own arbiter link, not known before the configuration is loaded
        self.me = None
        
        self.last_run_alive_daemons = {}
        self.nb_broks_send = 0
        
        # Now tab for external_commands
        self.external_commands = []
        
        # Inter daemon messages queues, each one protected by its own lock
        self.messages_from_satellites_lock = threading.RLock()
        self.messages_from_satellites = []  # type: List[InterDaemonMessage]
        self.messages_to_satellites_lock = threading.RLock()
        self.messages_to_satellites = []  # type: List[InterDaemonMessage]
        
        # Used to work out if we must still be alive or not
        self.must_run = True
        self._is_retention_load_already_try = False  # Only load retention one time
        
        # Register our HTTP interfaces: the stats one, and the one used by the other arbiter
        self.interface = IForArbiter(self)
        self._add_http_interface(IStats(self))
        self._add_http_interface(self.interface)
        
        self.conf = Config()
        self.trace = None
        
        self.external_command = None
        
        self.last_master_speak = None
        
        # We are creating the configuration incarnation that will be given
        # with ALL the configuration, but currently we just don't know
        # our own name, so we will give it later
        self.configuration_incarnation = ConfigurationIncarnation()
        
        # Architecture name: name of the Shinken Enterprise installation
        # across all the nodes, used to detect several shinken enterprise
        # installations that are crossed, like testing giving conf to a prod node
        self._architecture_name = None
        self.monitoring_start_time_component = None
    
    
    def _get_environment_variables_to_log(self) -> dict[str, tuple[str, ...]]:
        """Return the parent's environment variables to log, plus an 'Arbiter configuration' entry."""
        arbiter_variables = (
            'CHECK_PROPOSE',
            'CHECK_STAGGING',
            'CHECK_PREPROD',
            'FORBIDDEN_TYPES',
            'ARBITER_CONFIGURATION_MESSAGES_UUID',
        )
        variables_by_section = super(Arbiter, self)._get_environment_variables_to_log()
        variables_by_section['Arbiter configuration'] = arbiter_variables
        return variables_by_section
    
    
    # Compute the architecture name: taken from our modules if one of them gives it, else 'shinken-HOSTNAME'
    def _get_and_set_architecture_name(self):
        """Set ``self._architecture_name`` from the modules, or from the host name when no module gives one.
        
        Exits the process with code 2 when two modules give different names.
        """
        for module_instance in self.modules_manager.get_all_instances():
            # Only the modules exposing get_architecture_name() have a say
            if hasattr(module_instance, 'get_architecture_name'):
                candidate_name = module_instance.get_architecture_name()
                known_name = self._architecture_name
                # If already set, must be the same
                if known_name is not None and candidate_name != known_name:
                    logger.error('Your have more than one architecture name defined in your modules, and they do not match: %s != %s' % (known_name, candidate_name))
                    sys.exit(2)
                self._architecture_name = candidate_name
                logger_configuration.info('Setting the architecture name "%s" from the module "%s"' % (self._architecture_name, module_instance.get_name()))
        
        if self._architecture_name is not None:
            return
        # No module gave a name: fall back to shinken-HOSTNAME
        self._architecture_name = 'shinken-%s' % socket.gethostname()
        logger_configuration.info('No architecture name was found in our modules, using the default value based on the server name: "%s" ' % self._architecture_name)
    
    
    def get_satellite_connections(self):
        if self.dispatcher:
            return self.dispatcher.get_satellite_connections()
        return ()
    
    
    def _get_server_uuid(self):
        """Read the server uuid from '/var/lib/shinken/server.uuid', store it in ``self.server_uuid`` and return it.
        
        According to the original notes, this file holds the value of "/sys/class/dmi/id/product_uuid" and is
        created (and updated after a clone) by the startup script /etc/init.d/shinken.
        """
        uuid_path = '/var/lib/shinken/server.uuid'
        
        # No error handling on purpose: the file MUST exist, so a missing file raises
        with open(uuid_path, 'r') as uuid_file:
            raw_uuid = uuid_file.read()
            self.server_uuid = raw_uuid.strip()
        return self.server_uuid
    
    
    # Generate the trace sent to my managed satellites to identify  myself.
    def _generate_and_link_arbiter_trace(self):
        # Create our trace structure. 'expire_period' is used by daemon.py to expire and forget an old arbiter trace.
        proto = 'https' if self.me.use_ssl else 'http'
        uri = '%s://%s:%s/' % (proto, self.me.address, self.me.port)
        self.trace = {
            'master_arbiter_uuid': self.conf.master_arbiter_uuid,
            'expire_period'      : self.me.check_interval * self.me.max_check_attempts,  # default value, will be changed for every daemon
            'check_interval'     : self.me.check_interval,  # only for daemon with a version < 02.07.05-Patched-05 (02.07.06) do not use this value anymore, use expire_period instead
            'spare'              : self.me.spare,
            'name'               : self.me.arbiter_name,
            'uri'                : uri,
            'identifier'         : self.server_uuid,
            'version'            : self.interface.get_context()['current_version'],
            'architecture_name'  : self._architecture_name,
        }
        conf = self.conf
        for lst in (conf.schedulers, conf.brokers, conf.pollers, conf.reactionners, conf.receivers, conf.arbiters):
            for satellite_link in lst:
                satellite_link.set_arbiter_trace(self.trace)
    
    
    # Use for adding things like broks
    def add(self, b):
        """Store ``b`` in the container matching its type; log a warning for an unmanaged type.
        
        PersistantBrok is tested before Brok, so a persistent brok never lands in ``self.broks``.
        """
        if isinstance(b, PersistantBrok):
            self.persistant_broks[b.id] = b
            return
        if isinstance(b, Brok):
            self.broks[b.id] = b
            return
        if isinstance(b, ExternalCommand):
            self.external_commands.append(b)
            return
        logger.warning('Cannot manage object type %s (%s)' % (type(b), b))
    
    
    def add_Brok(self, b):
        self.broks[b.id] = b
    
    
    # We must push our broks to the broker because it's stupid to make a crossing connection,
    # so we find the broker responsible for our broks, and we send it to him
    def push_broks_to_broker(self):
        for brk in self.conf.brokers:
            # Send only if alive of course
            if brk.alive and brk.reachable:
                is_send = brk.push_broks(self.broks)
                if is_send:
                    # They are gone, we keep none!
                    self.broks.clear()
                # now push the persistant broks and don't delete them
                brk.push_broks(self.persistant_broks)
    
    
    # We must take external_commands from all satellites like brokers, pollers, reactionners or receivers
    def get_external_commands_from_satellites(self):
        sat_lists = [self.conf.brokers, self.conf.receivers, self.conf.pollers, self.conf.reactionners]
        for lst in sat_lists:
            for sat in lst:
                # Get only if alive of course
                if sat.alive:
                    new_cmds = sat.get_external_commands()
                    for new_cmd in new_cmds:
                        self.external_commands.append(new_cmd)
    
    
    def get_messages_from_satellites(self):
        # type: () -> None
        """Queue the messages of every broker, receiver, poller and reactionner, then process the queue.
        
        The satellites are asked whether they are alive or not (the alive filter was disabled in the original code).
        """
        logger_message = LoggerFactory.get_logger('MANAGE INTER DAEMON MESSAGE')
        conf = self.conf
        for satellite in itertools.chain(conf.brokers, conf.receivers, conf.pollers, conf.reactionners):
            start_time = time.time()
            received_messages = satellite.get_messages()
            with self.messages_from_satellites_lock:
                self.messages_from_satellites.extend(received_messages)
            
            if not received_messages:
                continue
            logger_message.info('Got 〖%s〗 new messages from 〖%s〗 in 〖%s〗' % (len(received_messages), satellite.get_name(), logger_message.format_chrono(start_time)))
            if logger_message.is_debug():
                for received_message in received_messages:
                    logger_message.debug('Got message 〖%s〗' % (received_message.get_message_info()))
        self._process_received_messages_from_satellites()
    
    
    def _process_received_messages_from_satellites(self):
        # type: () -> None
        """Dispatch the queued satellite messages to this daemon or to its modules.
        
        A message addressed to this daemon without module name is handled by the daemon itself; with a module
        name, it is given to every alive module instance having this name. Messages that nobody handled
        (other daemon name, or no matching module) are put back in the queue.
        
        Each message is recorded as processed only once, whatever the number of modules that received it, and
        the remaining list is built in one pass instead of calling list.remove() for each processed message:
        list.remove() raised ValueError when the same message had been given to two modules.
        """
        logger_message = LoggerFactory.get_logger('MANAGE INTER DAEMON MESSAGE')
        
        # Take the whole queue under the lock, then work on our private copy
        with self.messages_from_satellites_lock:
            if not self.messages_from_satellites:
                return
            messages_from_satellites = self.messages_from_satellites
            self.messages_from_satellites = []
        
        nb_message_inter_daemon = len(messages_from_satellites)
        process_messages = []
        remaining_messages = []
        for inter_daemon_message in messages_from_satellites:
            message_to = inter_daemon_message.message_to
            is_processed = False
            if message_to.daemon_name == self.name:
                if message_to.module_name == '':
                    # Message is for our daemon, not its modules
                    self.handle_received_inter_daemon_message_for_arbiter(inter_daemon_message)
                    is_processed = True
                else:
                    # OK : it's a message for one type of module, we give it to all the modules concerned
                    for module in self.modules_manager.get_all_alive_instances():
                        if message_to.module_name == module.name:
                            module.handle_messages_received_from_arbiter(inter_daemon_message)
                            is_processed = True
            
            if is_processed:
                process_messages.append(inter_daemon_message)
            else:
                remaining_messages.append(inter_daemon_message)
        
        for process_message in process_messages:
            logger_message.debug('Processed message from satellites:〖%s〗' % process_message.get_message_info())
        
        logger_message.info('Message from satellites:〖%s〗 processed:〖%s〗 remaining:〖%s〗' % (nb_message_inter_daemon, len(process_messages), len(remaining_messages)))
        if logger_message.is_debug():
            for remaining_message in remaining_messages:
                logger_message.debug('Remaining message from arbiter:〖%s〗' % remaining_message.get_message_info())
        
        # Give back what we did not handle, after the messages that may have arrived meanwhile
        with self.messages_from_satellites_lock:
            self.messages_from_satellites.extend(remaining_messages)
    
    
    def handle_received_inter_daemon_message_for_arbiter(self, inter_daemon_message):
        # type: (InterDaemonMessage) -> None
        
        if inter_daemon_message.message_type == 'ping':
            self.send_message_to_satellites(InterDaemonMessage('pong', message_to=inter_daemon_message.message_from, message_from=inter_daemon_message.message_to, data={}))
            return
        
        if inter_daemon_message.message_type == 'response_monitoring_start_time':
            self.monitoring_start_time_component.handle_response_monitoring_start_time(inter_daemon_message)
    
    
    def send_messages_to_satellites(self):
        # type: () -> None
        """Send to every alive and activated broker, receiver, poller and reactionner the queued messages it can take.
        
        The messages picked for a satellite are removed from the queue before being sent, and the satellite is
        called even when there is nothing to send to it.
        """
        logger_message = LoggerFactory.get_logger('MANAGE INTER DAEMON MESSAGE')
        conf = self.conf
        for satellite in itertools.chain(conf.brokers, conf.receivers, conf.pollers, conf.reactionners):  # type: SatelliteLink
            # Get only if alive of course
            if not (satellite.alive and satellite.is_activated()):
                continue
            
            start_time = time.time()
            with self.messages_to_satellites_lock:
                messages_to_send = []
                for queued_message in self.messages_to_satellites:
                    if self._can_send_message(satellite, queued_message):
                        messages_to_send.append(queued_message)
                for picked_message in messages_to_send:
                    self.messages_to_satellites.remove(picked_message)
            satellite.send_messages(messages_to_send)
            
            if not messages_to_send:
                continue
            logger_message.info('Send 〖%s〗 messages to 〖%s〗 in 〖%s〗' % (len(messages_to_send), satellite.get_name(), logger_message.format_chrono(start_time)))
            if logger_message.is_debug():
                for sent_message in messages_to_send:
                    logger_message.debug('Send messages 〖%s〗' % (sent_message.get_message_info()))
    
    
    @staticmethod
    def _can_send_message(satellite, message):
        # type: (SatelliteLink, InterDaemonMessage) -> bool
        message_to = message.message_to
        return message_to.daemon_name == satellite.get_name() and (not message_to.module_name or message_to.module_name in [m.get_name() for m in satellite.modules])
    
    
    def send_message_to_satellites(self, inter_daemon_message):
        # type: (InterDaemonMessage) -> None
        with self.messages_to_satellites_lock:
            self.messages_to_satellites.append(inter_daemon_message)
    
    
    # Our links to satellites can raise broks. We must send them
    def get_broks_from_satellitelinks(self):
        tabs = [self.conf.brokers, self.conf.schedulers,
                self.conf.pollers, self.conf.reactionners,
                self.conf.receivers]
        for tab in tabs:
            for s in tab:
                new_broks = s.get_all_broks()
                for b in new_broks:
                    self.add(b)
    
    
    # Ask every satellite link for its initial status brok and add it to our broks
    def get_initial_broks_from_satellitelinks(self):
        tabs = [self.conf.brokers, self.conf.schedulers,
                self.conf.pollers, self.conf.reactionners,
                self.conf.receivers]
        for tab in tabs:
            for s in tab:
                b = s.get_initial_status_brok()
                self.add(b)
    
    
    def get_daemon_links(self, daemon_type):
        # the attribute name to get these, differs for schedulers and arbiters
        return daemon_type + 's'
    
    
    # Load into self.last_run_alive_daemons the json file listing the daemons enabled during the last run (nothing is returned)
    def _load_last_run_alive_daemons(self):
        """Load the alive daemons retention file into ``self.last_run_alive_daemons``.
        
        The retention directory is created when missing. A missing file leaves the attribute untouched, and any
        error is only logged as a warning.
        """
        try:
            retention_path = _PATH_ALIVE_DAEMONS
            retention_directory = os.path.dirname(retention_path)
            if not os.path.exists(retention_directory):
                os.makedirs(retention_directory)
            if not os.path.isfile(retention_path):
                return
            with open(retention_path, 'r') as retention_file:
                self.last_run_alive_daemons = json.load(retention_file)
        except Exception as e:
            logger.warning("[%s] Previous alive daemons retention cannot be load from file [%s]. \n%s" % (self.daemon_type, _PATH_ALIVE_DAEMONS, e))
    
    
    def _save_alive_daemons(self):
        """Save the daemons of the current configuration (ourselves excluded) in the alive daemons retention file.
        
        Does nothing without a configuration. The file is a json dict ``{daemon type: [satellite cfg, ...]}``
        built from ``give_satellite_cfg()`` of each daemon. A failure while writing is only logged as a warning.
        """
        if not self.conf:
            return
        
        # transform object reference into string
        daemons_to_write = {}
        for type_, daemons in self.conf.get_all_daemons().items():
            # We do not list ourselves.
            # NOTE(review): this removes self.me from the list given by get_all_daemons() itself; confirm this list is a copy
            if type_ == 'arbiters' and self.me in daemons:
                daemons.remove(self.me)
            daemons_to_write[type_] = [d.give_satellite_cfg() for d in daemons]
        
        file_path = _PATH_ALIVE_DAEMONS
        try:
            directory = os.path.dirname(file_path)
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(file_path, 'w') as retention_file:
                json.dump(daemons_to_write, retention_file)
        except Exception:
            # Not a bare except: SystemExit and KeyboardInterrupt must not be swallowed here
            logger.warning("[%s] Save alive daemons in file [%s] fail : [%s]" % (self.daemon_type, _PATH_ALIVE_DAEMONS, traceback.format_exc()))
        else:
            logger.info('Current daemons are saved in file "%s" for checking for deleted daemons in next run: ' % file_path)
            for type_, satellite_cfgs in daemons_to_write.items():
                logger.info('   - %-15s : %d daemon(s)' % (type_, len(satellite_cfgs)))
    
    
    @staticmethod
    def _finish_step(step_name, time_start):
        """Log at info level the time spent since ``time_start`` for the compilation step ``step_name``."""
        elapsed = time.time() - time_start
        logger.info('[performance] Hosts - Checks - Clusters - Users compilation: step %-50s  [%.3fs]' % (step_name, elapsed))
    
    
    def load_config_file(self):
        logger.info('Loading configuration')
        # REF: doc/shinken-conf-dispatching.png (1)
        buf = self.conf.read_config(self.config_files)
        raw_objects = self.conf.read_config_buf(buf)
        
        logger.debug('Opening local log file')
        
        if self.conf.bad_encoding_files:
            logger.error('[config] Some characters could not be read in utf-8 in these files :')
            for _file in self.conf.bad_encoding_files:
                logger.warning('[config] - %s' % _file)
        
        # First we need to get arbiters and modules,
        # so we can ask them for objects
        self.conf.create_objects_for_type(raw_objects, 'arbiter')
        self.conf.create_objects_for_type(raw_objects, 'module')
        
        self.conf.early_arbiter_linking()
        
        # Search which Arbiterlink I am
        enabled_arbiters = self.conf.arbiters.enabled()
        if len(enabled_arbiters) == 1:
            self.me = enabled_arbiters[0]
            self.is_master = not self.me.spare
            # Set myself as alive ;)
            self.me.alive = True
        else:
            for arb in enabled_arbiters:
                if arb.is_me(self.arb_name):
                    self.me = arb
                    self.is_master = not self.me.spare
                    # Set myself as alive ;)
                    self.me.alive = True
            
            if not self.me:
                sys.exit("Error: I cannot find my own Arbiter object, I bail out. "
                         "To solve this, please make sure that the 'enabled' parameter is set to 1, "
                         "or change the 'host_name' parameter in "
                         "the object %s in the Arbiter configuration file "
                         "with the value '%s'. "
                         "Thanks." % (self.arb_name if self.arb_name else "Arbiter", socket.gethostname()))
        
        # Now we are sure who we are, we should update our name for log & process name
        self.name = self.me.get_name()
        logger.set_name(self.name)
        
        if self.is_master:
            logger.info("I am the master Arbiter: %s" % self.name)
        else:
            logger.info("I am a spare Arbiter: %s" % self.name)
        
        logger.info("My own modules: " + ','.join([m.get_name() for m in self.me.modules]))
        
        # So I can update our name in the iniproperties file
        self.save_daemon_name_into_configuration_file(self.name)
        
        # We will need a real display name early in this daemon because we will launch external modules very soon
        self.compute_basic_display_name()
        
        self.modules_dir = getattr(self.conf, 'py3_modules_dir', '/opt/shinken/modules')
        
        before_modules = time.time()
        # Ok it's time to load the module manager now!
        self.load_modules_manager()
        # we request the instances without them being *started*
        # (for those that are concerned ("external" modules):
        # we will *start* these instances after we have been daemonized (if requested)
        self.modules_manager.set_modules(self.me.modules)
        
        logger.info('Loading modules for the arbiter')
        
        all_modules_are_started = self.do_load_modules()
        if not all_modules_are_started:
            logger.error('All modules are mandatory on the arbiter daemon. Please fix errors and restart the arbiter daemon.')
            sys.exit(2)
        
        logger.info('[performance] Modules initialization did took %.3fs' % (time.time() - before_modules))
        
        # Call modules that manage this read configuration pass
        self.hook_point('read_configuration')
        
        # grok our architecture name from our modules
        self._get_and_set_architecture_name()
        
        # Let ALL the configuration we are sending from where they are
        self.configuration_incarnation.set_author(self.me.get_name())
        self.configuration_incarnation.set_architecture_name(self._architecture_name)
        self.configuration_incarnation.set_author_uuid(self.server_uuid)
        logger.info('We are loading a new configuration %s that we will send to all other daemons' % self.configuration_incarnation)
        
        # Now we ask for configuration modules if they
        # got items for us
        for inst in self.modules_manager.get_all_alive_instances():
            if 'configuration' in inst.phases:
                try:
                    all_items = inst.get_objects()
                except Exception as exp:
                    logger.error('Instance %s raised an exception : %s. Arbiter can not continue : shut down' % (inst.get_name(), str(exp)))
                    self.do_stop()
                    sys.exit(2)
                
                types_creations = self.conf.types_creations
                for k in types_creations:
                    (cls, clss, prop) = types_creations[k]
                    if prop in all_items:
                        for x in all_items[prop]:
                            # test if raw_objects[k] are already set - if not, add empty array
                            if k not in raw_objects:
                                raw_objects[k] = []
                            # now append the object
                            raw_objects[k].append(x)
                        if len(all_items[prop]) != 0:
                            logger.debug("Added %-3d objects to %s from module %s" % (len(all_items[prop]), k, inst.get_name()))
        
        before = time.time()
        # Resume standard operations
        self.conf.create_objects(raw_objects)
        self._finish_step('create_objects', before)
        
        # Maybe conf is already invalid
        if not self.conf.conf_is_correct:
            self.conf.show_errors()
            sys.exit("***> One or more problems was encountered while processing the config files...")
        
        # Change Nagios2 names to Nagios3 ones
        self.conf.old_properties_names_to_new()
        
        # Manage all post-conf modules
        self.hook_point('early_configuration')
        
        # Ok here maybe we should stop because we are in a pure migration run
        if self.migrate:
            print("Migration MODE. Early exiting from configuration relinking phase")
            return
        
        before = time.time()
        # Create Template links
        self.conf.linkify_templates()
        
        # All inheritances
        self.conf.apply_inheritance(parameter_only='realm')
        self._finish_step('compute inheritance', before)
        
        # We need to check realms configuration before computing shards
        # so we get errors when there is no or too many default realms
        flush_arbiter_message_buffer()
        if self.conf.realms.is_correct():
            # Configuration need to have a Manager that will get all shards
            self.conf.create_sub_worker_manager()
            
            before = time.time()
            all_realm_process = {}
            for realm in self.conf.realms:
                p = Process(target=self._compute_shard_into_subprocess, name="shard-computation-%s" % (realm.get_name()), args=(realm,))
                p.start()
                all_realm_process[realm.get_name()] = p
            
            for (realm_name, process) in all_realm_process.items():
                logger.info('Waiting for realm: %s' % realm_name)
                process.join()
                if process.exitcode != 0:
                    write_final_json()
                    logger.error('The realm %s has errors. Exiting' % realm_name)
                    sys.exit(2)
            
            # Grok shards sent from workers
            self.conf.get_shards_from_sub_worker_manager()
            
            # The shard managers are no more needed
            self.conf.shutdown_sub_worker_manager()
            self._finish_step('compute shards into sub workers for each realm', before)
        
        before = time.time()
        # We still need to le the realms know each others
        self.conf.explode_common()
        self._finish_step('create final objects from definitions', before)
        
        before = time.time()
        # We still need to le the realms know each others
        self.conf.fill_default_common()
        self._finish_step('fill default values', before)
        
        before = time.time()
        # Remove disabled elements (in daemons)
        self.conf.remove_disabled()
        self._finish_step('remove no more useful objects', before)
        
        before = time.time()
        # Pythonize values
        self.conf.pythonize_common()
        self._finish_step('transform definitions into real objects', before)
        
        before = time.time()
        # Linkify objects to each other
        self.conf.linkify_common()
        self._finish_step('link elements together', before)
        
        # Configuration incarnation MUST be set BEFORE the whole configuration
        # serialization, so the spare will inherit from it
        self.link_configuration_instance_into_configuration_objects()
        
        before = time.time()
        # Explode global conf parameters into Classes
        self.conf.explode_global_conf()
        
        # set our own timezone and propagate it to other satellites
        self.conf.propagate_option('use_timezone')
        self.conf.propagate_option('language')
        self.set_tz(self.conf.use_timezone)
        self._finish_step('compute global parameters', before)
        
        # Manage all post-conf modules
        self.hook_point('late_configuration')
        
        before = time.time()
        # Correct conf?
        self.conf.is_correct_common()
        self._finish_step('Checking that the configuration is valid', before)
        
        # Maybe some elements where not wrong, so we must clean if possible
        self.conf.clean()
        
        # The conf can be incorrect here if the cut into parts see errors like
        # a realm with hosts and not schedulers for it
        flush_arbiter_message_buffer()
        if not self.conf.conf_is_correct:
            self.conf.show_errors()
            write_final_json()
            err = 'Configuration is incorrect, sorry, I bail out'
            logger.error(err)
            sys.exit(2)
        
        logger.info('Things look okay - No serious problems were detected during the pre-flight check')
        
        # Clean objects of temporary/unnecessary attributes for live work:
        self.conf.clean()
        flush_arbiter_message_buffer()
        # Exit if we are just here for config checking
        if self.verify_only:
            write_final_json()
            sys.exit(0)
        
        # Manage all post-conf modules
        self.hook_point('configuration_done')
        
        # Ok, here we must check if we go on or not.
        # TODO: check OK or not
        self.log_level = self.conf.log_level
        self.use_local_log = self.conf.use_local_log
        self.local_log = self.conf.local_log
        self.pidfile = os.path.abspath(self.conf.lock_file)
        self.idontcareaboutsecurity = self.conf.idontcareaboutsecurity
        self.user = self.conf.shinken_user
        self.group = self.conf.shinken_group
        self.daemon_enabled = self.conf.daemon_enabled
        self.daemon_thread_pool_size = self.conf.daemon_thread_pool_size
        self.http_backend = getattr(self.conf, 'http_backend', 'auto')
        
        self.max_file_descriptor_limit = 1024  # this daemon need a lof of file descriptors
        
        # If the user sets a workdir, lets use it. If not, use the pidfile directory
        if self.conf.workdir == '':
            self.workdir = os.path.abspath(os.path.dirname(self.pidfile))
        else:
            self.workdir = self.conf.workdir
        
        #  We need to set self.host & self.port to be used by do_daemon_init_and_start
        self.host = self.me.address
        if hasattr(self.conf, 'bind_addr'):
            self.host = self.conf.bind_addr
        self.port = self.me.port
        self.conf.set_master_arbiter_uuid(self.server_uuid)
        
        logger.info("Configuration Loaded")
    
    
    # We accept a configuration only if we are a SPARE (slave), NOT a MASTER
    def accept_configuration(self):
        """ Tell if this arbiter accepts a pushed configuration: only a non-master one does. """
        is_not_master = not self.is_master
        return is_not_master
    
    
    def get_currently_managed_configuration(self):
        """ Return our current configuration incarnation, as given by its dump_as_json() method. """
        current_incarnation = self.configuration_incarnation
        return current_incarnation.dump_as_json()
    
    
    # The instance_configuration must be set into the configuration, but also
    # in the satellites
    def link_configuration_instance_into_configuration_objects(self):
        """ Give our configuration incarnation to the configuration, to every satellite link and to the modules. """
        incarnation = self.configuration_incarnation
        configuration = self.conf
        configuration.set_configuration_incarnation(incarnation)
        
        # Walk every kind of daemon link in one single pass
        every_satellite_link = itertools.chain(
            configuration.schedulers,
            configuration.brokers,
            configuration.pollers,
            configuration.reactionners,
            configuration.receivers,
            configuration.arbiters,
        )
        for link in every_satellite_link:
            link.set_configuration_incarnation(incarnation)
        
        # The modules are filled with what the configuration reports as its incarnation
        configuration.modules.fill_configuration_incarnation(configuration.get_configuration_incarnation())
    
    
    def _remove_hosts_not_in_realm(self, realm):
        realm_name = realm.get_name()
        # look to only keep hosts that are in our realm
        realm_is_default = getattr(realm, 'default', '0') == '1'
        to_del = []
        for host in self.conf.hosts:
            # Do not delete template
            if host.is_tpl():
                continue
            host_realm = getattr(host, 'realm', '')
            if host_realm == '' and realm_is_default:
                continue
            if host_realm == realm_name:
                continue
            to_del.append(host.id)
        for _id in to_del:
            del self.conf.hosts[_id]
    
    
    def _compute_shard_into_subprocess(self, realm):
        try:
            self._do_compute_shard_into_subprocess(realm)
        except SystemExit:  # do not hook system exit, follow them
            raise
        except Exception as e:
            logger.error('ERROR: the realm %s worker process did fail and exit: %s' % (realm.get_name(), e))
            logger.print_stack()
            sys.exit(2)
    
    
    def _do_compute_shard_into_subprocess(self, realm):
        # type: (Realm) -> None
        """ Compile the configuration of one realm and serialize its shards.
        
        This is the body of a realm worker: it keeps only the hosts of the realm, then runs the
        configuration steps in order (inheritance, explode, defaults, pythonize, linkify,
        dependencies, validity check), cuts the result into shards and prepares them for sending.
        Each step duration is reported with self._finish_step().
        
        The process exits with code 2 if the configuration is incorrect after the cut into parts,
        and with code 0 just after the checks if self.verify_only is set.
        """
        start = time.time()
        realm_name = realm.get_name()
        
        if os.getuid() == 0:
            # Previous versions of Arbiter created root owned first monitoring time retention file,
            # so give them back to the configured shinken user/group.
            for retention_file in [OLD_ARBITER_RETENTION_JSON, REALM_RETENTION_JSON % realm_name]:
                if os.path.exists(retention_file):
                    try:
                        set_ownership(retention_file, user=getattr(self.conf, 'shinken_user', 'shinken'), group=getattr(self.conf, 'shinken_group', 'shinken'), is_link=os.path.islink(retention_file))
                    except Exception:
                        # Best effort: a failure here is only logged. Do NOT use a bare except,
                        # as it would also swallow SystemExit and KeyboardInterrupt.
                        raw_logger.get_sub_part('MONITORING START TIME COMPONENT').print_stack()
        # NOTE: the main daemon disabled the gc, but a fork() already re-enabled it
        # Let this process ultra low level
        os.nice(20)
        set_process_name('shinken-arbiter - worker - compilation %s' % realm_name)
        
        logger.set_name('Realm-%s' % realm_name)
        start_write_arbiter_messages('shinken_realm_%s.json' % realm_name)
        before = time.time()
        self._remove_hosts_not_in_realm(realm)
        self._finish_step('clean hosts from other realms', before)
        
        # The configuration UI is currently not able to give a check/service poller_tag as None when
        # forcing unset (do not take the host one), and gives "null" instead. So change poller_tag "null" => "None"
        before = time.time()
        self.conf.fix_null_poller_tag_service_into_real_none()
        self._finish_step('fix service with null as poller tag', before)
        
        before = time.time()
        # All inheritances
        self.conf.apply_inheritance()
        self._finish_step('compute inheritance', before)
        
        before = time.time()
        # Explode between types
        # NOTE: a step in the check/service need to be done AFTER the implicit inheritance part
        self.conf.explode_realm_only()
        self._finish_step('create final objects from definitions', before)
        
        # Create Name reversed list for searching list
        self.conf.create_reversed_list()
        
        # Removes service exceptions based on host configuration
        before = time.time()
        count = self.conf.remove_exclusions()
        if count > 0:
            # We have removed excluded services, and so we must recompute the search lists
            self.conf.create_reversed_list()
        self._finish_step('remove check exclusions', before)
        
        before = time.time()
        # Cleaning Twins objects
        self.conf.remove_twins()
        self._finish_step('remove duplicate objects', before)
        
        before = time.time()
        # Implicit inheritance for services
        self.conf.apply_implicit_inheritance()
        self._finish_step('compute inheritance between hosts and checks', before)
        
        before = time.time()
        # Fill default values
        self.conf.fill_default_realm_only()
        self._finish_step('fill default values', before)
        
        # Remove templates from config
        before = time.time()
        self.conf.remove_templates()
        
        # Remove disabled elements (like daemons)
        self.conf.remove_disabled()
        
        # We removed templates, and so we must recompute the search lists
        self.conf.create_reversed_list()
        self._finish_step('remove no more useful objects', before)
        
        # Overrides specific service instances properties
        before = time.time()
        self.conf.override_properties()
        self._finish_step('compute service overrides', before)
        
        before = time.time()
        # Implicit inheritance for services and hosts SEF-9066
        # Make it after :
        #  * Inheritance
        #  * Implicit inheritance
        #  * Service Override
        self.conf.post_inheritance_explode()
        self._finish_step('post inheritance objets creation from definitions', before)
        
        # Pythonize values
        before = time.time()
        self.conf.pythonize_realm_only()
        self._finish_step('transform definitions into real objects', before)
        
        # Linkify objects to each other
        before = time.time()
        self.conf.linkify_realm_only()
        self._finish_step('link elements together', before)
        
        # Change some default values (like poller/reactionner_tag into final ones)
        before = time.time()
        self.conf.change_default_values_into_finals()
        self._finish_step('change some default values into final ones', before)
        
        # Remove services without valid host
        before = time.time()
        self.conf.remove_orphan_services()
        self._finish_step('remove checks without hosts', before)
        
        # applying dependencies
        before = time.time()
        self.conf.apply_dependencies()
        self._finish_step('declare dependencies', before)
        
        # Explode global conf parameters into Classes
        before = time.time()
        self.conf.explode_global_conf()
        
        self.conf.load_value_from_global_conf()
        
        # set our own timezone and propagate it to other satellites
        self.conf.propagate_option('use_timezone')
        self.conf.propagate_option('language')
        self.set_tz(self.conf.use_timezone)
        self._finish_step('compute global parameters', before)
        
        before = time.time()
        # Update proxy elements from current elements
        self.conf.create_proxy_items()
        
        # Look for business rules, and create the dep tree
        self.conf.create_business_rules()
        # And link them
        # TODO: get back dep links
        # self.conf.create_business_rules_dependencies()
        self._finish_step('compute cluster trees', before)
        
        # Manage all post-conf modules
        self.hook_point('late_configuration')
        
        # Correct conf?
        before = time.time()
        self.conf.is_correct_realm_only()
        self._finish_step('checking the configuration is valid', before)
        # Maybe some elements were not wrong, so we must clean if possible
        self.conf.clean()
        
        before = time.time()
        # REF: doc/shinken-conf-dispatching.png (2)
        self.confs = self.conf.cut_into_parts(realm)
        self._finish_step('splitting elements into shards', before)
        
        # The conf can be incorrect here if the cut into parts see errors
        if not self.conf.conf_is_correct:
            self.conf.show_errors()
            end_write_arbiter_messages()
            err = 'Configuration is incorrect, sorry, I bail out'
            logger.error(err)
            sys.exit(2)
        
        logger.info('Things look okay - No serious problems were detected during the pre-flight check')
        
        # Clean objects of temporary/unnecessary attributes for live work:
        self.conf.clean()
        
        logger.info('REALM: %s Whole configuration time: %.2f' % (realm_name, time.time() - start))
        
        # Exit if we are just here for config checking
        if self.verify_only:
            end_write_arbiter_messages()
            sys.exit(0)
        
        # Set & get monitoring start time, for this realm
        # NOTE: if a host changes realm, it will lose its monitoring start time
        before = time.time()
        monitoring_start_time_component = MonitoringStartTimeComponent(logger, self.conf)
        monitoring_start_time_component.set_monitoring_start_time_realm_only(realm)
        self._finish_step('set monitoring start time to new elements', before)
        
        end_write_arbiter_messages()
        before = time.time()
        self.conf.prepare_for_sending(realm)
        self._finish_step('serializing the realm shards', before)
    
    
    # Main loop function
    def main(self):
        """ Daemon entry point: load the configuration, start the daemon, then enter the main loop.
        
        A SystemExit is forwarded with its exit code. Any other exception is logged as critical
        with its stack, then raised again.
        """
        try:
            # Log will be broks
            for header_line in self.get_header():
                logger.info(header_line)
            self.daily_log_version()
            
            self.load_config_file()
            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            
            self.daemon_http_start()
            
            # Now we are daemonized, we can generate the new monitoring configuration for the arbiter master
            serialization_start = time.time()
            self.conf.create_new_monitoring_configuration()
            self._finish_step("Serializing the new monitoring configuration", serialization_start)
            
            # Warn about every definition that was ignored during the import
            for ignored_type, ignored_definitions in self.conf.ignored_items.items():
                for ignored_definition in ignored_definitions:
                    logger.warning('[config] Cannot import %s defined in %s. Please use the synchronizer with a cfg-file source to import %ss into arbiter' % (ignored_type, ignored_definition, ignored_type))
            
            unreadable_files = self.conf.bad_encoding_files
            if unreadable_files:
                logger.error('[config] Some characters could not be read in utf-8 in these files :')
                for unreadable_file in unreadable_files:
                    logger.warning('[config] - %s' % unreadable_file)
            
            # ok we are now fully daemonized (if requested) now we can start our "external" modules (if any)
            self.modules_manager.start_external_instances()
            
            # Let the modules know that we are now fully daemonized
            self.hook_point('daemon_daemonized')
            
            # Ok now we can load the retention data
            self.hook_point('load_retention')
            
            if self.is_master:
                self.have_configuration = True
            # And go for the main loop
            self.do_mainloop()
        except SystemExit as exit_request:
            # The sys.exit() calls done in load_config_file end up here: forward the same exit code
            sys.exit(exit_request.code)
        except Exception:
            logger.critical('The daemon did have an unrecoverable error. It must exit.')
            logger.critical('You can log a bug to your Shinken integrator with the error message:')
            logger.print_stack(level=logging.CRITICAL)
            raise
    
    
    def _save_master_configuration_into_retention(self, received_configuration, arbiter_master_full_version, arbiter_master_name, configuration_incarnation):
        """ Save the serialized configuration received from the master into the spare retention file.
        
        The file content is a pickled dict with one single entry: {arbiter_master_full_version: serialized full_conf}.
        The data is first written into a ".tmp" file, then moved onto the final path, so a failed write
        does not damage the previous retention file. This is a best effort: a failure is logged
        as an error and is not raised to the caller.
        
        :param received_configuration: dict received from the master, its 'full_conf' entry is what is saved
        :param arbiter_master_full_version: version of the master, used as key of the saved dict
        :param arbiter_master_name: name of the master, only used in the log messages
        :param configuration_incarnation: incarnation of the configuration, only used in the log messages
        """
        # save the spare last conf with the current version as pickle
        logger_latest_monitoring_configuration.info(
            'The NEW Monitoring Configuration %s received from Arbiter MASTER "%s" [version=%s] will be saved into file "%s"' % (configuration_incarnation, arbiter_master_name, arbiter_master_full_version, _PATH_LAST_SPARE_CONF))
        data_to_write = {
            arbiter_master_full_version: received_configuration['full_conf']
        }
        tmp_path_last_spare_conf = _PATH_LAST_SPARE_CONF + ".tmp"
        try:
            with open(tmp_path_last_spare_conf, "wb") as conf_retention_file:
                pickle.dump(data_to_write, conf_retention_file, pickle.HIGHEST_PROTOCOL)
            shutil.move(tmp_path_last_spare_conf, _PATH_LAST_SPARE_CONF)
        except Exception as exp:
            # A save failure is a real problem for the spare: log it as an error, not as an info
            logger_latest_monitoring_configuration.error('The NEW Monitoring Configuration %s received from Arbiter MASTER "%s" [version=%s] failed to save in file "%s" because of a system error: %s' % (
                configuration_incarnation, arbiter_master_name, arbiter_master_full_version, _PATH_LAST_SPARE_CONF, exp))
            # Do not let a partially written temporary file behind us
            try:
                os.remove(tmp_path_last_spare_conf)
            except OSError:
                pass  # the temporary file was not created, or was already moved
    
    
    def setup_new_conf(self):
        """ Setup a new conf received from a Master arbiter.
        
        The received configuration (self.new_conf) is skipped, with an error log, when:
          * it has no configuration_incarnation
          * it has no arbiter_master_full_version (the master is too old)
          * the master version is not exactly our version
          * it has no 'full_conf' entry (nothing to load)
        
        When it is accepted, the serialized configuration is saved into the retention file, loaded
        as the current configuration, and self.have_configuration is set.
        """
        conf = self.new_conf
        self.new_conf = None
        arbiter_master_name = conf['arbiter_trace']['name']
        arbiter_master_uri = conf['arbiter_trace']['uri']
        configuration_incarnation = conf.get('configuration_incarnation', None)
        if configuration_incarnation is None:
            logger_latest_monitoring_configuration.error('The NEW Monitoring Configuration received from the arbiter "%s" at "%s" is invalid, missing configuration_incarnation. skipping it.' % (arbiter_master_name, arbiter_master_uri))
            return
        arbiter_master_full_version = conf.get('arbiter_master_full_version', None)
        if arbiter_master_full_version is None:  # not up-to-date master
            logger_latest_monitoring_configuration.error('The NEW Monitoring Configuration received from the arbiter master "%s" is too old and cannot be loaded: the Arbiter master is too old and must be updated.' % arbiter_master_name)
            return
        
        # We must have the exact same version than our MASTER to accept it
        our_version = self.get_full_version()
        formatted_arbiter_version = self._format_daemon_full_version(arbiter_master_full_version)
        if formatted_arbiter_version != our_version:
            logger_latest_monitoring_configuration.error('The Arbiter MASTER Version ( %s ) is different from the Arbiter SPARE version ( %s ). The NEW Monitoring Configuration can\'t be loaded.' % (formatted_arbiter_version, our_version))
            return
        
        # Without the serialized configuration there is nothing to load: skip it and keep what we
        # currently have, instead of failing with a KeyError a few lines below
        if 'full_conf' not in conf:
            logger_latest_monitoring_configuration.error('The NEW Monitoring Configuration %s received from "%s" is invalid, missing full_conf. skipping it.' % (configuration_incarnation, arbiter_master_name))
            return
        
        logger_latest_monitoring_configuration.info('The NEW Monitoring Configuration %s received from "%s" is valid' % (configuration_incarnation, arbiter_master_name))
        # load the real conf here, don't block the put_conf return fast to make the arbiter master fast
        # save the spare last conf with the current version as pickle
        self._save_master_configuration_into_retention(conf, arbiter_master_full_version, arbiter_master_name, configuration_incarnation)
        
        # now we can unpickle the full_conf
        conf['full_conf'] = SafeUnpickler.loads(conf['full_conf'], 'Configuration received from Arbiter master')
        
        self.cur_conf = conf['full_conf']
        self.conf = conf['full_conf']
        for arb in self.conf.arbiters:
            if arb.is_me(self.arb_name):
                self.me = arb
                arb.is_me = lambda x: True  # we now definitively know who we are, just keep it.
            else:
                arb.is_me = lambda x: False  # and we know who we are not, just keep it.
        
        self.load_new_configuration_incarnation_from_master(configuration_incarnation)
        self.have_configuration = True
    
    
    # Whatever the master is saying to us, we are switching to the idle mode
    def arbiter_master__you_are_talking_to_me(self):  # IMPORTANT: read with an italian accent
        """ The master did talk to us: if we were running, stop it by switching must_run to False. """
        if not self.must_run:
            return
        self.must_run = False
    
    
    def do_not_run(self):
        """ Manage a "do not run" order: a spare goes idle and notes when the master spoke, a master ignores it. """
        if not self.is_master:
            # I'm just a spare, so I listen to my master, if needed, log it and go idle
            self.arbiter_master__you_are_talking_to_me()
            logger_configuration.debug('Received message not to run from arbiter master')
            self.last_master_speak = time.time()
            return
        
        # I'm the master: ignore the command
        logger_configuration.error('Received message not to run. I\'am the Master, ignore and continue to run.')
    
    
    def arbiter_master_send_us_a_conf(self):
        """ The master is sending us a configuration: go idle if needed, and log our configuration incarnation. """
        # The arbiter master talks to us, so maybe we must switch to the idle mode
        self.arbiter_master__you_are_talking_to_me()
        announcement = 'The NEW Arbiter MASTER send us the configuration: %s' % self.configuration_incarnation
        logger_latest_monitoring_configuration.info(announcement)
    
    
    def load_new_configuration_incarnation_from_retention(self, configuration_incarnation):
        """ Use the incarnation of the configuration loaded from the retention, and its architecture name. """
        self.configuration_incarnation = configuration_incarnation
        loaded_message = 'The configuration incarnation is loaded from the LATEST Monitoring Configuration. New incarnation: %s' % configuration_incarnation
        logger_latest_monitoring_configuration.info(loaded_message)
        
        # The architecture name comes with the incarnation: only log it when it did change
        previous_architecture_name = self._architecture_name
        self._architecture_name = configuration_incarnation.get_architecture_name()
        if self._architecture_name == previous_architecture_name:
            return
        logger_configuration.info('Using the Architecture name from the LATEST Arbiter MASTER configuration: %s' % self._architecture_name)
    
    
    def load_new_configuration_incarnation_from_master(self, configuration_incarnation):
        """ Use the incarnation of the configuration received from the master, and its architecture name. """
        self.configuration_incarnation = configuration_incarnation
        loaded_message = 'The NEW Monitoring Configuration is loaded from the last Arbiter MASTER configuration %s' % configuration_incarnation
        logger_configuration.info(loaded_message)
        
        # The architecture name comes with the incarnation: only log it when it did change
        previous_architecture_name = self._architecture_name
        self._architecture_name = configuration_incarnation.get_architecture_name()
        if self._architecture_name == previous_architecture_name:
            return
        logger_configuration.info('Using the Architecture name from the Arbiter MASTER configuration: %s' % self._architecture_name)
    
    
    def do_loop_turn(self):
        """ One turn of the daemon loop.
        
        A spare tries once (on the first turn only) to reload the last configuration it received from
        the master from its retention file, then waits for the master death at each turn.
        Then, if must_run is set, the main run() is called.
        """
        # If I am a spare, I wait for the master arbiter to send me true conf.
        if self.me.spare and not self._is_retention_load_already_try:
            self._is_retention_load_already_try = True  # Only load the retention once
            # try to load the last receive conf, check the version, if match use it instead of non-sync and probably false cfg files that are in this conf
            self.conf = None
            self.dispatcher = None
            self._load_last_spare_configuration_from_retention()
        
        # Now the configuration load is done, can work and wait
        if self.me.spare:
            logger.debug('I wait for master')
            self.wait_for_master_death()
        
        if self.must_run:
            # Main loop
            self.run()
    
    
    def _load_last_spare_configuration_from_retention(self):
        """ Try to use the configuration saved in the spare retention file as the current configuration.
        
        The file is expected to contain a pickled dict with one single entry {master version: serialized conf}.
        Every problem (no file, unreadable or corrupted file, version mismatch, incompatible or
        too old configuration) is logged and leaves the daemon without configuration.
        """
        if not os.path.isfile(_PATH_LAST_SPARE_CONF):
            logger_latest_monitoring_configuration.info('Skipping to load the LATEST Monitoring Configuration as there is no file "%s"' % _PATH_LAST_SPARE_CONF)
            return
        
        logger_latest_monitoring_configuration.info('Trying to reload the LATEST Monitoring Configuration from file "%s"' % _PATH_LAST_SPARE_CONF)
        saved_conf = {}
        try:
            # NOTE(security): pickle.load can execute code, this file must only be writable by the daemon user
            # The with statement closes the file even if the load fails
            with open(_PATH_LAST_SPARE_CONF, 'rb') as conf_retention_file:
                saved_conf = pickle.load(conf_retention_file)
        except (EOFError, ValueError, IOError, IndexError, TypeError, AttributeError, ImportError, pickle.UnpicklingError) as exp:
            # A truncated or corrupted file must not kill the spare: it will wait for a new configuration
            logger_latest_monitoring_configuration.error('Failed to load the last Monitoring Configuration "%s", because of a file error: %s' % (_PATH_LAST_SPARE_CONF, exp))
        
        # we should have only one conf here
        if not isinstance(saved_conf, dict) or len(saved_conf) != 1:
            return
        
        saved_version, pickled_full_conf = next(iter(saved_conf.items()))
        saved_version = self._format_daemon_full_version(saved_version)
        current_version = self.get_full_version()
        if saved_version != current_version:
            # this conf was written for another version of the arbiter the objects 'maybe' don't have the same representation
            logger_latest_monitoring_configuration.error(
                'Failed to load the LATEST Monitoring Configuration "%s", the saved version ( %s ) doesn\'t match with my current version ( %s )' % (_PATH_LAST_SPARE_CONF, saved_version, current_version))
            return
        
        try:  # Maybe we can have data that is not compatible
            loaded_conf = SafeUnpickler.loads(pickled_full_conf, 'Saved configuration from Arbiter MASTER')  # type: Optional[Config]
        except Exception as exp:  # noqa all can happen here
            logger_latest_monitoring_configuration.error('Failed to load the LATEST Monitoring Configuration "%s" because of some elements loading error: %s' % (_PATH_LAST_SPARE_CONF, exp))
            return
        if loaded_conf is None:
            return
        
        # Maybe the conf is too old, from a patch that fix arbiter->scheduler communication, so check this
        # and if the schedulers are not up-to-date, skip this conf object
        if not loaded_conf.check_post_02_08_02_satellite_communication():
            logger_latest_monitoring_configuration.warning(
                'The last Arbiter master configuration load from %s was too old, skipping it until we are receiving a new configuration from the master arbiter' % _PATH_LAST_SPARE_CONF)
            return
        
        self.conf = loaded_conf
        self.cur_conf = self.conf
        self.load_new_configuration_incarnation_from_retention(self.conf.get_configuration_incarnation())
        self.have_configuration = True
        logger_latest_monitoring_configuration.info('Using the LATEST Monitoring Configuration %s' % (self.conf.get_configuration_incarnation()))
    
    
    # Called when the Arbiter is stopping
    def do_stop(self):
        """ Stop the daemon through the parent class, then save the alive daemons """
        # Parent stop first, our own save after
        super().do_stop()
        self._save_alive_daemons()
    
    
    # Get 'objects' from external modules
    # It can be used to get external commands for example
    def get_objects_from_from_queues(self):
        """ Drain the 'from' queue of each external module and add every object found to ourselves """
        for module_queue in self.modules_manager.get_external_from_queues():
            keep_reading = True
            while keep_reading:
                try:
                    self.add(module_queue.get(block=False))
                except Empty:
                    # Nothing more in this queue, go to the next one
                    keep_reading = False
                except (IOError, EOFError) as exp:
                    # Maybe the queue had problems: log it and give up on this queue
                    logger.error("An external module queue got a problem '%s'" % str(exp))
                    keep_reading = False
    
    
    def _get_master_timeout(self):
        """ Give the time (in seconds) the spare waits for the master before taking the master role.
        
        It's 300 by default. When our conf has an arbiter that is not a spare, it's the
        check_interval * max_check_attempts of the first one found.
        """
        default_timeout = 300
        if not self.conf:
            return default_timeout
        
        # The master is the first arbiter of the conf that is not a spare
        master = None
        for candidate in self.conf.arbiters:
            if not candidate.spare:
                master = candidate
                break
        if not master:
            return default_timeout
        
        interval = master.check_interval
        attempts = master.max_check_attempts
        timeout = interval * attempts
        logger_wait_for_master_death.info("The Arbiter spare will wait for the Arbiter master for %d seconds (= master check_interval=%ss * master max_check_attempts=%s) until take over the master role" % (
            timeout, interval, attempts))
        return timeout
    
    
    # We wait (block) for arbiter to send us something
    def wait_for_master_death(self):
        """ Block until we are interrupted or until the master stays silent for longer than its timeout.
        
        self.must_run is unset on entry. This method sets it back only when the master timeout
        is reached and we have a conf to take the master role with.
        """
        logger.info('Waiting for master death')
        self.must_run = False
        self.last_master_speak = time.time()
        
        silence_limit = self._get_master_timeout()
        while not self.interrupted:
            self.sleep(1)
            
            # A new conf was received: load it and compute the timeout again from it
            if self.new_conf:
                self.setup_new_conf()
                silence_limit = self._get_master_timeout()
            
            # If the Master did ask us to go sleep, do it, and don't even log that we are waiting
            if self.deactivated_by_arbiter:
                continue
            
            # Now check if master is dead or not
            silence_duration = time.time() - self.last_master_speak
            
            # we do not want to log when all is OK: debug for a short silence, warning after 10s
            silence_message = 'Arbiter Master doesn\'t speak to me since %ds. I\'ll take the master role after %s seconds.' % (silence_duration, silence_limit)
            if silence_duration > 10:
                logger_wait_for_master_death.warning(silence_message)
            else:
                logger_wait_for_master_death.debug(silence_message)
            
            if silence_duration > silence_limit:
                if self.conf:
                    separator = '-' * 200
                    logger_wait_for_master_death.info(separator)
                    logger_wait_for_master_death.info('Arbiter Master is dead. The arbiter %s take the master role with the configuration %s' % (self.me.get_name(), self.configuration_incarnation))
                    logger_wait_for_master_death.info(separator)
                    self.must_run = True
                else:
                    # No conf to work with: we cannot take the master role
                    if self.deactivated_by_arbiter:
                        logger_configuration.info('Arbiter master deactivated me ; waiting for new master...')
                    else:
                        logger_wait_for_master_death.error('Arbiter Master is dead. The arbiter %s should the lead but no conf was given and/or can be loaded from %s' % (self.me.get_name(), _PATH_LAST_SPARE_CONF))
                    self.must_run = False
                # In both cases the wait is over
                break
    
    
    # Take all external commands, make packs and send them to the schedulers
    def push_external_commands_to_schedulers(self):
        """ Resolve our pending external commands, then give its own commands to each alive scheduler.
        
        The commands stored on a scheduler are dropped at the end of its turn, even if it was not alive.
        """
        # First let the external command manager resolve each command
        for command in self.external_commands:
            self.external_command.resolve_command(command)
        
        # Then send what each scheduler got, only if it is alive
        for scheduler in self.conf.schedulers:
            pending = scheduler.external_commands
            nb_pending = len(pending)
            if nb_pending > 0 and scheduler.alive:
                logger.debug("Sending %d commands to scheduler %s" % (nb_pending, scheduler.get_name()))
                scheduler.run_external_commands(pending)
            # Sent or not, do not keep them
            scheduler.external_commands = []
    
    
    # We will log if there are time period activations change as NOTICE in logs.
    def check_and_log_tp_activation_change(self):
        """ Ask each timeperiod of the conf to check and log its activation change, then to clean its cache """
        for timeperiod in self.conf.timeperiods:
            timeperiod.check_and_log_activation_change()
            timeperiod.clean_cache()
    
    
    # Main function
    def run(self):
        """ Main function: dispatch the configuration to the satellites, then loop until we must stop.
        
        The first part is done once: find who we are in the conf, build the dispatcher, do a first
        dispatch and create the external command manager. Then the main loop turns while
        self.must_run is set and we are not interrupted.
        """
        start_snap = cpu_stats_helper.get_thread_cpu_snapshot()
        # Before running, I must be sure who am I
        # The arbiters change, so we must re-discover the new self.me
        for arb in self.conf.arbiters:
            if arb.is_me(self.arb_name):
                self.me = arb
        # Update the logger with our name
        if self.me:
            logger.set_name(self.me.get_name())
        
        self.monitoring_start_time_component = MonitoringStartTimeComponent(logger, self.conf, compute_module_list=True)
        
        # We were waiting for check_interval to set arbiter trace
        self._generate_and_link_arbiter_trace()
        
        logger.set_human_format(on=self.conf.human_timestamp_log)
        # NOTE(review): unlike the guarded call above, this one is done even if self.me is not set
        # -- confirm self.me is always known here
        logger.set_name(self.me.get_name())
        # load the last run alive daemons to be able to ask them to do nothing once the conf will be sent
        self._load_last_run_alive_daemons()
        logger.info('Begin to dispatch configurations %s to satellites' % self.configuration_incarnation)
        self.dispatcher = Dispatcher(self.conf, self.me, self.trace, self.configuration_incarnation)
        
        # Fast check for daemons ALIVE/DEAD
        self.dispatcher.initial_daemons_check()
        # Now we can list all our daemons
        self.dispatcher.print_initial_listing()
        
        # Normal loop start: same dispatcher calls as in the main loop below, done once before it
        self.dispatcher.check_alive()
        self.dispatcher.assert_inventories_dispatch()
        self.dispatcher.check_dispatch()
        self.dispatcher.check_bad_dispatch()
        # REF: doc/shinken-conf-dispatching.png (3)
        self.dispatcher.dispatch()
        # ask the no more enabled daemons to stop working
        self.dispatcher.disable_previous_run_daemons(self.last_run_alive_daemons, _PATH_ALIVE_DAEMONS)
        
        # Now we can get all initial broks for our satellites
        self.get_initial_broks_from_satellitelinks()
        
        # Now create the external commander. It's just here to dispatch
        # the commands to schedulers
        # TODO: get back
        e = ExternalCommandManager(self.conf, 'dispatcher')
        e.load_arbiter(self)
        self.external_command = e
        
        logger.debug('Run baby, run... %s' % start_snap.get_diff())
        # The watchdog is entered at each loop turn (context manager) and quit once the loop is over
        mainloop_watchdog = WatchDogThreadDumper('Main loop', wait_time=60 * 30, dump_interval=60 * 5, fatal_dead_lock_delay=60 * 30, multi_usage=True)
        loop_logger = LoggerFactory.get_logger('ARBITER TIME')
        
        while self.must_run and not self.interrupted:
            with mainloop_watchdog:
                self.daily_log_version()
                # CPU snapshot and wall clock start of this turn, both used for the 'Loop stop' logs
                loop_snap = cpu_stats_helper.get_thread_cpu_snapshot()
                loop_start = time.time()
                self._increase_loop_number()
                loop_number = self._get_loop_number()
                loop_logger.info('[ === Loop start === ] [ Loop number=%-5d ] ===-===-===-===-===-===-===-===-===-===-===-===-===' % loop_number)
                # Try to see if one of my module is dead, and
                # try to restart previously dead modules :)
                self.check_and_del_zombie_modules()
                
                # Call modules that manage a starting tick pass
                self.hook_point('tick')
                
                # Look for logging timeperiods activation change (active/inactive)
                self.check_and_log_tp_activation_change()
                
                # Now the dispatcher job
                self.dispatcher.check_alive()
                self.dispatcher.assert_inventories_dispatch()
                self.dispatcher.check_dispatch()
                # REF: doc/shinken-conf-dispatching.png (3)
                self.dispatcher.check_bad_dispatch()
                self.dispatcher.dispatch()
                
                # From here we time what is not the dispatcher job (logged with logger_perf below)
                other_taks_start = time.time()
                # Now get things from our module instances
                self.get_objects_from_from_queues()
                
                # Maybe our satellites links raise new broks. Must reap them
                self.get_broks_from_satellitelinks()
                
                # One broker is responsible for our broks,
                # we must give him our broks
                self.push_broks_to_broker()
                
                self.get_external_commands_from_satellites()
                
                # First get message from satellites : maybe the response is received and don't need to ask sanitize
                self.get_messages_from_satellites()
                
                self.launch_sanitize()
                self.send_messages_to_satellites()
                
                # Log the number of broks sent during this turn (only if any), then reset the counter
                if self.nb_broks_send != 0:
                    logger.debug("Nb Broks send: %d" % self.nb_broks_send)
                self.nb_broks_send = 0
                
                self.push_external_commands_to_schedulers()
                
                # It's sent, do not keep them
                # TODO: check if really sent. Queue by scheduler?
                self.external_commands = []
                
                logger_perf.info('Time to do send broks to brokers and push shinken internal commands (like recheck, set acknowledge, etc) to schedulers [ %.3f ]s' % (time.time() - other_taks_start))
                
                diff_time = time.time() - loop_start
                loop_logger.info('[ === Loop stop  === ] [ Loop number=%-5d ] [PERF] [ %.3f ]s' % (loop_number, diff_time))
                loop_logger.debug('[ === Loop stop  === ] [ Loop number=%-5d ] [PERF] %s' % (loop_number, loop_snap.get_diff()))
                
                # Protect it against time shifting from system
                # We aim at one loop turn by second: sleep what is left of this second
                # NOTE(review): sleep_time is not clamped here. It is negative when the turn lasted more than 1s
                # and greater than 1 if the system clock went backward during the turn
                # -- confirm self.sleep() copes with both cases
                sleep_time = 1 - diff_time
                self.sleep(sleep_time)
        
        # Main Loop has ended, we no more need the watchdog
        mainloop_watchdog.quit()
        # We are a spare and must_run was unset (not by this method; presumably when the master
        # is back, as the log message below says)
        if not self.must_run and self.me.spare:
            logger_wait_for_master_death.info('-' * 200)
            logger_wait_for_master_death.info('The arbiter master takes over the configuration %s. Switching back to sleep move' % self.configuration_incarnation)
            logger_wait_for_master_death.info('-' * 200)
            # clear run info
            self.dispatcher = None  # clean this one for healthcheck and talk to info.
    
    
    def get_daemons(self, daemon_type):
        """ Give the daemons defined in our conf for the given type, or None if the conf has no such list """
        # The list is looked up on the conf under the type name followed by a 's'
        # shouldn't the 'daemon_types' (whatever it is above) be always present?
        conf_attribute = daemon_type + 's'
        return getattr(self.conf, conf_attribute, None)
    
    
    # Helper functions for retention modules. The Arbiter has nothing to save here (no broks nor external commands are given), so it gives an empty dict
    def get_retention_data(self):
        """ Give the data a retention module should save for us: always a new empty dict """
        nothing_to_save = {}
        return nothing_to_save
    
    
    # Get back our data from a retention module
    def restore_retention_data(self, data):
        """ Called with the data got back from a retention module: the data is ignored, nothing is restored """
        return None
    
    
    def launch_sanitize(self):
        """ Ask the monitoring start time component for the messages to send (giving it our name), and send each one to the satellites """
        component = self.monitoring_start_time_component
        for inter_daemon_message in component.get_inter_daemon_message_to_send(self.name):
            self.send_message_to_satellites(inter_daemon_message)
