#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import cPickle
import json
import os
import shutil
import socket
import sys
import time
import traceback
from Queue import Empty
from multiprocessing import Process

from shinken.brok import Brok, PersistantBrok
from shinken.configuration_incarnation import ConfigurationIncarnation
from shinken.daemon import Daemon, Interface, IStatsInterface
from shinken.dispatcher import Dispatcher
from shinken.external_command import ExternalCommand
from shinken.external_command import ExternalCommandManager
from shinken.log import logger, get_chapter_string, LoggerFactory
from shinken.misc.type_hint import TYPE_CHECKING
from shinken.objects.config import Config
from shinken.util import jsonify_r, set_process_name

if TYPE_CHECKING:
    from shinken.misc.type_hint import Optional, Any, Dict

# Logger obtained from the project's LoggerFactory, and a sub-logger tagged 'PERF'.
# NOTE(review): part_name_size presumably sets the displayed width of the tag - confirm in LoggerFactory.
raw_logger = LoggerFactory.get_logger()
logger_perf = raw_logger.get_sub_part(u'PERF', part_name_size=4)

# JSON file written by Arbiter._save_alive_daemons() and read back by Arbiter._load_last_run_alive_daemons()
_PATH_ALIVE_DAEMONS = '/var/lib/shinken/alive_daemons_retention.json'
# File removed by IForArbiter.wait_new_conf().
# NOTE(review): presumably the last configuration received by a spare arbiter - confirm where it is written.
_PATH_LAST_SPARE_CONF = '/var/lib/shinken/arbiter_spare_last_conf.dat'

# Chapter string used as a prefix of the configuration related log lines
CHAPTER_CONFIGURATION = get_chapter_string('CONFIGURATION')


# Interface for the other Arbiter
# It connects, and together we decide who's the Master and who's the Slave, etc.
# Here is also a function to get a new conf from the master
class IForArbiter(Interface):
    """Remote API of the arbiter daemon, called by the other arbiter and by monitoring tools.
    
    The methods read the daemon through `self.app`. The `doc`, `need_lock` and `method`
    attributes assigned right after each method attach metadata to it.
    NOTE(review): these attributes are presumably consumed by the Interface base class / HTTP layer - confirm there.
    """
    
    
    def get_daemon_infos(self):
        """Return a summary of this arbiter and of the satellites known by its dispatcher.
        
        :return: {'arbiter': {...}, 'satellites': [...]}; 'satellites' stays empty while there is no dispatcher
        """
        to_return = {
            'arbiter'   : {
                'activated'  : self.app.must_run,
                'spare'      : self.app.me.spare,
                'version'    : self.get_context()['current_version'],
                'api_version': Interface.RAW_STATS_API_VERSION,
            },
            'satellites': []
        }
        if self.app.dispatcher:
            for e in self.app.dispatcher._get_all_satellites():
                elt = {
                    'diff_time_with_arbiter': e.diff_time_with_arbiter,
                    'display_name'          : e.get_name(),
                    'alive'                 : e.alive,
                    'reachable'             : e.reachable,
                    'type'                  : e.__class__.my_type,
                    'daemon_version'        : e.daemon_version
                }
                to_return['satellites'].append(elt)
        return to_return
    
    
    get_daemon_infos.doc = "read the sources"
    get_daemon_infos.need_lock = False
    
    
    # DISABLED
    def have_conf(self, magic_hash):
        """Disabled call: always answer False, whatever magic_hash is."""
        return False
    
    
    have_conf.doc = 'DISABLED'
    have_conf.need_lock = False
    
    doc = 'Put a new configuration to the daemon'
    
    
    # The master Arbiter is sending us a new conf in a pickle way. Ok, we take it
    def put_conf(self, conf):
        """Store the received configuration (base class job) then notify the daemon."""
        super(IForArbiter, self).put_conf(conf)
        self.app.arbiter_master_send_us_a_conf()
    
    
    put_conf.method = 'POST'
    put_conf.doc = doc
    
    doc = 'Get the managed configuration (internal)'
    
    
    def get_config(self):
        """Return the configuration object currently held by the daemon (None after wait_new_conf)."""
        return self.app.conf
    
    
    get_config.doc = doc
    get_config.need_lock = False
    
    doc = 'Ask the daemon to do not run'
    
    
    # The master arbiter asks me not to run!
    def do_not_run(self):
        """Forward the 'do not run' order to the daemon."""
        self.app.do_not_run()
    
    
    do_not_run.need_lock = False
    do_not_run.doc = doc
    
    
    def wait_new_conf(self):
        """Drop the current configuration and stop running until a new one is received.
        
        The last spare configuration file is removed too, on a best effort basis.
        """
        super(IForArbiter, self).wait_new_conf()
        self.app.must_run = False
        self.app.conf = None
        self.app.cur_conf = None
        
        logger.debug("Received wait_new_conf")
        try:
            os.remove(_PATH_LAST_SPARE_CONF)
        except OSError:
            # Best effort: the file may simply not exist (or not be removable), this must not fail the call
            pass
    
    
    wait_new_conf.need_lock = False
    # NOTE(review): `doc` still holds the do_not_run text here, so both calls share the same description
    wait_new_conf.doc = doc
    
    doc = 'Get the satellite names sort by type'
    
    
    # Here a function called by check_shinken to get daemons list
    def get_satellite_list(self, daemon_type=''):
        """Return the daemon names grouped by daemon type.
        
        :param daemon_type: when set, only this type is returned; an unknown type gives an empty dict
        :return: {type: [names]}; daemons without a '<type>_name' attribute are skipped
        """
        res = {}
        for t in ['arbiter', 'scheduler', 'poller', 'reactionner', 'receiver', 'broker']:
            if daemon_type and daemon_type != t:
                continue
            satellite_list = []
            res[t] = satellite_list
            daemon_name_attr = t + "_name"
            daemons = self.app.get_daemons(t)
            for dae in daemons:
                if hasattr(dae, daemon_name_attr):
                    satellite_list.append(getattr(dae, daemon_name_attr))
        return res
    
    
    get_satellite_list.doc = doc
    get_satellite_list.need_lock = False
    
    doc = 'Dummy call for the arbiter'
    
    
    # Dummy call. We are the master, we manage what we want
    def what_i_managed(self):
        """Dummy call: always return an empty dict."""
        return {}
    
    
    what_i_managed.need_lock = False
    what_i_managed.doc = doc
    
    doc = 'Return all the data of the satellites'
    
    
    # Arbiter ask me which shard I do manage
    def get_currently_managed_configuration(self):
        """Return (and log at debug level) what the daemon reports as its managed configuration."""
        managed_configuration = self.app.get_currently_managed_configuration()
        logger.debug("%s Anoter arbiter asked me what I manage. It's %s" % (CHAPTER_CONFIGURATION, managed_configuration))
        return managed_configuration
    
    
    get_currently_managed_configuration.need_lock = False
    get_currently_managed_configuration.doc = 'Return the managed configuration ids (internal)'
    
    
    # We will try to export all data from our satellites, but only the json-able fields
    def get_all_states(self):
        """Export the JSON serializable properties of every daemon of the configuration.
        
        :return: {daemon_type: [one dict per daemon]}; each dict maps the names found in the daemon class
                 `properties` and `running_properties` to their value, when the value can be dumped as JSON
        """
        res = {}
        for t in ['arbiter', 'scheduler', 'poller', 'reactionner', 'receiver', 'broker']:
            lst = []
            res[t] = lst
            for d in getattr(self.app.conf, t + 's'):
                cls = d.__class__
                e = {}
                for _d in (cls.properties, cls.running_properties):
                    for prop in _d:
                        if not hasattr(d, prop):
                            continue
                        v = getattr(d, prop)
                        # Give a try to a json able object: the dump result is thrown away, we only want to know if it works
                        try:
                            json.dumps(v)
                        except Exception as exp:
                            logger.debug('get_all_states:: property %s is not exported: %s' % (prop, exp))
                        else:
                            e[prop] = v
                # One entry per daemon, added once both property tables were read
                lst.append(e)
        
        # Give back all the types, not only the list of the last one
        return res
    
    
    get_all_states.doc = doc
    get_all_states.need_lock = False
    
    # Try to give some properties of our objects
    doc = 'Dump all objects of the type in [hosts, services, contacts, commands, hostgroups, servicegroups]'
    
    
    def get_objects_properties(self, table, fields=None):
        """Dump the objects of one configuration table, passed through jsonify_r.
        
        :param table: name of the attribute of the configuration holding the objects (e.g. 'hosts')
        :param fields: optional comma separated attribute names; when given each object is reduced to a dict
                       of these attributes (None for a missing one), otherwise the whole object is dumped
        :return: a list, empty when the table is unknown or empty
        """
        logger.debug('get_objects_properties for table:[%s] and fields:[%s]' % (str(table), fields))
        objs = getattr(self.app.conf, table, None)
        if not objs:
            return []
        res = []
        
        if fields is not None:
            fields = fields.split(',')
        for obj in objs:
            if fields:
                dst_obj = {}
                for field in fields:
                    dst_obj[field] = getattr(obj, field, None)
            else:
                dst_obj = obj
            res.append(jsonify_r(dst_obj))
        return res
    
    
    get_objects_properties.doc = doc
    get_objects_properties.need_lock = False
    
    
    # For the healthcheck, we can give info about who is master/spare about others
    def get_satellite_spare_info(self, daemon_type, daemon_name):
        """Tell which spare and master daemons are linked to the given daemon.
        
        :param daemon_type: daemon type in its singular form, an 's' is appended to find the list in the configuration
        :param daemon_name: name looked up with find_by_name()
        :return: {'spare': {...}, 'master': {...}}; both 'founded' flags stay False when the daemon is unknown
        """
        logger.debug('get_satellite_spare_daemon:: asking for %s / %s' % (daemon_type, daemon_name))
        lst = getattr(self.app.conf, daemon_type + 's')
        daemon = lst.find_by_name(daemon_name)
        r = {'spare': {'founded': False}, 'master': {'founded': False}}
        if daemon is None:
            logger.info('HTTP API: get_satellite_spare_daemon:: asking for %s / %s => found nothing' % (daemon_type, daemon_name))
            return r
        
        if hasattr(daemon, 'spare_daemon'):
            r['spare']['founded'] = True
            spare_daemon = getattr(daemon, 'spare_daemon', None)
            r['spare']['daemon'] = '' if spare_daemon is None else spare_daemon.get_name()
            r['spare']['require_same_modules'] = getattr(daemon, '%s__manage_spare__spare_must_have_the_same_list_of_module_type' % daemon_type, '1')
        
        if hasattr(daemon, 'master_daemon'):
            r['master']['founded'] = True
            master_daemon = getattr(daemon, 'master_daemon', None)
            r['master']['daemon'] = '' if master_daemon is None else master_daemon.get_name()
        
        logger.debug('get_satellite_spare_daemon:: asking for %s / %s => found %s' % (daemon_type, daemon_name, r))
        return r
    
    
    get_satellite_spare_info.doc = 'Give for a daemon its spare_daemon'
    get_satellite_spare_info.need_lock = False


class IStats(IStatsInterface):
    """Statistics interface of the arbiter: adds the arbiter specific values to the raw stats."""
    
    
    def get_raw_stats(self, param=u''):
        # type: (unicode) -> Dict[unicode, Any]
        # Only delegates to the parent class; redefined here so the call attributes below can be set
        parent_interface = super(IStats, self)
        return parent_interface.get_raw_stats(param=param)
    
    
    get_raw_stats.doc = u'get stats of the daemon'
    get_raw_stats.need_lock = False
    
    
    def _daemon_get_raw_stats(self, param=u''):
        # type: (unicode) -> Dict[unicode, Any]
        # Values are read from the daemon in the same order as the keys are listed
        arbiter_stats = {}
        arbiter_stats[u'http_errors_count'] = self.app.http_errors_count
        # A master always has a configuration, a spare only once it did receive one
        arbiter_stats[u'have_conf'] = self.app.is_master or self.app.cur_conf is not None
        arbiter_stats[u'activated'] = self.app.must_run
        arbiter_stats[u'arbiter_version'] = self.get_context()[u'current_version']
        arbiter_stats[u'spare'] = self.app.me.spare
        return arbiter_stats


# Main Arbiter Class
class Arbiter(Daemon):
    def __init__(self, config_files, is_daemon, do_replace, verify_only, debug, debug_file, profile=None, analyse=None, migrate=None, arb_name='', daemon_id=0):
        """Create the arbiter daemon object with its default state.
        
        :param config_files: configuration paths; the first one is given to the Daemon constructor, the '.cfg'
                             ones are kept in self.config_files and the last '.ini' one (if any) in self.config_file
        :param verify_only: stored as is, read by load_config_file() to exit once the configuration is checked
        :param analyse: stored as is
        :param migrate: stored as is, read by load_config_file() to stop after the early configuration phase
        :param arb_name: stored as is, used by load_config_file() to find our own arbiter definition
        :param profile: accepted but not used by this constructor
        The other parameters are forwarded to Daemon.__init__.
        """
        super(Arbiter, self).__init__('arbiter', config_files[0], is_daemon, do_replace, debug, debug_file, daemon_id)
        
        # Objects configuration files: only the cfg ones, not the ini
        self.config_files = [path for path in config_files if path.endswith('.cfg')]
        
        # Daemon specific configuration file: should be only one ini file, the last one wins
        self.config_file = None
        self.server_uuid = self._get_server_uuid()
        ini_files = [path for path in config_files if path.endswith('.ini')]
        if ini_files:
            self.config_file = ini_files[-1]
        
        # Run options
        self.verify_only = verify_only
        self.analyse = analyse
        self.migrate = migrate
        self.arb_name = arb_name
        
        # Broks waiting to be pushed, indexed by brok id
        self.broks = {}
        self.persistant_broks = {}
        
        # Who we are: known only once the configuration is loaded
        self.is_master = False
        self.dispatcher = None  # type: Optional[Dispatcher]
        self.me = None
        
        self.last_run_alive_daemons = {}
        self.nb_broks_send = 0
        
        # External commands waiting to be managed, and where they come from
        self.external_commands = []
        self.fifo = None
        
        # Used to work out if we must still be alive or not
        self.must_run = True
        # Only load retention one time
        self._is_retention_load_already_try = False
        
        # Remote interfaces
        self.uri_arb = None
        self.interface = IForArbiter(self)
        self.uri_stats = None
        self.istats = IStats(self)
        
        self.conf = Config()
        self.trace = None
        
        self.external_command = None
        
        self.last_master_speak = None
        
        # The configuration incarnation will be given with ALL the configuration,
        # but we do not know our own name yet: the author is set later
        self.configuration_incarnation = ConfigurationIncarnation()
    
    
    def get_satellite_connections(self):
        if self.dispatcher:
            return self.dispatcher.get_satellite_connections()
        return ()
    
    
    def _get_server_uuid(self):
        """Read the server uuid file, store its raw content in self.server_uuid and return it.
        
        The file holds the value of "/sys/class/dmi/id/product_uuid", unique across machines AND machine clones.
        It is created (and updated after a clone) by the startup script /etc/init.d/shinken.
        
        :raises IOError: when the file cannot be read; this is not caught on purpose, the file MUST exist
        """
        uuid_path = '/var/lib/shinken/server.uuid'
        
        with open(uuid_path, 'r') as uuid_file:
            # The content is kept as read, nothing is stripped
            content = uuid_file.read()
            self.server_uuid = content
        return self.server_uuid
    
    
    # Generate the trace sent to my managed satellites to identify  myself.
    def _generate_and_link_arbiter_trace(self):
        # Create our trace structure. 'expire_period' is used by daemon.py to expire and forget an old arbiter trace.
        proto = 'https' if self.me.use_ssl else 'http'
        uri = '%s://%s:%s/' % (proto, self.me.address, self.me.port)
        self.trace = {
            'master_arbiter_uuid': self.conf.master_arbiter_uuid,
            'expire_period'      : self.me.check_interval * self.me.max_check_attempts,
            'check_interval'     : self.me.check_interval,  # only for daemon with a version < 02.07.05-Patched-05 (02.07.06) do not use this value anymore use expire_period instead
            'spare'              : self.me.spare,
            'name'               : self.me.arbiter_name,
            'uri'                : uri,
            'identifier'         : self.server_uuid,
            'version'            : self.interface.get_context()['current_version']
        }
        conf = self.conf
        for lst in (conf.schedulers, conf.brokers, conf.pollers, conf.reactionners, conf.receivers, conf.arbiters):
            for satellite_link in lst:
                satellite_link.set_arbiter_trace(self.trace)
    
    
    # Use for adding things like broks
    def add(self, b):
        """Queue an object raised by a satellite link or a module, according to its type.
        
        Persistant broks and broks are indexed by their id, external commands are appended to the
        waiting list; any other object is refused with a warning.
        """
        # Keep this test first: the persistant broks have their own store
        if isinstance(b, PersistantBrok):
            self.persistant_broks[b.id] = b
            return
        
        if isinstance(b, Brok):
            self.broks[b.id] = b
            return
        
        if isinstance(b, ExternalCommand):
            self.external_commands.append(b)
            return
        
        logger.warning('Cannot manage object type %s (%s)' % (type(b), b))
    
    
    def add_Brok(self, b):
        self.broks[b.id] = b
    
    
    # We must push our broks to the broker because it's stupid to make a crossing connection
    # so we find the broker responsible for our broks, and we send it to him
    def push_broks_to_broker(self):
        for brk in self.conf.brokers:
            # Send only if alive of course
            if brk.alive and brk.reachable:
                is_send = brk.push_broks(self.broks)
                if is_send:
                    # They are gone, we keep none!
                    self.broks.clear()
                # now push the persistant broks and don't delete them
                brk.push_broks(self.persistant_broks)
    
    
    # We must take external_commands from all satellites like brokers, pollers, reactionners or receivers
    def get_external_commands_from_satellites(self):
        sat_lists = [self.conf.brokers, self.conf.receivers,
                     self.conf.pollers, self.conf.reactionners]
        for lst in sat_lists:
            for sat in lst:
                # Get only if alive of course
                if sat.alive:
                    new_cmds = sat.get_external_commands()
                    for new_cmd in new_cmds:
                        self.external_commands.append(new_cmd)
    
    
    # Our links to satellites can raise broks. We must send them
    def get_broks_from_satellitelinks(self):
        tabs = [self.conf.brokers, self.conf.schedulers,
                self.conf.pollers, self.conf.reactionners,
                self.conf.receivers]
        for tab in tabs:
            for s in tab:
                new_broks = s.get_all_broks()
                for b in new_broks:
                    self.add(b)
    
    
    # Each link to a satellite gives an initial status brok. We must queue them to be sent
    def get_initial_broks_from_satellitelinks(self):
        tabs = [self.conf.brokers, self.conf.schedulers,
                self.conf.pollers, self.conf.reactionners,
                self.conf.receivers]
        for tab in tabs:
            for s in tab:
                b = s.get_initial_status_brok()
                self.add(b)
    
    
    # Load the external commander
    def load_external_command(self, e):
        self.external_command = e
        self.fifo = e.open()
    
    
    def get_daemon_links(self, daemon_type):
        # the attribute name to get these differs for schedulers and arbiters
        return daemon_type + 's'
    
    
    # Load from the JSON retention file, and return, the daemons saved during the last run
    def _load_last_run_alive_daemons(self):
        """Load the daemons saved by a previous run from the alive daemons retention file.
        
        The directory of the retention file is created when missing. When the file itself does not
        exist, self.last_run_alive_daemons is left as it is.
        
        :return: self.last_run_alive_daemons, or None when the retention cannot be loaded (a warning is logged)
        """
        try:
            file_path = _PATH_ALIVE_DAEMONS
            retention_dir = os.path.dirname(file_path)
            if not os.path.exists(retention_dir):
                os.makedirs(retention_dir)
            if os.path.isfile(file_path):
                # The with statement closes the file even if its content is not a valid json
                with open(file_path, 'r') as retention_file:
                    self.last_run_alive_daemons = json.load(retention_file)
            return self.last_run_alive_daemons
        except Exception as e:
            # Best effort: a broken retention must not stop the arbiter
            logger.warning("[%s] Previous alive daemons retention cannot be load from file [%s]. \n%s" % (self.daemon_type, _PATH_ALIVE_DAEMONS, e))
            return None
    
    
    def _save_alive_daemons(self):
        """Dump as JSON, in the alive daemons retention file, the satellite configuration of all the daemons.
        
        Nothing is done without a configuration. This arbiter is not saved with the other arbiters.
        A write failure is only logged as a warning.
        """
        if not self.conf:
            return
        
        all_daemons = self.conf.get_all_daemons()
        daemons_to_write = {}
        # Transform object references into serializable data
        for daemon_type, daemons in all_daemons.iteritems():
            if daemon_type == 'arbiters' and self.me in daemons:
                # NOTE(review): this removes self.me from the list given by get_all_daemons();
                # confirm that this list is not shared with the configuration itself
                daemons.remove(self.me)
            daemons_to_write[daemon_type] = [d.give_satellite_cfg() for d in daemons]
        
        try:
            file_path = _PATH_ALIVE_DAEMONS
            retention_dir = os.path.dirname(file_path)
            if not os.path.exists(retention_dir):
                os.makedirs(retention_dir)
            # The with statement flushes and closes the file, even when the dump fails
            with open(file_path, 'w') as retention_file:
                json.dump(daemons_to_write, retention_file)
            logger.debug('[%s] save alives daemons in %s' % (self.daemon_type, file_path))
        except Exception:
            # Best effort: a retention that cannot be written must not stop the arbiter
            logger.warning("[%s] Save alive daemons in file [%s] fail : [%s]" % (self.daemon_type, _PATH_ALIVE_DAEMONS, traceback.format_exc()))
    
    
    @staticmethod
    def _finish_step(step_name, time_start):
        """Log, at info level, the time spent in a compilation step.
        
        :param step_name: label of the step
        :param time_start: time.time() value taken when the step did start
        """
        elapsed = time.time() - time_start
        logger.info('[performance] Hosts - Checks - Clusters - Users compilation: step %-50s  [%.3fs]' % (step_name, elapsed))
    
    
    def load_config_file(self):
        """Read, compile and check the whole configuration, then take our daemon parameters from it.
        
        Main stages, in order:
          * read the cfg files and create the arbiter and module objects;
          * find which arbiter definition we are (self.me) and if we are the master or a spare;
          * load our modules and add the objects given by the 'configuration' ones to the raw objects;
          * create all the objects, then compute the shards of each realm, one sub process by realm;
          * explode, fill defaults, remove disabled, pythonize and linkify the objects, then check them;
          * serialize the whole configuration and copy the daemon parameters (logs, pid file, user, address, port...).
        
        The process exits (sys.exit) when we are not found in the arbiters, when a module cannot be loaded or
        fails to give its objects, when a realm worker fails, when the configuration is incorrect, and also
        (exit code 0) when verify_only is set and the checks did pass.
        In migrate mode the method returns early, right after the 'early_configuration' hook.
        """
        logger.info("Loading configuration")
        # REF: doc/shinken-conf-dispatching.png (1)
        buf = self.conf.read_config(self.config_files)
        raw_objects = self.conf.read_config_buf(buf)
        
        logger.debug("Opening local log file")
        
        # Only reported (error for the header, warning for each file), the load does continue
        if self.conf.bad_encoding_files:
            logger.error('[config] Some characters could not be read in utf-8 in these files :')
            for _file in self.conf.bad_encoding_files:
                logger.warning('[config] - %s' % _file)
        
        # First we need to get arbiters and modules
        # so we can ask them for objects
        self.conf.create_objects_for_type(raw_objects, 'arbiter')
        self.conf.create_objects_for_type(raw_objects, 'module')
        
        self.conf.early_arbiter_linking()
        
        # Search which Arbiterlink I am
        enabled_arbiters = self.conf.arbiters.enabled()
        if len(enabled_arbiters) == 1:
            # Only one enabled arbiter: it is us, whatever its name is
            self.me = enabled_arbiters[0]
            self.is_master = not self.me.spare
            # Set myself as alive ;)
            self.me.alive = True
        else:
            # Several (or no) enabled arbiters: the last one matching arb_name is kept
            for arb in enabled_arbiters:
                if arb.is_me(self.arb_name):
                    self.me = arb
                    self.is_master = not self.me.spare
                    # Set myself as alive ;)
                    self.me.alive = True
            
            if not self.me:
                sys.exit("Error: I cannot find my own Arbiter object, I bail out. "
                         "To solve this, please make sure that the 'enabled' parameter is set to 1, "
                         "or change the 'host_name' parameter in "
                         "the object %s in the Arbiter configuration file "
                         "with the value '%s'. "
                         "Thanks." % (self.arb_name if self.arb_name else "Arbiter", socket.gethostname()))
        
        # Update the logger with our name
        logger.set_name(self.me.get_name())
        
        if self.is_master:
            logger.info("I am the master Arbiter: %s" % self.me.get_name())
        else:
            logger.info("I am a spare Arbiter: %s" % self.me.get_name())
        
        logger.info("My own modules: " + ','.join([m.get_name() for m in self.me.modules]))
        
        # So I can update our name in the ini file
        self.save_daemon_name_into_configuration_file(self.me.get_name())
        
        # Now we know our name: set it as the author of the configuration incarnation,
        # so ALL the configuration we are sending tells where it comes from
        self.configuration_incarnation.set_author(self.me.get_name())
        logger.info('We are loading a new configuration %s that we will send to all other daemons' % self.configuration_incarnation)
        
        self.modules_dir = getattr(self.conf, 'modules_dir', '')
        
        before_modules = time.time()
        # Ok it's time to load the module manager now!
        self.load_modules_manager()
        # we request the instances without them being *started*
        # (for those that are concerned ("external" modules):
        # we will *start* these instances after we have been daemonized (if requested)
        self.modules_manager.set_modules(self.me.modules)
        
        logger.info('Loading modules for the arbiter')
        
        all_modules_are_started = self.do_load_modules()
        if not all_modules_are_started:
            logger.error('All modules are mandatory on the arbiter daemon. Please fix errors and restart the arbiter daemon.')
            sys.exit(2)
        
        logger.info('[performance] Modules initialization did took %.3fs' % (time.time() - before_modules))
        
        # Call modules that manage this read configuration pass
        self.hook_point('read_configuration')
        
        # Now we ask for configuration modules if they
        # got items for us
        for inst in self.modules_manager.get_all_alive_instances():
            if 'configuration' in inst.phases:
                try:
                    r = inst.get_objects()
                except Exception as exp:
                    logger.error("Instance %s raised an exception : %s. Arbiter can not continue : shut down" % (inst.get_name(), str(exp)))
                    self.do_stop()
                    sys.exit(2)
                
                # The module result is indexed by property name (prop), the raw objects by type name (k)
                types_creations = self.conf.types_creations
                for k in types_creations:
                    (cls, clss, prop) = types_creations[k]
                    if prop in r:
                        for x in r[prop]:
                            # test if raw_objects[k] are already set - if not, add empty array
                            if k not in raw_objects:
                                raw_objects[k] = []
                            # now append the object
                            raw_objects[k].append(x)
                        if len(r[prop]) != 0:
                            logger.debug("Added %-3d objects to %s from module %s" % (len(r[prop]), k, inst.get_name()))
        
        before = time.time()
        ### Resume standard operations ###
        self.conf.create_objects(raw_objects)
        self._finish_step('create_objects', before)
        
        # NOTE(review): this time is not used, 'before' is set again ahead of the next measured step
        before = time.time()
        # Maybe conf is already invalid
        if not self.conf.conf_is_correct:
            sys.exit("***> One or more problems was encountered while processing the config files...")
        
        # Change Nagios2 names to Nagios3 ones
        self.conf.old_properties_names_to_new()
        
        # Manage all post-conf modules
        self.hook_point('early_configuration')
        
        # Ok here maybe we should stop because we are in a pure migration run
        if self.migrate:
            print("Migration MODE. Early exiting from configuration relinking phase")
            return
        
        before = time.time()
        # Create Template links
        self.conf.linkify_templates()
        
        # Inheritance, limited here to the 'realm' parameter
        self.conf.apply_inheritance(parameter_only='realm')
        self._finish_step('compute inheritance', before)
        
        # We need to check realms configuration before computing shards
        # so we get errors when there is no or too many default realms
        self.conf.realms.check_default_realm()
        
        if self.conf.realms.conf_is_correct:
            # Configuration need to have a Manager that will get all shards
            self.conf.create_sub_worker_manager()
            
            before = time.time()
            # One worker process by realm, all started before we wait for any of them
            all_realm_process = {}
            for realm in self.conf.realms:
                p = Process(target=self._compute_shard_into_subprocess, name="shard-computation-%s" % (realm.get_name()), args=(realm,))
                p.start()
                all_realm_process[realm.get_name()] = p
            
            for (realm_name, process) in all_realm_process.iteritems():
                logger.info('Waiting for realm: %s' % realm_name)
                process.join()
                # The first worker seen with an error stops the arbiter, the other ones are not waited for
                if process.exitcode != 0:
                    logger.error('The realm %s has errors. Exiting' % realm_name)
                    sys.exit(2)
            
            # Grok shards sent from workers
            self.conf.get_shards_from_sub_worker_manager()
            
            # The shard managers is no more need
            self.conf.shutdown_sub_worker_manager()
            self._finish_step('compute shards into sub workers for each realm', before)
        
        before = time.time()
        # Step 'create final objects from definitions' (done by Config.explode_common)
        self.conf.explode_common()
        self._finish_step('create final objects from definitions', before)
        
        before = time.time()
        # Step 'fill default values' (done by Config.fill_default_common)
        self.conf.fill_default_common()
        self._finish_step('fill default values', before)
        
        before = time.time()
        # Remove disabled elements (in daemons)
        self.conf.remove_disabled()
        self._finish_step('remove no more useful objects', before)
        
        before = time.time()
        # Pythonize values
        self.conf.pythonize_common()
        self._finish_step('transform definitions into real objects', before)
        
        before = time.time()
        # Linkify objects to each other
        self.conf.linkify_common()
        self._finish_step('link elements together', before)
        
        # Configuration incarnation MUST be set BEFORE the whole configuration
        # serialization, so the spare will inherit from it
        self.link_configuration_instance_into_configuration_objects()
        
        before = time.time()
        # Explode global conf parameters into Classes
        self.conf.explode_global_conf()
        
        # set our own timezone and propagate it to other satellites
        self.conf.propagate_option('use_timezone')
        self.conf.propagate_option('language')
        self.set_tz(self.conf.use_timezone)
        self._finish_step('compute global parameters', before)
        
        # Manage all post-conf modules
        self.hook_point('late_configuration')
        
        before = time.time()
        # Correct conf?
        self.conf.is_correct_common()
        self._finish_step("Checking that the configuration is valid", before)
        
        # Maybe some elements where not wrong, so we must clean if possible
        self.conf.clean()
        
        # The conf can be incorrect here if the cut into parts see errors like
        # a realm with hosts and not schedulers for it
        if not self.conf.conf_is_correct:
            self.conf.show_errors()
            err = "Configuration is incorrect, sorry, I bail out"
            logger.error(err)
            sys.exit(err)
        
        logger.info('Things look okay - No serious problems were detected during the pre-flight check')
        
        # Clean objects of temporary/unnecessary attributes for live work:
        self.conf.clean()
        
        # Exit if we are just here for config checking
        if self.verify_only:
            sys.exit(0)
        
        # Manage all post-conf modules
        self.hook_point('configuration_done')
        
        # Some properties need to be "flatten" (put in strings)
        # before being send, like realms for hosts for example
        # BEWARE: after the cutting part, because we stringify some properties
        before = time.time()
        
        self.conf.create_whole_conf()
        self._finish_step("Serializing the global configuration", before)
        
        # sys.exit(0)
        
        # Ok, here we must check if we go on or not.
        # TODO: check OK or not
        # Copy on the daemon the parameters read from the configuration
        self.log_level = self.conf.log_level
        self.use_local_log = self.conf.use_local_log
        self.local_log = self.conf.local_log
        self.pidfile = os.path.abspath(self.conf.lock_file)
        self.idontcareaboutsecurity = self.conf.idontcareaboutsecurity
        self.user = self.conf.shinken_user
        self.group = self.conf.shinken_group
        self.daemon_enabled = self.conf.daemon_enabled
        self.daemon_thread_pool_size = self.conf.daemon_thread_pool_size
        self.http_backend = getattr(self.conf, 'http_backend', 'auto')
        
        self.max_file_descriptor_limit = 1024  # this daemon need a lof of file descriptors
        
        # If the user sets a workdir, lets use it. If not, use the pidfile directory
        if self.conf.workdir == '':
            self.workdir = os.path.abspath(os.path.dirname(self.pidfile))
        else:
            self.workdir = self.conf.workdir
        
        #  We need to set self.host & self.port to be used by do_daemon_init_and_start
        # The bind_addr of the configuration, when there is one, wins over our own address
        self.host = self.me.address
        if hasattr(self.conf, 'bind_addr'):
            self.host = self.conf.bind_addr
        self.port = self.me.port
        self.conf.set_master_arbiter_uuid(self.server_uuid)
        
        logger.info("Configuration Loaded")
    
    
    def get_currently_managed_configuration(self):
        return self.configuration_incarnation.dump_as_json()
    
    
    # The instance_configuration must be set into the configuration, but also
    # in the satellites
    def link_configuration_instance_into_configuration_objects(self):
        configuration_incarnation = self.configuration_incarnation
        conf = self.conf
        conf.set_configuration_incarnation(configuration_incarnation)
        for lst in (conf.schedulers, conf.brokers, conf.pollers, conf.reactionners, conf.receivers, conf.arbiters):
            for satellite_link in lst:
                satellite_link.set_configuration_incarnation(configuration_incarnation)
    
    
    def _remove_hosts_not_in_realm(self, realm):
        realm_name = realm.get_name()
        # look to only keep hosts that are in our realm
        realm_is_default = getattr(realm, 'default', '0') == '1'
        to_del = []
        for host in self.conf.hosts:
            # Do not delete template
            if host.is_tpl():
                continue
            host_realm = getattr(host, 'realm', '')
            if host_realm == '' and realm_is_default:
                continue
            if host_realm == realm_name:
                continue
            to_del.append(host.id)
        for _id in to_del:
            del self.conf.hosts[_id]
    
    
    def _compute_shard_into_subprocess(self, realm):
        try:
            self._do_compute_shard_into_subprocess(realm)
        except SystemExit:  # do not hook system exit, follow them
            raise
        except:
            err = traceback.format_exc()
            logger.error('ERROR: the realm %s worker process did fail and exit: %s' % (realm.get_name(), err))
            sys.exit(2)
    
    
    def _do_compute_shard_into_subprocess(self, realm):
        """ Compile self.conf for one realm, from raw definitions up to the serialized shards.

        The steps run in a fixed order (inheritance, explode, defaults, pythonize, linkify, checks, cut into parts...)
        and most of them report their duration through self._finish_step().

        This method ends the process in two cases:
        - sys.exit(2) when the configuration is found incorrect
        - sys.exit(0) when self.verify_only is set (configuration check only)

        :param realm: the realm to compile; hosts that do not belong to it are removed from self.conf first
        """
        start = time.time()
        realm_name = realm.get_name()
        
        # NOTE: the main daemon did disable the gc, but a fork() did already reenable it
        # Let this process ultra low level
        os.nice(20)
        set_process_name('shinken-arbiter - worker - compilation %s' % (realm_name))
        
        logger.set_name('Realm-%s' % realm_name)
        # __perf_format = ' ** [' + realm_name + '] %-30s: %.3f'
        
        before = time.time()
        self._remove_hosts_not_in_realm(realm)
        self._finish_step('clean hosts from other realms', before)
        
        # The configuration UI is currently not able to give a check/service poller_tag as None when forcing unset,
        # and gives "null" instead. So for this case change poller_tag "null" => "None"
        before = time.time()
        self.conf.fix_null_poller_tag_service_into_real_none()
        self._finish_step('fix service with null as poller tag', before)
        
        before = time.time()
        # All inheritances
        self.conf.apply_inheritance()
        self._finish_step('compute inheritance', before)
        
        before = time.time()
        # Explode between types
        # NOTE: a step in the check/service need to be done AFTER the implicit inheritance part
        self.conf.explode_realm_only()
        self._finish_step('create final objects from definitions', before)
        
        # Create Name reversed list for searching list
        self.conf.create_reversed_list()
        
        # Removes service exceptions based on host configuration
        before = time.time()
        count = self.conf.remove_exclusions()
        if count > 0:
            # We removed excluded services, and so we must recompute the search lists
            self.conf.create_reversed_list()
        self._finish_step('remove check exclusions', before)
        
        before = time.time()
        # Cleaning Twins objects
        self.conf.remove_twins()
        self._finish_step('remove duplicate objects', before)
        
        before = time.time()
        # Implicit inheritance for services
        self.conf.apply_implicit_inheritance()
        self._finish_step('compute inheritance between hosts and checks', before)
        
        before = time.time()
        # Explode step that must run after the implicit inheritance
        self.conf.post_inheritance_explode()
        self._finish_step('post inheritance objets creation from definitions', before)
        
        before = time.time()
        # Fill default values
        self.conf.fill_default_realm_only()
        self._finish_step('fill default values', before)
        
        # Remove templates from config
        before = time.time()
        self.conf.remove_templates()
        
        # Remove disabled elements (like daemons)
        self.conf.remove_disabled()
        
        # We removed templates, and so we must recompute the search lists
        self.conf.create_reversed_list()
        self._finish_step('remove no more useful objects', before)
        
        # Overrides specific service instances properties
        before = time.time()
        self.conf.override_properties()
        self._finish_step('compute service overrides', before)
        
        # Pythonize values
        before = time.time()
        self.conf.pythonize_realm_only()
        self._finish_step('transform definitions into real objects', before)
        
        # Linkify objects to each other
        before = time.time()
        self.conf.linkify_realm_only()
        self._finish_step('link elements together', before)
        
        # Change some default values (like poller/reactionner_tag into final ones)
        before = time.time()
        self.conf.change_default_values_into_finals()
        self._finish_step('change some default values into final ones', before)
        
        # Remove services without a valid host
        before = time.time()
        self.conf.remove_orphan_services()
        self._finish_step('remove checks without hosts', before)
        
        # applying dependencies
        before = time.time()
        self.conf.apply_dependencies()
        self._finish_step('declare depedencies', before)
        
        # Explode global conf parameters into Classes
        before = time.time()
        self.conf.explode_global_conf()
        
        # set our own timezone and propagate it to other satellites
        self.conf.propagate_option('use_timezone')
        self.conf.propagate_option('language')
        self.set_tz(self.conf.use_timezone)
        self._finish_step('compute global parameters', before)
        
        before = time.time()
        # Update proxy elements from current elements
        self.conf.create_proxy_items()
        
        # Look for business rules, and create the dep tree
        self.conf.create_business_rules()
        # And link them
        # TODO: get back dep links
        # self.conf.create_business_rules_dependencies()
        self._finish_step('compute cluster trees', before)
        
        # Manage all post-conf modules
        self.hook_point('late_configuration')
        
        # Correct conf?
        before = time.time()
        self.conf.is_correct_realm_only()
        self._finish_step('checking the configuration is valid', before)
        
        # Maybe some elements were wrong, so we must clean if possible
        self.conf.clean()
        
        before = time.time()
        # REF: doc/shinken-conf-dispatching.png (2)
        self.confs = self.conf.cut_into_parts(realm)
        self._finish_step('splitting elements into shards', before)
        
        # Every realm is checked (no short-circuit): one failing realm is enough to flag the whole conf as incorrect
        are_realms_with_valid_schedulers = True
        for r in self.conf.realms:
            are_realms_with_valid_schedulers &= r.is_correct_schedulers_with_hosts_and_satellites()
        if not are_realms_with_valid_schedulers:
            self.conf.conf_is_correct = False
        
        # The conf can be incorrect here if the cut into parts see errors like
        # a realm with hosts and not schedulers for it
        if not self.conf.conf_is_correct:
            self.conf.show_errors()
            err = "Configuration is incorrect, sorry, I bail out"
            logger.error(err)
            sys.exit(2)
        
        logger.info('Things look okay - No serious problems were detected during the pre-flight check')
        
        # Clean objects of temporary/unnecessary attributes for live work:
        self.conf.clean()
        
        logger.info('REALM: %s Whole configuration time: %.2f' % (realm_name, time.time() - start))
        
        # Exit if we are just here for config checking
        if self.verify_only:
            sys.exit(0)
        
        # Set & get monitoring start time, for this realm
        # NOTE: if a host changes realm, it will lose its monitoring start time
        before = time.time()
        self.conf.set_monitoring_start_time_realm_only(realm)
        self._finish_step('set monitoring start time to new elements', before)
        
        before = time.time()
        self.conf.prepare_for_sending(realm)
        self._finish_step('serializing the realm shards', before)
    
    
    # Main loop function
    def main(self):
        """ Daemon entry point: load the configuration, start the daemon parts, then enter the main loop.

        A SystemExit is re-emitted with the same exit code. Any other exception is logged
        as critical with its traceback and raised again.
        """
        try:
            # Log will be broks
            for header_line in self.get_header():
                logger.info(header_line)
            
            self.load_config_file()
            
            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            
            # Warn about every definition the configuration loading did ignore
            for kind, definitions in self.conf.ignored_items.iteritems():
                for definition in definitions:
                    logger.warning('[config] Cannot import %s defined in %s. Please use the synchronizer with a cfg-file source to import %ss into arbiter' % (kind, definition, kind))
            
            unreadable_files = self.conf.bad_encoding_files
            if unreadable_files:
                logger.error('[config] Some characters could not be read in utf-8 in these files :')
                for unreadable_file in unreadable_files:
                    logger.warning('[config] - %s' % unreadable_file)
            
            # Expose our two interfaces on the http daemon
            self.uri_arb = self.http_daemon.register(self.interface)
            self.uri_stats = self.http_daemon.register(self.istats)
            
            # We are now fully daemonized (if requested): the "external" modules (if any) can be started
            self.modules_manager.start_external_instances()
            
            # Let the modules know that we are now fully daemonized, then that retention can be loaded
            for hook_name in ('daemon_daemonized', 'load_retention'):
                self.hook_point(hook_name)
            
            # And go for the main loop
            self.do_mainloop()
        except SystemExit as exit_request:
            # With a 2.4 interpreter the sys.exit() in load_config_file
            # ends up here and must be handled.
            sys.exit(exit_request.code)
        except Exception:
            logger.critical("The daemon did have an unrecoverable error. It must exit.")
            logger.critical("You can log a bug to your Shinken integrator with the error message:")
            crash_report = traceback.format_exc()
            logger.critical("%s" % (crash_report))
            raise
    
    
    def _save_master_configuration_into_retention(self, received_configuration):
        """ Write the full_conf received from the master into the spare retention file.

        The file holds a pickled one-entry dict: {master arbiter version: full_conf as received}.
        It is written into a ".tmp" sibling first, then moved onto the final path.
        """
        pickled_full_conf = received_configuration['full_conf']
        master_version = received_configuration['arbiter_trace']['version']
        retention_payload = {master_version: pickled_full_conf}
        
        temporary_path = _PATH_LAST_SPARE_CONF + ".tmp"
        with open(temporary_path, "wb") as temporary_file:
            cPickle.dump(retention_payload, temporary_file, cPickle.HIGHEST_PROTOCOL)
        shutil.move(temporary_path, _PATH_LAST_SPARE_CONF)
    
    
    def setup_new_conf(self):
        """ Setup a new conf received from a Master arbiter.

        Takes the pending self.new_conf dict (and resets self.new_conf to None), then:
        - skips it with an error log when 'configuration_incarnation' or 'full_conf' is missing
        - saves the still pickled 'full_conf' into the spare retention file
        - unpickles it and uses the result as self.conf and self.cur_conf
        - finds which arbiter link is us (self.me) and freezes the is_me() answer of every arbiter link
        - records the configuration incarnation sent by the master
        """
        conf = self.new_conf
        self.new_conf = None
        arbiter_master_name = conf['arbiter_trace']['name']
        arbiter_master_uri = conf['arbiter_trace']['uri']
        configuration_incarnation = conf.get('configuration_incarnation', None)
        if configuration_incarnation is None:
            logger.error('%s The configuration received from the arbiter "%s" at "%s" is invalid, missing configuration_incarnation. skipping it.' % (CHAPTER_CONFIGURATION, arbiter_master_name, arbiter_master_uri))
            return
        logger.info("%s New configuration received from an arbiter named [%s] from %s : %s" % (CHAPTER_CONFIGURATION, arbiter_master_name, arbiter_master_uri, configuration_incarnation))
        
        # Without full_conf there is nothing to load: skip it like any other invalid configuration
        # instead of failing with a KeyError a few lines below
        if 'full_conf' not in conf:
            logger.error('%s The configuration received from the arbiter "%s" at "%s" is invalid, missing full_conf. skipping it.' % (CHAPTER_CONFIGURATION, arbiter_master_name, arbiter_master_uri))
            return
        
        # load the real conf here, don't block the put_conf return fast to make the arbiter master fast
        # save the spare last conf with the current version as pickle
        self._save_master_configuration_into_retention(conf)
        
        # now we can unpickle the full_conf
        # NOTE(security): unpickling can execute arbitrary code, so this relies on the sender being a trusted arbiter
        conf['full_conf'] = cPickle.loads(conf['full_conf'])
        
        self.cur_conf = conf['full_conf']
        self.conf = conf['full_conf']
        for arb in self.conf.arbiters:
            if arb.is_me(self.arb_name):
                self.me = arb
                arb.is_me = lambda x: True  # we now definitively know who we are, just keep it.
            else:
                arb.is_me = lambda x: False  # and we know who we are not, just keep it.
        
        self.load_new_configuration_incarnation_from_master(configuration_incarnation)
    
    
    # Whatever the master is saying to us, we are switching back to idle mode
    def arbiter_master__you_are_talking_to_me(self):  # IMPORTANT: read with an italian accent
        if self.must_run:
            self.must_run = False
            logger.info('-' * 200)
            logger.info('%s The arbiter master take over the configuration %s. Switching back to sleep move' % (CHAPTER_CONFIGURATION, self.configuration_incarnation))
            logger.info('-' * 200)
    
    
    def do_not_run(self):
        """ Handle a "do not run" order: a master ignores it, a spare goes idle and notes that the master spoke. """
        if not self.is_master:
            # I'm just a spare, so I listen to my master: if needed, log it and go idle
            self.arbiter_master__you_are_talking_to_me()
            logger.debug("Received message to not run from arbiter master")
            self.last_master_speak = time.time()
        else:
            # I'm the master: ignore the command
            logger.error("Received message to not run. I am the Master, ignore and continue to run.")
    
    
    def arbiter_master_send_us_a_conf(self):
        """ The master arbiter sent us a configuration: go idle if we were running, then log it. """
        # The master talks to us, so maybe we just switch mode
        self.arbiter_master__you_are_talking_to_me()
        notice = '%s The arbiter master send us the configuration: %s' % (CHAPTER_CONFIGURATION, self.configuration_incarnation)
        logger.info(notice)
    
    
    def load_new_configuration_incarnation_from_retention(self, configuration_incarnation):
        """ Adopt the given configuration incarnation and log that it comes from the retention. """
        self.configuration_incarnation = configuration_incarnation
        notice = '%s The configuration incarnation is loaded from the retention. New incarnation: %s' % (CHAPTER_CONFIGURATION, configuration_incarnation)
        logger.info(notice)
    
    
    def load_new_configuration_incarnation_from_master(self, configuration_incarnation):
        """ Adopt the given configuration incarnation and log that it comes from the arbiter master. """
        self.configuration_incarnation = configuration_incarnation
        notice = '%s The configuration incarnation is loaded from the arbiter master. New incarnation: %s' % (CHAPTER_CONFIGURATION, configuration_incarnation)
        logger.info(notice)
    
    
    def do_loop_turn(self):
        # If I am a spare, I wait for the master arbiter to send me true conf.
        if self.me.spare and not self._is_retention_load_already_try:
            self._is_retention_load_already_try = True  # Only load the retention once
            # try to load the last receive conf, check the version, if match use it instead of non sync and probably false cfg files that are in this conf
            self.conf = None
            self.dispatcher = None
            saved_conf = {}
            if os.path.isfile(_PATH_LAST_SPARE_CONF):
                try:
                    saved_conf = cPickle.load(open(_PATH_LAST_SPARE_CONF, 'rb'))
                except (EOFError, ValueError, IOError, IndexError, TypeError):
                    logger.error("Failed to load the last received configuration '%s']" % _PATH_LAST_SPARE_CONF)
                # we should have only one conf here
                if len(saved_conf) == 1:
                    for version, pickled_full_conf in saved_conf.iteritems():
                        current_version = self.interface.get_context()['current_version']
                        if version != current_version:
                            # this conf was writen for another version of the arbiter the objects 'maybe' don't have the same representation
                            logger.error("Failed to load the last received configuration '%s', the saved version [%s] doesn't match with my current version [%s]" % (_PATH_LAST_SPARE_CONF, version, current_version))
                        else:
                            loaded_conf = cPickle.loads(pickled_full_conf)  # type: Config
                            # Maybe the conf is too old, from a patch that fix arbiter->scheduler communication, so check this
                            # and if the schedulers are not up to date, skip this conf object
                            is_valid = loaded_conf.check_post_02_08_02_satellite_communication()
                            if is_valid:
                                self.conf = loaded_conf
                                self.cur_conf = self.conf
                                self.load_new_configuration_incarnation_from_retention(self.conf.get_configuration_incarnation())
                                logger.info('Arbiter conf loaded from %s, used it until receive a new one' % _PATH_LAST_SPARE_CONF)
                            else:
                                logger.warning('The arbiter retention file %s was too old, skipping it until we are receiving a new configuration from the master arbiter' % _PATH_LAST_SPARE_CONF)
        
        # Now the configuration load is done, can work and wait
        if self.me.spare:
            logger.debug("I wait for master")
            self.wait_for_master_death()
        
        if self.must_run:
            # Main loop
            self.run()
    
    
    # Called when the Arbiter is stopping
    def do_stop(self):
        """ Called when the Arbiter is stopping: run the parent class stop, then call self._save_alive_daemons(). """
        super(Arbiter, self).do_stop()
        self._save_alive_daemons()
    
    
    # Get 'objects' from external modules
    # It can be used to get external commands for example
    def get_objects_from_from_queues(self):
        for f in self.modules_manager.get_external_from_queues():
            # print "Groking from module instance %s" % f
            while True:
                try:
                    o = f.get(block=False)
                    self.add(o)
                except Empty:
                    break
                # Maybe the queue had problems
                # log it and quit it
                except (IOError, EOFError) as exp:
                    logger.error("An external module queue got a problem '%s'" % str(exp))
                    break
    
    
    # We wait (block) for arbiter to send us something
    def wait_for_master_death(self):
        """ Block (one second sleeps) until the master is silent for too long or we are interrupted.

        The allowed silence is check_interval * max_check_attempts of the first non spare arbiter
        of self.conf, 300 seconds when there is none. A configuration received while waiting is set up
        and the delay is computed again.

        When the delay is over, self.must_run is set to True if we have a configuration, and stays False if not.
        """
        logger.info("Waiting for master death")
        self.must_run = False
        self.last_master_speak = time.time()
        
        def _delay_from_master_definition(fallback):
            # The first non spare arbiter gives the delay, the fallback is kept when there is none
            for arbiter_link in self.conf.arbiters:
                if not arbiter_link.spare:
                    return arbiter_link.check_interval * arbiter_link.max_check_attempts
            return fallback
        
        # Look for the master timeout
        takeover_delay = 300
        if self.conf:
            takeover_delay = _delay_from_master_definition(takeover_delay)
        logger.info("I'll wait master for %d seconds" % takeover_delay)
        
        while not self.interrupted:
            self.sleep(1)
            
            if self.new_conf:
                self.setup_new_conf()
                if self.conf:
                    takeover_delay = _delay_from_master_definition(takeover_delay)
            
            # Now check if master is dead or not
            silence_duration = time.time() - self.last_master_speak
            
            # we do not want to log when all is OK
            if silence_duration > 10:
                emit_log = logger.warning
            else:
                emit_log = logger.debug
            emit_log("Arbiter Master don't speak to me since %ds. I'll take the master role after %s seconds." % (silence_duration, takeover_delay))
            
            if silence_duration <= takeover_delay:
                continue
            
            # The master is silent for too long: we leave the wait, with or without taking the lead
            if self.conf:
                logger.info("-" * 200)
                logger.info("%s Arbiter Master is dead. The arbiter %s take the lead with the configuration %s" % (CHAPTER_CONFIGURATION, self.me.get_name(), self.configuration_incarnation))
                logger.info("-" * 200)
                self.must_run = True
            else:
                if self.deactivated_by_arbiter:
                    logger.info("Arbiter master deactivated me ; waiting for new master...")
                else:
                    logger.error("Arbiter Master is dead. The arbiter %s should the lead but no conf was given and/or can be loaded from %s" % (self.me.get_name(), _PATH_LAST_SPARE_CONF))
                self.must_run = False
            break
    
    
    # Take all external commands, make packs and send them to  the schedulers
    def push_external_commands_to_schedulers(self):
        # Now get all external commands and put them into the good schedulers
        for ext_cmd in self.external_commands:
            self.external_command.resolve_command(ext_cmd)
        
        # Now for all alive schedulers, send the commands
        for sched in self.conf.schedulers:
            cmds = sched.external_commands
            if len(cmds) > 0 and sched.alive:
                logger.debug("Sending %d commands to scheduler %s" % (len(cmds), sched.get_name()))
                sched.run_external_commands(cmds)
            # clean them
            sched.external_commands = []
    
    
    # We will log if there are time period activations change as NOTICE in logs.
    def check_and_log_tp_activation_change(self):
        for tp in self.conf.timeperiods:
            tp.check_and_log_activation_change()
    
    
    # Main function
    def run(self):
        """ Work as the active arbiter: dispatch the configuration, then loop until stopped.

        Before the loop: find which arbiter link is us, build the Dispatcher, do a first full
        check/dispatch pass, ask the daemons of the previous run that are no more enabled to stop,
        get the initial broks and create the external command manager.

        Each loop turn (while self.must_run and not self.interrupted): modules housekeeping, 'tick' hook,
        timeperiods activation logs, dispatcher checks and dispatch, then broks and external commands
        exchange with the satellites. The turn ends with a sleep that completes it to one second.
        """
        # Before running, I must be sure who am I
        # The arbiters change, so we must re-discover the new self.me
        for arb in self.conf.arbiters:
            if arb.is_me(self.arb_name):
                self.me = arb
        # Update the logger with our name
        if self.me:
            logger.set_name(self.me.get_name())
        
        # We were waiting for check_interval to set arbiter trace
        self._generate_and_link_arbiter_trace()
        
        logger.set_human_format(on=self.conf.human_timestamp_log)
        logger.set_name(self.me.get_name())
        # load the last run alive daemons to be able to ask them to do nothing once the conf will be send
        self._load_last_run_alive_daemons()
        logger.info("Begin to dispatch configurations %s to satellites" % self.configuration_incarnation)
        self.dispatcher = Dispatcher(self.conf, self.me, self.trace, self.configuration_incarnation)
        
        # Fast check for daemons ALIVE/DEAD
        self.dispatcher.initial_daemons_check()
        # Now we can list all our daemons
        self.dispatcher.print_initial_listing()
        
        # Normal loop start
        self.dispatcher.check_alive()
        self.dispatcher.assert_inventories_dispatch()
        self.dispatcher.check_dispatch()
        self.dispatcher.check_bad_dispatch()
        # REF: doc/shinken-conf-dispatching.png (3)
        self.dispatcher.dispatch()
        # ask to the non anymore enabled daemons to stop working
        self.dispatcher.disable_previous_run_daemons(self.last_run_alive_daemons)
        
        # Now we can get all initial broks for our satellites
        self.get_initial_broks_from_satellitelinks()
        
        # Now create the external commander. It's just here to dispatch
        # the commands to schedulers
        # TODO: get back
        e = ExternalCommandManager(self.conf, 'dispatcher')
        e.load_arbiter(self)
        self.external_command = e
        
        logger.debug("Run baby, run...")
        
        while self.must_run and not self.interrupted:
            loop_start = time.time()
            self._increase_loop_number()
            loop_number = self._get_loop_number()
            logger.info('[ARBITER TIME  ] [ === Loop start === ] [ Loop number=%-5d ] ===-===-===-===-===-===-===-===-===-===-===-===-===' % loop_number)
            # Try to see if one of my module is dead, and
            # try to restart previously dead modules :)
            self.check_and_del_zombie_modules()
            
            # Call modules that manage a starting tick pass
            self.hook_point('tick')
            
            # Look for logging timeperiods activation change (active/inactive)
            self.check_and_log_tp_activation_change()
            
            # Now the dispatcher job
            self.dispatcher.check_alive()
            self.dispatcher.assert_inventories_dispatch()
            self.dispatcher.check_dispatch()
            # REF: doc/shinken-conf-dispatching.png (3)
            self.dispatcher.check_bad_dispatch()
            self.dispatcher.dispatch()
            
            other_tasks_start = time.time()
            # Now get things from our module instances
            self.get_objects_from_from_queues()
            
            # Maybe our satellites links raise new broks. Must reap them
            self.get_broks_from_satellitelinks()
            
            # One broker is responsible for our broks,
            # we must give him our broks
            self.push_broks_to_broker()
            
            self.get_external_commands_from_satellites()
            
            if self.nb_broks_send != 0:
                logger.debug("Nb Broks send: %d" % self.nb_broks_send)
            self.nb_broks_send = 0
            
            self.push_external_commands_to_schedulers()
            
            # It's sent, do not keep them
            # TODO: check if really sent. Queue by scheduler?
            self.external_commands = []
            
            logger_perf.info('Time to do send broks to brokers and push shinken internal commands (like recheck, set acknowledge, etc) to schedulers [ %.3f ]s' % (time.time() - other_tasks_start))
            
            diff_time = time.time() - loop_start
            logger.info('[ARBITER TIME  ] [ === Loop stop  === ] [ Loop number=%-5d ] [PERF] [ %.3f ]s' % (loop_number, diff_time))
            
            # Protect it against time shifting from system: a turn longer than one second must not give
            # a negative sleep, and a clock going backward must not give a sleep longer than one second
            sleep_time = min(1.0, max(0.0, 1 - diff_time))
            self.sleep(sleep_time)
    
    
    def get_daemons(self, daemon_type):
        """ Returns the daemons list defined in our conf for the given type """
        # shouldn't the 'daemon_types' (whatever it is above) be always present?
        return getattr(self.conf, daemon_type + 's', None)
    
    
    # Helper functions for retention modules. So we give our broks and external commands
    def get_retention_data(self):
        """ Retention modules helper: what we give them to save is always a new empty dict. """
        nothing_to_save = dict()
        return nothing_to_save
    
    
    # Get back our data from a retention module
    def restore_retention_data(self, data):
        """ Retention modules helper: the data given back by a retention module is ignored. """
        return None
