#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2022:
#     Gabes Jean, naparuba@gmail.com
#     Gerhard Lausser, Gerhard.Lausser@consol.de
#     Gregory Starck, g.starck@gmail.com
#     Hartmut Goebel, h.goebel@goebel-consult.de
#     Martin Benjamin, b.martin@shinken-solutions.com
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import base64
import copy
import logging
import pickle
import threading
import time
import zlib

from shinken.compat import SHINKEN_PICKLE_PROTOCOL
from shinken.daemon import Daemon, Interface
from shinken.http_client import HTTPClient, HTTPExceptions
from shinken.inter_daemon_message import InterDaemonMessage
from shinken.inter_daemon_messages_manager_for_daemon import InterDaemonMessagesManagerForDaemon
from shinken.log import LoggerFactory
from shinken.misc.type_hint import TYPE_CHECKING

if TYPE_CHECKING:
    from shinken.log import Log, PartLogger
    from shinken.daemons.brokerdaemon import Broker
    from shinken.misc.type_hint import Optional, List, Dict, Union
    from shinken.objects.module import Module

# Root logger of this module: every logger below is a named sub part of it
logger = LoggerFactory.get_logger()
# Loggers used while loading a configuration, split between the initial load and the later updates
logger_configuration = logger.get_sub_part('CONFIGURATION')
logger_configuration_initial = logger_configuration.get_sub_part('INITIAL')
# The initial configuration is logged differently if it comes from the arbiter or from the retention
logger_configuration_initial_from_arbiter = logger_configuration_initial.get_sub_part('ARBITER')
logger_configuration_initial_from_retention = logger_configuration_initial.get_sub_part('RETENTION')
logger_configuration_update = logger_configuration.get_sub_part('UPDATE')
# Logger used for the connections to the other daemons
logger_connection = logger.get_sub_part('CONNECTION')

RECENT_CONFIGURATION_CHANGE_THRESHOLD = 120  # under 120s we consider a configuration change 'recent'


# Interface for the Arbiter, our big MASTER: it gives us our configuration
class IForArbiter(Interface):
    # HTTP interface offered to the Arbiter: each method delegates to the daemon stored in self.app.
    # The class level 'doc' variable is re-assigned before each method and copied into the 'doc'
    # attribute of that method right after its definition, next to the other per-method attributes
    # (need_lock, method, display_name).
    # NOTE(review): these attributes are presumably consumed by the Interface / HTTP layer - to confirm.
    app = None  # type: Union[BaseSatellite, Broker]
    doc = 'Remove a scheduler connection (internal)'
    
    
    # Arbiter ask us to stop managing a scheduler_id anymore, I do it and don't ask why
    def remove_from_conf(self, shard_id, reason):
        try:
            shard_id = int(shard_id)
        except (TypeError, ValueError):
            # ValueError: not a number (like 'abc'), TypeError: not convertible at all (like None).
            # The 1-tuple protects the formatting if shard_id itself is a tuple.
            logger_configuration_update.error('Arbiter asks to remove a shard %s but the format is invalid' % (shard_id,))
            return
        self.app.reset_configuration_change()  # arbiter change our configuration, we note this
        self.app.delete_shard_id(shard_id, reason)
    
    
    remove_from_conf.doc = doc
    
    doc = 'DISABLED'
    
    
    # Old call, disabled: always answers None
    def what_i_managed(self):
        return None
    
    
    what_i_managed.need_lock = False
    what_i_managed.doc = doc
    
    doc = 'Return the managed configuration ids (internal)'
    
    
    # Give to arbiter which configuration we are managing (with configuration incarnation, and schedulers)
    def get_currently_managed_configuration(self):
        manage_configuration = self.app.get_currently_managed_configuration()
        logger.debug('The arbiter asked me what I manage. It\'s %s' % manage_configuration)
        return manage_configuration
    
    
    get_currently_managed_configuration.need_lock = False
    get_currently_managed_configuration.doc = doc
    
    doc = 'Ask the daemon to drop its configuration and wait for a new one'
    
    
    # Call by arbiter if it thinks we are running, but we must not (like
    # if I was a spare that take a conf but the master returns, I must die
    # and wait a new conf)
    # Us: No please...
    # Arbiter: I don't care, hasta la vista baby!
    # Us: ... <- Nothing! We are dead! you don't get it or what??
    # Reading code is not a job for eyes only...
    def wait_new_conf(self):
        logger_configuration.info('Arbiter wants me to wait for a new configuration')
        super(IForArbiter, self).wait_new_conf()
        # On top of the generic behavior, forget the schedulers we knew
        self.app.schedulers.clear()
    
    
    wait_new_conf.doc = doc
    
    doc = 'Push broks objects to the daemon (internal)'
    
    
    # NB: following methods are only used by broker
    # Used by the Arbiter to push broks to broker
    # broks: a dict, only its values are stacked into app.arbiter_broks
    def push_broks(self, broks):
        with self.app.arbiter_broks_lock:
            self.app.arbiter_broks.extend(broks.values())
    
    
    push_broks.method = 'POST'
    # We are using a dedicated lock so this call from the arbiter does NOT take the global one :)
    push_broks.need_lock = False
    push_broks.doc = doc
    push_broks.display_name = 'Arbiter Architecture information push to Brokers'
    
    doc = 'Get the shinken internal commands (like recheck, set acknowledge, etc) from the daemon (internal)'
    
    
    # The arbiter ask us our external commands in queue
    # Same than push_broks, we will not use Global lock here,
    # and only lock for external_commands
    # Returns the commands pickled, then zlib compressed, then base64 encoded
    def get_external_commands(self):
        with self.app.external_commands_lock:
            cmds = self.app.get_external_commands()
            raw = pickle.dumps(cmds, SHINKEN_PICKLE_PROTOCOL)
            raw = base64.b64encode(zlib.compress(raw))
        return raw
    
    
    get_external_commands.need_lock = False
    get_external_commands.doc = doc
    
    doc = 'Does the daemon got configuration (receiver)'
    
    
    # NB: only useful for receiver
    # True as soon as the daemon has a current configuration (app.cur_conf is set)
    def got_conf(self):
        return self.app.cur_conf is not None
    
    
    got_conf.need_lock = False
    got_conf.doc = doc
    
    
    # Hand over the messages sent by the arbiter to the inter daemon messages manager of the daemon
    def push_messages(self, messages):
        # type: (List[InterDaemonMessage]) -> None
        self.app.inter_daemon_messages_manager.collect_new_messages_from_arbiter(messages)
    
    
    push_messages.doc = 'Messages received from Arbiter (internal)'
    push_messages.method = 'POST'
    push_messages.need_lock = False
    
    
    # Give the messages waiting for the arbiter: pickled, then zlib compressed, then base64 encoded
    def get_messages(self):
        # type: () -> bytes
        ret = self.app.inter_daemon_messages_manager.get_message_to_send_at_arbiter()
        get_messages_buffer = pickle.dumps(ret, SHINKEN_PICKLE_PROTOCOL)
        get_messages_buffer = zlib.compress(get_messages_buffer)
        get_messages_buffer = base64.b64encode(get_messages_buffer)
        return get_messages_buffer
    
    
    get_messages.doc = 'Messages to send to Arbiter (internal)'
    get_messages.need_lock = False


class BaseSatellite(Daemon):
    """Super class for Broker, scheduler, reactionner, poller"""
    
    # Should we look at passive property for external connection
    is_using_passive_connection_information = False
    
    
    def __init__(self, name, config_file, is_daemon, do_replace, debug, debug_file, daemon_id=0):
        # All the parameters are forwarded untouched to the Daemon constructor
        super(BaseSatellite, self).__init__(name, config_file, is_daemon, do_replace, debug, debug_file, daemon_id)
        # Our schedulers
        self.schedulers = {}
        # Our arbiters
        self.arbiters = {}
        
        # Our pollers, reactionners and receivers, maybe void for some daemons
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}
        # Now we create the interfaces
        self._add_http_interface(IForArbiter(self))
        
        # Queue of external commands given by the modules, it will be taken by the arbiter to be processed
        self.external_commands = []
        self.external_commands_lock = threading.RLock()
        
        # By default, we start as an activated and not spare daemon
        self.activated = True
        self.spare = False
        # We will have a thread by distant satellite, so we must protect our access
        self.satellite_lock = threading.Condition(threading.RLock())
        self.satellite_threads = {}
        
        # Keep broks so they can be eaten by a broker
        self.broks = {}
        
        # Last configuration change
        self._last_configuration_change = 0  # epoch of the last change from the arbiter
        
        # Manager of the messages exchanged with the arbiter, our handle_messages_received_from_arbiter is given as callback
        self.inter_daemon_messages_manager = InterDaemonMessagesManagerForDaemon(daemon_name=self.name, daemon_type=self.daemon_type, handle_messages_received_from_arbiter=self.handle_messages_received_from_arbiter)
    
    
    def do_loop_turn(self):
        raise NotImplementedError
    
    
    def load_modules_manager(self, *, keep_modules_info: bool = False) -> None:
        """Load the modules manager, then give it to the inter daemon messages manager and start its thread.

        :param keep_modules_info: forwarded as is to the parent implementation
        """
        super().load_modules_manager(keep_modules_info=keep_modules_info)
        messages_manager = self.inter_daemon_messages_manager
        messages_manager.update_on_new_configuration(self.name, self.modules_manager)
        messages_manager.start_thread()
    
    
    def update_daemon_name(self, daemon_name: str) -> None:
        """Rename the daemon and warn the inter daemon messages manager about the new name.

        :param daemon_name: the new name of this daemon
        """
        self.name = daemon_name
        messages_manager = self.inter_daemon_messages_manager
        messages_manager.update_on_new_configuration(self.name)
    
    
    def send_message_to_arbiter(self, inter_daemon_message):
        # type: (InterDaemonMessage) -> None
        """Give a message for the arbiter to the inter daemon messages manager."""
        messages_manager = self.inter_daemon_messages_manager
        messages_manager.send_message_to_arbiter(inter_daemon_message)
    
    
    def handle_messages_received_from_arbiter(self, inter_daemon_message):
        # type: (InterDaemonMessage) -> None
        """Handle a message coming from the arbiter.

        Only the 'ping' messages are managed here: a 'pong' is sent back, with the sender and the receiver swapped.
        Any other type of message is ignored.
        """
        if inter_daemon_message.message_type != 'ping':
            return
        pong = InterDaemonMessage('pong', message_to=inter_daemon_message.message_from, message_from=inter_daemon_message.message_to, data={})
        self.send_message_to_arbiter(pong)
    
    
    def reset_configuration_change(self):
        self._last_configuration_change = int(time.time())
    
    
    def is_configuration_change_recent(self):
        """Tell if the last configuration change noted by reset_configuration_change() is a recent one.

        :return: True when at most RECENT_CONFIGURATION_CHANGE_THRESHOLD seconds separate now from the last change
        """
        now = int(time.time())
        elapsed_time = abs(now - self._last_configuration_change)  # note: abs = also manage a time going back
        # Use the module level threshold instead of a duplicated magic number
        return elapsed_time <= RECENT_CONFIGURATION_CHANGE_THRESHOLD
    
    
    def is_activated(self):
        return self.activated
    
    
    # Someone asks us our broks. We send them, and clean the queue
    def get_broks(self):
        res = copy.copy(self.broks)
        self.broks.clear()
        return res
    
    
    # The arbiter can resend us new conf in the pyro_daemon port.
    # We do not want to lose time about it, so it's not a blocking  wait, timeout = 0s
    # If it sends us a new conf, we re init the connections of all schedulers
    def watch_for_new_conf(self, timeout):
        """Wait for `timeout` by delegating to self.sleep().

        NOTE(review): a new configuration is presumably received by the HTTP interface during this sleep - to confirm.
        """
        self.sleep(timeout)
    
    
    # by default, do nothing
    def clean_previous_run(self):
        return
    
    
    def _print_new_update_conf_received(self):
        # type: () -> PartLogger
        """Log the header of a configuration loading and give the logger to use for the rest of the loading.

        The configuration is seen as the first one when there is no current configuration (for a scheduler,
        the 'conf' attribute must also be missing or None). In this case the header tells if the
        configuration comes from the retention or from the arbiter, otherwise it is logged as an update.

        :return: the INITIAL configuration logger for a first configuration, the UPDATE one otherwise
        """
        configuration_is_from_retention = self.new_conf and self.new_conf.get('arbiter_trace', {}).get('from_retention', False)
        
        is_first_configuration = self.cur_conf is None and (self.daemon_type != 'scheduler' or getattr(self, 'conf', None) is None)
        if is_first_configuration:
            returned_logger = logger_configuration_initial
            if configuration_is_from_retention:
                incarnation_logger = logger_configuration_initial_from_retention
                header = '----- Loading the new configuration from retention'
            else:
                incarnation_logger = logger_configuration_initial_from_arbiter
                header = '----- Loading the new configuration from the arbiter'
        else:
            # Configuration update: the same logger is used for the header and for the caller
            returned_logger = logger_configuration_update
            incarnation_logger = returned_logger
            header = '----- Loading a configuration update from the arbiter'
        
        incarnation_logger.info(header)
        self.print_configuration_incarnation_log_entry_if_need(incarnation_logger)
        return returned_logger
    
    
    def _set_is_activated(self, activated, _logger):
        # type: (bool, PartLogger) -> bool
        was_activated = self.activated
        self.activated = activated
        if self.activated != was_activated:
            if self.activated:
                _logger.info('Switching from sleeping to active mode')
            else:
                _logger.info('Switching from active to sleeping mode')
        
        if activated and self.inter_daemon_messages_manager:
            self.inter_daemon_messages_manager.start_thread()
        
        return was_activated
    
    
    def _set_spare(self, spare, _logger):
        # type: (bool, PartLogger) -> bool
        was_spare = self.spare
        self.spare = spare
        if self.spare != was_spare:
            if self.spare:
                _logger.info('Will be used as a SPARE daemon')
            else:
                _logger.info('Will be used as a MASTER daemon')
        return was_spare
    
    
    # When arbiter deactivated us because we are a spare we need to clean some data.
    def _go_as_not_active(self, was_activated: 'bool', _logger: 'PartLogger|Log', modules_configuration: 'list[Module]|None') -> None:
        """Clean the state of the daemon when it is not activated.

        :param was_activated: True if the daemon was activated before, only used to know if the modules stop is logged
        :param _logger: logger used for the information message
        :param modules_configuration: forwarded as is to modules_manager.go_to_idle_mode()
        """
        # I'm not activated AKA spare, make some clean and log that
        self.clean_previous_run()
        self.arbiters.clear()
        self._clean_known_daemons()
        
        if was_activated:
            _logger.info('Stopping modules as we are going in sleep mode')
        # The inter daemon messages manager is stopped whether we were activated or not
        if self.inter_daemon_messages_manager:
            self.inter_daemon_messages_manager.stop()
        self.modules_manager.go_to_idle_mode(modules_configuration)
    
    
    def do_stop(self):
        """Stop the inter daemon messages manager (if we have one), then run the parent stop."""
        messages_manager = self.inter_daemon_messages_manager
        if messages_manager:
            messages_manager.stop()
        super().do_stop()
    
    
    # When arbiter deactivated us we need to clear known daemons because shinken-healthcheck talk to info.
    def _clean_known_daemons(self):
        for daemon_list in ('receivers', 'pollers', 'reactionners', 'schedulers'):
            if hasattr(self, daemon_list):
                getattr(self, daemon_list).clear()
    
    
    @staticmethod
    def __print_added_daemon_line(daemon_type, daemon_name, daemon_id, uri, _logger):
        # type: (str, str, Optional[str], str, PartLogger) -> None
        
        shard_string = '[shard_id=%4d]' % daemon_id if daemon_id is not None else '               '
        _logger.info('  + ADDED     %-13s : [name=%-20s] %s [uri=%s]' % (daemon_type, daemon_name, shard_string, uri))
    
    
    @staticmethod
    def __print_removed_daemon_line(daemon_type, daemon_name, daemon_id, uri, _logger):
        # type: (str, str, Optional[str], str, PartLogger) -> None
        
        shard_string = '[shard_id=%4d]' % daemon_id if daemon_id is not None else '               '
        _logger.info('  - REMOVED   %-13s : [name=%-20s] %s [uri=%s]' % (daemon_type, daemon_name, shard_string, uri))
    
    
    def _print_new_and_deleted_daemons(
            self,
            new_schedulers: list = None,
            deleted_schedulers: list = None,
            new_arbiters: list = None,
            new_pollers: list = None,
            deleted_pollers: list = None,
            new_reactionners: list = None,
            deleted_reactionners: list = None,
            _logger: 'PartLogger' = None,
            loaded_from_retention: bool = False
    ) -> None:
        # Print new daemons, in a bloc
        
        if deleted_reactionners is None:
            deleted_reactionners = []
        if new_reactionners is None:
            new_reactionners = []
        if deleted_pollers is None:
            deleted_pollers = []
        if new_pollers is None:
            new_pollers = []
        if new_arbiters is None:
            new_arbiters = []
        if deleted_schedulers is None:
            deleted_schedulers = []
        if new_schedulers is None:
            new_schedulers = []
        
        if deleted_schedulers or deleted_pollers or deleted_reactionners:
            if loaded_from_retention:
                _logger.info('Retention configuration involve to remove daemons:')
            else:
                _logger.info('The arbiter asked us to remove daemons:')
            for (daemon_name, daemon_id, uri) in deleted_schedulers:
                self.__print_removed_daemon_line('scheduler', daemon_name, daemon_id, uri, _logger)
            for (daemon_name, uri) in deleted_pollers:
                self.__print_removed_daemon_line('poller', daemon_name, None, uri, _logger)
            for (daemon_name, uri) in deleted_reactionners:
                self.__print_removed_daemon_line('reactionner', daemon_name, None, uri, _logger)
        
        if new_schedulers or new_arbiters or new_pollers or new_reactionners:
            if loaded_from_retention:
                _logger.info('Retention configuration involves new daemons:')
            else:
                _logger.info('Arbiter configuration involves new daemons:')
            for (daemon_name, daemon_id, uri) in new_schedulers:
                self.__print_added_daemon_line('scheduler', daemon_name, daemon_id, uri, _logger)
            for (daemon_name, uri) in new_arbiters:
                self.__print_added_daemon_line('arbiter', daemon_name, None, uri, _logger)
            for (daemon_name, uri) in new_pollers:
                self.__print_added_daemon_line('poller', daemon_name, None, uri, _logger)
            for (daemon_name, uri) in new_reactionners:
                self.__print_added_daemon_line('reactionner', daemon_name, None, uri, _logger)
    
    
    def delete_shard_id(self, shard_id, reason):
        """Stop managing a shard: forget its scheduler and remove it from the current configuration.

        If the shard is unknown, this is only logged. Otherwise the removal is logged, then, under the
        satellite lock, the scheduler is dropped from self.schedulers and from the 'schedulers' part of
        self.cur_conf, and the 'configuration_retention_save' hook point is called with the current configuration.

        :param shard_id: key of the scheduler in self.schedulers
        :param reason: optional text added to the log (nothing is added if it is empty)
        """
        scheduler = self.schedulers.get(shard_id, None)
        if scheduler is None:
            known_shard_ids = list(self.schedulers.keys())
            logger_configuration_update.info('Arbiter asked me to remove the shard %s I do not have it in my keys: %s' % (shard_id, known_shard_ids))
            return
        
        scheduler_name, uri = scheduler['name'], scheduler['uri']
        reason_string = ' because %s' % reason if reason else ''
        logger_configuration_update.info('----- Loading a configuration update from the arbiter')
        logger_configuration_update.info('The arbiter ask us to remove daemons %s:' % reason_string)
        self.__print_removed_daemon_line('scheduler', scheduler_name, shard_id, uri, logger_configuration_update)
        
        with self.satellite_lock:
            self.schedulers.pop(shard_id)
            # The shard can be missing in the current configuration, this is not an error
            self.cur_conf['schedulers'].pop(shard_id, None)
            self.hook_point('configuration_retention_save', self.cur_conf)
    
    
    @staticmethod
    def _find_previous_daemon_by_uri(into, uri):
        for (_id, daemon_entry) in into.items():
            if daemon_entry['uri'] == uri:
                return _id
        return None
    
    
    @staticmethod
    def _get_daemon_uri(daemon):
        proto = 'https' if daemon['use_ssl'] else 'http'
        uri = '%s://%s:%s/' % (proto, daemon['address'], daemon['port'])
        return uri
    
    
    @staticmethod
    def _update_daemon_addr_port_with_satellitemap(daemon, satellite_map, _logger):
        # type: (Dict, Dict, Optional[PartLogger]) -> None
        
        daemon_name = daemon['name']
        map_entry = satellite_map.get(daemon_name, None)
        if map_entry:
            # MAP ENTRY will looks like   {'port': 7768, 'address': '51.15.255.102'}
            old_address = daemon.get('address', '')
            old_port = daemon.get('port', '')
            new_address = map_entry.get('address', '')
            new_port = map_entry.get('port', '')
            if _logger:
                _logger.get_sub_part('SATELLITE_MAP').info('Replacing the daemon %-15s to address:port from %s:%s => %s:%s as defined in our daemon .cfg file (satellitemap property)' % (daemon_name, old_address, old_port, new_address, new_port))
            daemon.update(map_entry)
    
    
    def _set_default_values_to_scheduler_entry(self, entry):
        raise NotImplementedError()
    
    
    def _set_daemon_id_of_scheduler(self, daemon, daemon_id):
        raise NotImplementedError()
    
    
    # Simple function to stack an entry to new/delete schedulers/pollers/reactionners/etc
    @staticmethod
    def __stack_into_daemons_list(daemon_name, daemon_id, daemon_uri, lst, do_stack_id):
        daemon_id_entry = daemon_id if do_stack_id else None
        lst.append((daemon_name, daemon_id_entry, daemon_uri))
    
    
    def _set_or_update_scheduler_from_configuration(self, new_daemon, new_daemon_id, global_conf, new_schedulers, deleted_schedulers, _logger, do_stack_daemon_id=True):
        """Save a scheduler received in a configuration into self.schedulers, at the id new_daemon_id.

        A scheduler is recognized as already known by its uri. Each move, replacement or creation is
        stacked as a (name, id, uri) entry into new_schedulers / deleted_schedulers.

        :param new_daemon: the scheduler entry from the configuration, updated in place (satellitemap values, 'uri')
        :param new_daemon_id: the key to use in self.schedulers
        :param global_conf: only its 'satellitemap' entry is read here
        :param new_schedulers: list filled with the entries to show as added
        :param deleted_schedulers: list filled with the entries to show as removed
        :param _logger: logger for the debug / info messages
        :param do_stack_daemon_id: if False, the stacked entries have None instead of the daemon id
        """
        daemon_name = new_daemon['name']
        
        # replacing the daemon address and port by those defined in satellitemap
        # IMPORTANT: do this BEFORE get uri
        self._update_daemon_addr_port_with_satellitemap(new_daemon, global_conf['satellitemap'], _logger)
        uri = self._get_daemon_uri(new_daemon)
        new_daemon['uri'] = uri
        
        # There are 5 cases:
        # * 1=> a new daemon with new id  => just create it
        # * 2=> a new daemon with old id  (one replace another) => delete old, create new entry
        # * 3=> old daemon with new id  (daemon change) => no log DELETE but just move the entry
        # * 4=> another daemon on old id  (daemon change, and replace another) => log the DELETE, move the entry
        # * 5=> same daemon on same id  => touch nothing
        
        previous_id = self._find_previous_daemon_by_uri(self.schedulers, uri)
        
        # But maybe we need to keep them from the past (if same uri => aka same daemon)
        if previous_id is not None:  # Daemon was already existing (case 3->5)
            
            if previous_id == new_daemon_id:  # CASE 5: same daemon on same id
                _logger.debug('The daemon %s did not change id (%s) skipping it' % (daemon_name, new_daemon_id))
                return
            
            # => CASE 3 or 4 (daemon change)
            previous_daemon = self.schedulers[previous_id]  # must exist as previous_id is not None
            
            _logger.info('The daemon %s did switch id %s -> %s' % (daemon_name, previous_id, new_daemon_id))
            # We log it is deleted from this old place
            self.__stack_into_daemons_list(previous_daemon['name'], previous_id, previous_daemon['uri'], deleted_schedulers, do_stack_daemon_id)
            
            # Maybe it can overwrite another daemon, if so, show it
            new_place_daemon_that_will_be_overlap = self.schedulers.get(new_daemon_id, None)
            if new_place_daemon_that_will_be_overlap is not None:  # CASE 4
                self.__stack_into_daemons_list(new_place_daemon_that_will_be_overlap['name'], new_daemon_id, new_place_daemon_that_will_be_overlap['uri'], deleted_schedulers, do_stack_daemon_id)
            else:  # CASE 3: old daemon with new id  (daemon change)
                pass
            # In all cases, we are logging this new daemon +ADDED (to have its new number)
            self.__stack_into_daemons_list(daemon_name, new_daemon_id, uri, new_schedulers, do_stack_daemon_id)
            
            # The scheduler is no more at this entry
            del self.schedulers[previous_id]
            # Let the old object know it did change
            self._set_daemon_id_of_scheduler(previous_daemon, new_daemon_id)
            
            # NOTE: the previous entry is kept, so new_daemon itself is not saved in this case
            self.schedulers[new_daemon_id] = previous_daemon  # do not lose the old object, just moving it
        else:  # New daemon, CASE 1 or 2
            _logger.debug('The daemon %s is a new one (will be set at %s)' % (daemon_name, new_daemon_id))
            # Create the new structure based on what the arbiter did send us
            self._set_default_values_to_scheduler_entry(new_daemon)
            self._set_daemon_id_of_scheduler(new_daemon, new_daemon_id)
            
            # Now check if we didn't overlap an old scheduler in this place
            deleted_scheduler = self.schedulers.get(new_daemon_id, None)
            if deleted_scheduler is not None:  # CASE 2: we are replacing this one
                self.__stack_into_daemons_list(deleted_scheduler['name'], new_daemon_id, deleted_scheduler['uri'], deleted_schedulers, do_stack_daemon_id)
            else:  # CASE 1: new one
                pass
            # In both cases, we are logging this new daemon
            self.__stack_into_daemons_list(daemon_name, new_daemon_id, uri, new_schedulers, do_stack_daemon_id)
            # Now save the object
            self.schedulers[new_daemon_id] = new_daemon
    
    
    # Give the arbiter the data about what I manage as shards/schedulers to know if we should
    # be updated or not. We return the shard ids mapped to the scheduler names
    # Note: maybe the arbiter did just send us a new_conf, but we did not consume it, so
    # if we have one, we should look in it instead of the running conf that will be changed at
    # the end of our turn
    def get_currently_managed_configuration(self):
        configuration_incarnation_dump = {}
        if self.configuration_incarnation is not None:  # at start, we are void
            configuration_incarnation_dump = self.configuration_incarnation.dump_as_json()
        r = {'configuration_incarnation_dump': configuration_incarnation_dump, 'schedulers': {}, 'activated': self.activated}
        
        # Maybe we did receive a new configuration but did not consume it
        with self.satellite_lock:
            if self.new_conf:  # got a new conf, use it instead of the running one
                for (shard_id, scheduler) in self.new_conf['schedulers'].items():
                    r['schedulers'][shard_id] = scheduler['name']
                return r
        
        # no new conf, and no conf at all? we are just void
        if not self.already_have_conf:
            return None
        
        # ok look in the running configuration
        for (shard_id, scheduler) in self.schedulers.items():
            # Maybe it's a synchronizer for try check that is here, if so, skip it
            if scheduler.get('unmanaged_by_arbiter', False):
                continue
            r['schedulers'][shard_id] = scheduler['name']
        
        logger.debug('The arbiter ask us what we are managing, it is %s ' % r)
        
        return r
    
    
    # Call by arbiter to get our external commands
    def get_external_commands(self):
        res = self.external_commands
        self.external_commands = []
        return res
    
    
    def get_satellite_connections(self):
        res = []
        for (daemon_type, daemon_list) in [('receiver', getattr(self, 'receivers', None)), ('poller', getattr(self, 'pollers', None)), ('reactionner', getattr(self, 'reactionners', None)), ('scheduler', getattr(self, 'schedulers', None))]:
            if not daemon_list:
                continue
            
            for daemon_conf in daemon_list.values():
                # scheduler can have directly scheduler object here, skip it
                if not isinstance(daemon_conf, dict):
                    continue
                # if synchronizer dummy entry, skip it
                if daemon_conf.get('unmanaged_by_arbiter', False):
                    continue
                # skip passive pollers on schedulers
                if self.is_using_passive_connection_information and 'passive' in daemon_conf and not daemon_conf['passive']:
                    continue
                
                proto = 'https' if daemon_conf['use_ssl'] else 'http'
                con_info = {
                    'name'   : daemon_conf['name'],
                    'type'   : daemon_type,
                    'address': daemon_conf['address'],
                    'proto'  : proto,
                    'uri'    : daemon_conf['uri'],
                    'port'   : daemon_conf['port']
                }
                if 'passive' in daemon_conf:
                    con_info['passive'] = daemon_conf['passive']
                if 'timeout' in daemon_conf:
                    con_info['timeout'] = daemon_conf['timeout']
                res.append(con_info)
        return res
    
    
    # Get the good tabs for links by the kind. If unknown, return None
    def get_link_from_type(self, daemon_type, daemon_id):
        t = {
            'scheduler': self.schedulers,
            'arbiter'  : self.arbiters,
        }
        with self.satellite_lock:
            return t.get(daemon_type, {}).get(daemon_id, None)
    
    
    def get_any_link_from_type(self, daemon_type):
        t = {
            'scheduler': self.schedulers,
            'arbiter'  : self.arbiters,
        }
        with self.satellite_lock:
            return len(t.get(daemon_type, {})) != 0
    
    
    # Check if we do not connect too often to this
    def is_connection_try_too_close(self, elt):
        now = time.time()
        last_connection = elt['last_connection']
        if now - last_connection < 5:
            return True
        return False
    
    
    # initialize or re-initialize connection with scheduler
    def pynag_con_init(self, sat_entry):
        """Initialize or re-initialize the connection to a distant daemon.

        Nothing is done for a scheduler that is not active, or if the last try is too recent
        (see is_connection_try_too_close). Otherwise the previous connection (if any) is closed, a new
        HTTPClient is saved in sat_entry['con'] and the distant daemon is pinged.
        On a connection problem, sat_entry['con'] is left to None.

        :param sat_entry: the daemon entry; 'type', 'last_connection', 'uri', 'name', 'con' and
                          'hard_ssl_name_check' are read ('active' too for a scheduler),
                          'timeout' and 'data_timeout' are optional (3s and 120s by default)
        """
        daemon_type = sat_entry['type']
        
        if daemon_type == 'scheduler':
            # If scheduler is not active, I do not try to init
            # it is just useless
            is_active = sat_entry['active']
            if not is_active:
                return
        
        # If we try to connect too much, we slow down our tests
        if self.is_connection_try_too_close(sat_entry):
            return
        
        # Ok, we can now update it
        sat_entry['last_connection'] = time.time()
        
        uri = sat_entry['uri']
        timeout = sat_entry.get('timeout', 3)
        data_timeout = sat_entry.get('data_timeout', 120)
        name = sat_entry['name']
        if sat_entry['con']:
            try:
                sat_entry['con'].con.close()
            except Exception:
                # Best effort: the old connection can already be broken, but we
                # must not swallow KeyboardInterrupt / SystemExit like a bare except does
                pass
            sat_entry['con'] = None
        try:
            sat_entry['con'] = HTTPClient(uri=uri, strong_ssl=sat_entry['hard_ssl_name_check'], timeout=timeout, data_timeout=data_timeout)
        except HTTPExceptions as exp:
            # The client cannot be created: log it and stay without connection
            logger_connection.info('Connection problem to the %s %s (uri="%s"): %s' % (daemon_type, name, uri, str(exp)))
            sat_entry['con'] = None
            return
        
        # Time the first ping, only to show it in the log
        before_ = time.time()
        did_connect = self.ping_and_check_distant_daemon(sat_entry)
        elapsed = time.time() - before_
        
        if did_connect:
            logger_connection.info('Connection OK to the %s %s in %.3fs (uri="%s", ping_timeout=%ss, data_timeout=%ss)' % (daemon_type, name, elapsed, uri, timeout, data_timeout))
    
    
    # For a new distant daemon, if it is a scheduler, ask for a new full broks generation
    def _manage_new_distant_daemon_incarnation(self, entry, old_incarnation, new_incarnation):
        raise NotImplemented('_manage_new_distant_daemon_incarnation')
    
    
    def ping_and_check_distant_daemon(self, sat_entry):
        """Ping a distant daemon and look if its incarnation did change.

        If there is no connection yet, pynag_con_init() is called first to try to create it.
        The incarnation given by the daemon is saved in sat_entry['daemon_incarnation'] and, if it is
        different from the previous one, _manage_new_distant_daemon_incarnation() is called
        (outside the satellite lock).

        :param sat_entry: the daemon entry ('con', 'type', 'name' and 'daemon_incarnation' are used)
        :return: True if the daemon did answer, False if there is no connection or on a connection
                 problem (the connection is then closed and sat_entry['con'] set to None)
        """
        con = sat_entry['con']  # type: HTTPClient
        daemon_type = sat_entry['type']
        if con is None:
            self.pynag_con_init(sat_entry)
            con = sat_entry['con']
            if con is None:
                return False
        try:
            con.get('ping')
            new_incarnation = con.get('get_daemon_incarnation')
            has_new_incarnation = False
            # protect daemon_incarnation from modification
            with self.satellite_lock:
                daemon_incarnation = sat_entry['daemon_incarnation']
                
                if new_incarnation != daemon_incarnation:
                    # Only log schedulers currently, that's the only one that matter currently here
                    if daemon_type == 'scheduler':
                        if new_incarnation == {}:
                            logger_configuration.info(f'''{sat_entry['name']} has not yet received any configuration from Arbiter.''')
                        elif daemon_incarnation:
                            # from = what we had saved (old), to = what the daemon just gave us (new)
                            logger_configuration.info(f'''{sat_entry['name']} has changed its configuration from {daemon_incarnation['configuration_incarnation_uuid']} to {new_incarnation['configuration_incarnation_uuid']}''')
                        else:  # it was without any configuration
                            logger_configuration.info(f'''{sat_entry['name']} has received a new configuration (uuid={new_incarnation['configuration_incarnation_uuid']})''')
                    has_new_incarnation = True
                # Ok all is done, we can save this new incarnation
                sat_entry['daemon_incarnation'] = new_incarnation
            if has_new_incarnation:
                self._manage_new_distant_daemon_incarnation(sat_entry, daemon_incarnation, new_incarnation)
            return True
        except HTTPExceptions as exp:
            logger.error('Connection problem to the %s %s: %s' % (daemon_type, sat_entry['name'], str(exp)))
            try:
                con.con.close()
            except Exception:
                # Best effort: the connection is already in error, but we must not
                # swallow KeyboardInterrupt / SystemExit like a bare except does
                pass
            sat_entry['con'] = None
            return False
    
    
    # Daemon get something from distant, should be implemented!
    def get_jobs_from_distant(self, e):
        """Fetch the pending data from one distant daemon; must be overridden.

        do_satellite_thread() calls it after a successful ping of the distant daemon.

        :param e: link entry of the distant daemon
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError('get_jobs_from_distant')
    
    
    # By default, we are connecting to everyone
    # only the scheduler will skip some pollers/reactionners because it needs to
    # connect only to passive one
    def should_connect_to_distant_satellite(self, satellite_type, distant_link):
        """Tell if this daemon must exchange with the given distant satellite.

        :param satellite_type: type of the distant satellite (not used by this default implementation)
        :param distant_link: link entry of the distant satellite (not used by this default implementation)
        :return: always True here
        """
        return True
    
    
    def _do_satellite_thread(self, s_type, s_id, t_name):
        """Thread entry point: run do_satellite_thread() and log any exception instead of letting it escape.

        :param s_type: type of the distant satellite
        :param s_id: id of the distant satellite
        :param t_name: name of the thread, only used in the logs here
        """
        exchange_with = (s_type, t_name)
        try:
            logger.debug('SATELLITE THREAD: Starting thread to exchange with %s [%s]' % exchange_with)
            self.do_satellite_thread(s_type, s_id, t_name)
            logger.debug('SATELLITE THREAD: Exiting thread to exchange with %s [%s]' % exchange_with)
        except Exception as e:
            # Nothing is re-raised: the error is only reported with its stack, and the thread ends here
            logger.warning(f'The thread for the {s_type} {t_name} encountered error ({str(e)}), it will be restarted. Your monitoring is still working. Please fill a bug with your log file.')
            logger.print_stack(level=logging.WARNING)
    
    
    def do_satellite_thread(self, s_type, s_id, t_name):
        """Exchange loop with one distant satellite, run until the thread is no longer needed.

        The loop ends when the daemon is interrupted, when the link disappeared or changed its uri,
        or when this daemon was deactivated by the Arbiter. On each turn the distant daemon is pinged
        and, if it answered, get_jobs_from_distant() is called. The loop then sleeps for the
        'loop_delay' of the link (1 when the key is missing).

        :param s_type: type of the distant satellite
        :param s_id: id of the distant satellite
        :param t_name: name of the thread, only used in the logs here
        """
        with self.satellite_lock:
            link = self.get_link_from_type(s_type, s_id)
        if link is None:  # already down?
            return
        
        # The uri we were started for: if the link gets another one, this thread is obsolete
        initial_uri = link['uri']
        while not self.interrupted:
            # Fetch the link again on each turn, to see if we are still needed or not
            with self.satellite_lock:
                link = self.get_link_from_type(s_type, s_id)
            
            if link is None or initial_uri != link['uri']:
                logger.info('SATELLITE THREAD: This connection thread to %s with id %s (%s) is no more needed.' % (s_type, s_id, t_name))
                return
            
            # A daemon deactivated by the Arbiter stops its threads
            if self.deactivated_by_arbiter:
                logger.info(f'SATELLITE THREAD: The connection thread to the {s_type} with the id {s_id} ({t_name}) is no longer needed as this daemon is now idle.')
                return
            
            # When the distant daemon is currently not interesting, the thread still loops so it is already
            # running when this changes; the ping is only done for daemons we should connect to.
            if self.should_connect_to_distant_satellite(s_type, link) and self.ping_and_check_distant_daemon(link):
                self.get_jobs_from_distant(link)
            
            time.sleep(link.get('loop_delay', 1))
    
    
    # We will look for satellites, and if we don't have a thread or a dead one, start a new one
    def assert_valid_satellite_threads(self):
        """Check the thread of each configured satellite and drop the dead threads of removed ones.

        Under the satellite lock, _assert_one_satellite_thread() is called for every scheduler, poller,
        reactionner and receiver entry. Then the threads registered under a name that is no longer
        in these entries are joined and forgotten, but only once they are no longer alive.
        """
        with self.satellite_lock:
            satellites_by_type = (
                ('scheduler', self.schedulers),
                ('poller', self.pollers),
                ('reactionner', self.reactionners),
                ('receiver', self.receivers),
            )
            configured_names = set()
            for (satellite_type, satellites) in satellites_by_type:
                for (satellite_id, satellite_entry) in satellites.items():
                    self._assert_one_satellite_thread(satellite_type, satellite_id, satellite_entry)
                    configured_names.add(satellite_entry['name'])
            
            # Threads of satellites that are no longer in the configuration: a thread still alive
            # is left untouched, it will be looked at again on a next call.
            for orphan_name in set(self.satellite_threads) - configured_names:
                orphan_thread = self.satellite_threads[orphan_name]  # type: threading.Thread
                if orphan_thread.is_alive():
                    continue
                orphan_thread.join()
                del self.satellite_threads[orphan_name]
    
    
    def _assert_one_satellite_thread(self, satellite_type, satellite_id, satellite_entry):
        satellite_name = satellite_entry['name']
        thread = self.satellite_threads.get(satellite_name, None)
        restart = False
        
        if thread is None:
            restart = True
        elif not thread.is_alive():
            thread.join(1)
            restart = True
        
        if restart:
            thread_name = 'SAT:%.3s:%s' % (satellite_type, ('%s%s' % (satellite_name[:5], satellite_name[-2:])) if (len(satellite_name) > 7 and '0' <= satellite_name[-1] <= '9') else satellite_name)
            thread = threading.Thread(None, target=self._do_satellite_thread, name=thread_name, args=(satellite_type, satellite_id, thread_name))
            thread.daemon = True
            thread.start()
            self.satellite_threads[satellite_name] = thread
