#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import base64
import cPickle
import os
import threading
import time
import traceback
import zlib
from multiprocessing import active_children

from shinken.daemon import Interface, IStatsInterface
from shinken.external_command import ExternalCommandManager
from shinken.http_client import HTTPClient, HTTPExceptions
from shinken.log import logger, get_chapter_string, get_section_string, LoggerFactory
from shinken.misc.type_hint import TYPE_CHECKING
from shinken.property import PathProp, IntegerProp
from shinken.runtime_stats.cpu_stats import cpu_stats_helper
from shinken.runtime_stats.threads_dumper import WatchDogThreadDumper
from shinken.withinventorysatellite import WithInventorySatellite, IArbiterToInventorySatellite

if TYPE_CHECKING:
    from shinken.log import PartLogger
    from shinken.misc.type_hint import Any, Dict, List, NoReturn

# Log prefix used by the scheduler connection messages of this module
CHAPTER_CONNECTION = get_chapter_string(u'CONNECTION')
# Log prefix used by the external commands dispatching messages of this module
_BUS_COMMANDS_STR = get_section_string(u'BUS COMMANDS')

# Dedicated logger part for the host names <-> scheduler shard mapping messages
logger_raw = LoggerFactory.get_logger()
logger_host_mapping = logger_raw.get_sub_part(u'HOSTS TO SCHEDULER MAPPING')


class IStatsReceiver(IStatsInterface):
    """Statistics HTTP interface of the receiver daemon."""
    
    
    def get_raw_stats(self, param=u''):
        # type: (unicode) -> Dict[unicode, Any]
        # Plain delegation to the generic stats interface, kept here so the
        # receiver can attach its own HTTP attributes on the method below
        parent_interface = super(IStatsReceiver, self)
        return parent_interface.get_raw_stats(param=param)
    
    
    get_raw_stats.doc = u'get stats of the daemon'
    get_raw_stats.need_lock = False
    
    
    def _daemon_get_raw_stats(self, param=u''):
        # type: (unicode) -> Dict[unicode, Any]
        # Receiver specific part of the stats: command buffer, modules, conf state
        daemon = self.app
        # The modules manager may not be set on the daemon, so look it up defensively
        modules_manager = getattr(daemon, u'modules_manager', None)
        return {
            u'command_buffer_size': len(daemon.external_commands),
            u'module_stats'       : self._get_module_stats(modules_manager, param),
            u'http_errors_count'  : daemon.http_errors_count,
            u'have_conf'          : daemon.cur_conf is not None,
            u'activated'          : daemon.activated,
            u'spare'              : daemon.spare,
        }


class IBroks(Interface):
    """Interface for Brokers
    They connect here and get all broks (data for brokers)
    data must be ORDERED! (initial status BEFORE update...)

    """
    
    doc = 'Get broks from the daemon'
    
    
    # A broker asks us for our broks. The bname argument is accepted but not used here.
    def get_broks(self, bname):
        # Serialization chain: pickle -> zlib (compression level 2) -> base64
        broks = self.app.get_broks()
        pickled_broks = cPickle.dumps(broks)
        compressed_broks = zlib.compress(pickled_broks, 2)
        return base64.b64encode(compressed_broks)
    
    
    get_broks.doc = doc


class IReceiverHostMapping(Interface):
    """Interface for the arbiter, so we receive a host mapping for the direct routing mode
    """
    
    
    # Used by the arbiter to push the host names managed by one scheduler shard
    def push_host_names(self, configuration_incarnation_uuid, shard_id, hnames):
        # type: (unicode, int, List[unicode]) -> None
        # Normalize the shard id the same way have_host_names_mapping() does, so the
        # key stored here always matches the key looked up there, even if the HTTP
        # layer hands us the value as a string
        shard_id = int(shard_id)
        self.app.push_host_names(configuration_incarnation_uuid, shard_id, hnames)
    
    
    push_host_names.need_lock = False
    push_host_names.method = 'POST'
    push_host_names.doc = 'internal'
    push_host_names.display_name = u'Hosts/schedulers mapping send from an Arbiter server'
    
    
    # Used by the arbiter to know if the receiver knows about a mapping
    # configuration_uuid+shard_id => host names
    def have_host_names_mapping(self, configuration_incarnation_uuid, shard_id):
        # type: (unicode, int) -> bool
        shard_id = int(shard_id)  # beware about http losing types
        return self.app.have_host_names_mapping(configuration_incarnation_uuid, shard_id)
    
    
    have_host_names_mapping.doc = 'internal'
    have_host_names_mapping.need_lock = False


# Our main APP class
class Receiver(WithInventorySatellite):
    """Receiver daemon application.

    It buffers the external commands given by its modules. Without direct routing
    the commands are only kept in self.external_commands; in direct routing mode
    the receiver posts them itself to the schedulers.
    """
    # Daemon properties: the inherited ones, plus the receiver specific defaults
    properties = WithInventorySatellite.properties.copy()
    properties.update({
        'pidfile'  : PathProp(default='receiverd.pid'),
        'port'     : IntegerProp(default='7773'),
        'local_log': PathProp(default='receiverd.log'),
    })
    
    # Labels for the logging: external command type => label shown in the
    # per scheduler summary (types missing here are counted as 'Other command')
    command_log_labels = {
        'PROCESS_HOST_CHECK_RESULT'   : 'Host result',
        'PROCESS_SERVICE_CHECK_RESULT': 'Check result',
        'ACKNOWLEDGE_HOST_PROBLEM'    : 'Host acknowledge',
        'ACKNOWLEDGE_SVC_PROBLEM'     : 'Check acknowledge',
        'REMOVE_HOST_ACKNOWLEDGEMENT' : 'Host acknowledge deletion',
        'REMOVE_SVC_ACKNOWLEDGEMENT'  : 'Check acknowledge deletion',
        'SCHEDULE_FORCED_HOST_CHECK'  : 'Schedule immediate host',
        'SCHEDULE_FORCED_SVC_CHECK'   : 'Schedule immediate check',
        'SCHEDULE_HOST_DOWNTIME'      : 'Create host downtime',
        'SCHEDULE_SVC_DOWNTIME'       : 'Create check downtime',
        'DEL_ALL_SVC_DOWNTIMES'       : 'Delete check downtimes',
        'DEL_ALL_HOST_DOWNTIMES'      : 'Delete host downtimes',
    }
    
    
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, daemon_id=0):
        """Initialize the daemon state and declare its HTTP interfaces."""
        super(Receiver, self).__init__('receiver', config_file, is_daemon, do_replace, debug, debug_file, daemon_id)
        
        # Other daemons we know about
        self.arbiters = {}
        self.pollers = {}
        self.reactionners = {}
        
        # Modules are loaded only once, this flag remembers it
        self.have_modules = False
        
        # External commands: the resolved queue, the per scheduler log state,
        # and the buffer of commands that were not processed yet
        self.external_commands = []
        self.command_status_output = {}
        self.unprocessed_external_commands = []
        
        # All broks to manage
        self.broks = {}
        
        # Direct routing data: host name => shard id, built from the raw
        # mappings (conf_uuid => shard_id => [hostnames]); each one has its own lock
        self.host_assoc = {}
        self.host_assoc_lock = threading.RLock()
        self._host_name_mappings = {}
        self._host_name_mappings_lock = threading.RLock()
        
        self.direct_routing = False
        
        # Declare HTTP methods interfaces, in this exact order
        for interface_class in (IStatsReceiver, IBroks, IReceiverHostMapping, IArbiterToInventorySatellite):
            self._add_http_interface(interface_class(self))
        
        # The external command manager is only created when a configuration is set up
        self.external_command = None
        self.mainloop_watchdog = WatchDogThreadDumper(u'Main loop', wait_time=60 * 30, dump_interval=60 * 5, fatal_dead_lock_delay=60 * 30, multi_usage=True)
    
    
    # Give us objects we need to manage. Only external commands are kept:
    # Brok -> ignored (the receiver does not generate broks any more)
    # External commands -> self.unprocessed_external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # We do not generate broks any more
            return
        elif cls_type == 'externalcommand':
            self.unprocessed_external_commands.append(elt)
    
    
    def push_host_names(self, configuration_incarnation_uuid, shard_id, hnames):
        """Store the host names of one shard, then rebuild the host => shard association."""
        nb_host_names = len(hnames)
        logger_host_mapping.info('HOSTNAME<->SCHEDULER-SHARD mapping: scheduler with shard_id=%s is filled with %d hostnames' % (shard_id, nb_host_names))
        with self._host_name_mappings_lock:
            mappings = self._host_name_mappings
            # An unknown configuration incarnation means a new configuration:
            # forget everything we had about the previous ones
            if configuration_incarnation_uuid not in mappings:
                mappings.clear()
                mappings[configuration_incarnation_uuid] = {}
            logger_host_mapping.debug('push_host_names:: update shard %s => %s => %s' % (configuration_incarnation_uuid, shard_id, nb_host_names))
            mappings[configuration_incarnation_uuid][shard_id] = hnames
        # The regeneration takes the locks it needs by itself
        self._regenerate_host_assoc(configuration_incarnation_uuid)
    
    
    def _regenerate_host_assoc(self, configuration_incarnation_uuid):
        """Rebuild self.host_assoc (host name => shard id) for the given configuration incarnation."""
        started_at = time.time()
        # Build the new association aside, so the switch under the runtime lock is fast
        rebuilt_assoc = {}
        total_hosts = 0
        with self._host_name_mappings_lock:
            shards = self._host_name_mappings.get(configuration_incarnation_uuid, {})
            nb_shards = len(shards)
            for shard_id, host_names in shards.iteritems():
                shard_size = len(host_names)
                total_hosts += shard_size
                logger_host_mapping.debug(u' - Updating host mapping for shard_id=%s => nb_hosts=%s' % (shard_id, shard_size))
                # Every host name of this shard points to the shard id
                rebuilt_assoc.update(dict.fromkeys(host_names, shard_id))
        # Second lock: only the time needed to swap the reference
        with self.host_assoc_lock:
            self.host_assoc = rebuilt_assoc
        logger_host_mapping.debug(u'The host mapping was regenerated for %d shards, and a total of %d hosts [%.3f]s' % (nb_shards, total_hosts, time.time() - started_at))
    
    
    def have_host_names_mapping(self, configuration_incarnation_uuid, shard_id):
        """Return True when host names are stored for this configuration incarnation and shard."""
        with self._host_name_mappings_lock:
            known_shards = self._host_name_mappings.get(configuration_incarnation_uuid, {})
            is_known = shard_id in known_shards
            logger_host_mapping.debug(u'Arbiter is asking us if we have the host mapping configuration_incarnation=%s / shard_id=%s => %s' % (configuration_incarnation_uuid, shard_id, is_known))
        return is_known
    
    
    def get_sched_from_hname(self, hname):
        """Return the scheduler entry in charge of the host hname, or None when it is unknown."""
        with self.host_assoc_lock:
            shard_id = self.host_assoc.get(hname)
        entry = self.schedulers.get(shard_id)
        if entry is None:
            scheduler_name = '(not found)'
        else:
            scheduler_name = entry['name']
        logger_host_mapping.debug(u'Looking for host "%s" => shard_id=%s => scheduler => %s' % (hname, shard_id, scheduler_name))
        return entry
    
    
    # Get 'objects' from external modules.
    # For now nobody uses it, but it can be useful
    # for a module like livestatus to raise external
    # commands for example
    def get_objects_from_from_queues(self):
        for queue in self.modules_manager.get_external_from_queues():
            while not queue.empty():
                o = queue.get(block=False)
                self.add(o)
    
    
    def do_stop(self):
        """Stop the watchdog, terminate the child processes, then run the generic daemon stop."""
        self.mainloop_watchdog.quit()
        for child in active_children():
            child.terminate()
            # Give each child up to one second to exit
            child.join(1)
        super(Receiver, self).do_stop()
    
    
    def setup_new_conf(self):
        """Apply the pending configuration while holding the satellite lock."""
        with self.satellite_lock:
            self.really_setup_new_conf()
    
    
    # Initialize or re-initialize connection with scheduler
    def _connect_to_scheduler(self, schedulers_id):
        """(Re)create the HTTP connection of one scheduler entry and refresh its daemon incarnation.

        On failure the entry gets con=None and con_info set to the error text.
        """
        sched = self.schedulers[schedulers_id]
        
        # No need to connect to a scheduler that is not active
        if not sched['active']:
            return
        
        sched_name = sched['name']
        sched_uri = sched['uri']
        ping_timeout = sched.get('timeout', 3)
        transfer_timeout = sched.get('data_timeout', 120)
        known_incarnation = sched['daemon_incarnation']
        
        began_at = time.time()
        try:
            client = HTTPClient(uri=sched_uri, strong_ssl=sched['hard_ssl_name_check'], timeout=ping_timeout, data_timeout=transfer_timeout)
        except HTTPExceptions as exp:
            logger.info('%s Connection problem to the %s %s (uri="%s"): %s' % (CHAPTER_CONNECTION, 'scheduler', sched_name, sched_uri, str(exp)))
            sched['con'] = None
            sched['con_info'] = str(exp)
            return
        sched['con'] = client
        
        # Ask the scheduler for its current daemon incarnation
        try:
            fresh_incarnation = client.get('get_daemon_incarnation')
        except (HTTPExceptions, cPickle.PicklingError, KeyError) as exp:
            logger.warning('[executor][%s] Scheduler %s is not initialized or has network problem: %s' % (self.name, sched_name, str(exp)))
            sched['con'] = None
            sched['con_info'] = str(exp)
            return
        
        latency = time.time() - began_at
        sched['con_latency'] = latency
        
        # A different incarnation means the scheduler was restarted: what we were
        # waiting from it is obsolete. An empty dict is the "never connected" value.
        if known_incarnation != {} and fresh_incarnation != known_incarnation:
            logger.info('[executor][%s] The running id of the scheduler %s changed, we must clear its actions' % (self.name, sched_name))
            with sched['wait_homerun_lock']:
                sched['wait_homerun'].clear()
        sched['daemon_incarnation'] = fresh_incarnation
        logger.info('%s Connection OK to the %s %s in %.3fs (uri="%s", ping_timeout=%ss, transfert_timeout=%ss)' % (CHAPTER_CONNECTION, 'scheduler', sched_name, latency, sched_uri, ping_timeout, transfer_timeout))
    
    
    def _set_direct_routing(self, direct_routing, _logger):
        # type: (bool, PartLogger) -> NoReturn
        was_direct_routing = self.direct_routing
        self.direct_routing = direct_routing
        if self.direct_routing != was_direct_routing:
            if self.direct_routing:
                _logger.info(u'Switching from not direct routing to direct routing: ON')
            else:
                _logger.info(u'Switching from direct routing to direct routing: OFF')
        return
    
    
    # Set properties we want to set in our new schedulers
    def _set_default_values_to_scheduler_entry(self, entry):
        
        # IMPORTANT: mut be LOCAL, so each scheduler have their own {} and []
        default_scheduler_properties = {'wait_homerun'      : {}, 'wait_homerun_lock': threading.RLock(),
                                        'actions'           : {}, 'external_commands': [],
                                        'con'               : None,
                                        'type'              : 'scheduler',
                                        'daemon_incarnation': {},
                                        }
        
        entry.update(default_scheduler_properties)
    
    
    def _set_daemon_id_of_scheduler(self, daemon, daemon_id):
        """Store daemon_id in the scheduler entry, under the 'instance_id' key."""
        daemon['instance_id'] = daemon_id
    
    
    def really_setup_new_conf(self):
        """Apply the configuration stored in self.new_conf.

        Sets the daemon name and modes, then, when the daemon is activated,
        updates the schedulers, loads or updates the modules and creates the
        external command manager. A deactivated daemon stops its modules and
        forgets its known daemons instead.
        """
        _logger = self._print_new_update_conf_received()
        
        # If the configuration was giving us a new configuration incarnation, show it
        self.print_configuration_incarnation_log_entry_if_need(_logger)
        
        # The pending configuration becomes the current one
        conf = self.new_conf
        self.new_conf = None
        self.cur_conf = conf
        global_conf = conf['global']
        
        # Our name comes from the global part of the configuration
        receiver_name = global_conf.get('receiver_name', 'Unnamed receiver')
        self.name = receiver_name
        logger.load_obj(self, receiver_name)
        self.save_daemon_name_into_configuration_file(receiver_name)
        
        self._set_direct_routing(global_conf['direct_routing'], _logger)
        self._set_spare(global_conf.get('spare', False), _logger)
        self._set_is_activated(conf.get('activated', True), _logger)
        
        # The arbiter lets us know the realms that are allowed to talk to us, and
        # so also the realms that disappeared and need to be deleted
        self.known_realms = conf['known_realms']
        
        # Should we enable/disable human log format
        logger.set_human_format(on=global_conf.get('human_timestamp_log', True))
        
        if not self.activated:
            # A deactivated daemon keeps no module and no known daemon
            self.modules_manager.stop_all()
            self.have_modules = False
            self._clean_known_daemons()
            self.modules = ()
            self.have_configuration = True  # need be override in children init
            return
        _logger.debug(u'[receiver][configuration] Configuration received')
        
        # Both lists are filled by the scheduler update, only for logging purpose
        added_schedulers = []
        removed_schedulers = []
        for daemon_id, daemon in conf['schedulers'].iteritems():
            self._set_or_update_scheduler_from_configuration(daemon, daemon_id, global_conf, added_schedulers, removed_schedulers, _logger)
        
        self._print_new_and_deleted_daemons(new_schedulers=added_schedulers, deleted_schedulers=removed_schedulers, _logger=_logger)
        
        self.modules = global_conf['modules']
        module_names = ','.join(module.get_name() for module in self.modules)
        _logger.debug(u'Receiving modules:[%s] i already load modules:[%s]' % (module_names, self.have_modules))
        
        if self.have_modules:
            # Modules were already loaded: just update the ones we need
            self.modules_manager.update_modules(self.modules)
        else:
            # First load: set modules, init them and start external ones
            self.modules_manager.set_modules(self.modules)
            self.do_load_modules()
            self.modules_manager.start_external_instances()
            self.modules_manager.start_worker_based_instances()
            self.have_modules = True
        
        # Set the timezone given by the arbiter
        self.set_tz(global_conf['use_timezone'])
        
        # Now create the external commander. It's just here to dispatch the commands to schedulers
        commander = ExternalCommandManager(None, 'receiver')
        commander.load_receiver(self)
        self.external_command = commander
        self.have_configuration = True  # need be override in children init
    
    
    # Take all external commands, make packs and send them to
    # the schedulers
    def push_external_commands_to_schedulers(self):
        """Dispatch the buffered external commands while holding the satellite lock."""
        with self.satellite_lock:
            self.really_push_external_commands_to_schedulers()
    
    
    @staticmethod
    def _get_command_type_from_full_external_command(extcommand_string):
        part1 = extcommand_string.split(';', 1)[0]  # [0] => always valid, give something like '[1600953489] PROCESS_HOST_CHECK_RESULT'
        elts = part1.split(' ', 1)
        if len(elts) != 2:  # malformed? skip it
            return ''
        return elts[1].strip()  # => PROCESS_HOST_CHECK_RESULT
    
    
    def _log_scheduler_sent_summary(self, scheduler_name, cmds):
        # type: (str, List[str]) -> None
        """Log how many commands were sent to a scheduler, with one detail line per command label."""
        # Count the commands by label, the unknown types share the 'Other command' label
        counters = {}
        for command_line in cmds:
            command_type = self._get_command_type_from_full_external_command(command_line)
            label = self.command_log_labels.get(command_type, 'Other command')
            counters[label] = counters.get(label, 0) + 1
        
        logger.info('%s %s %-4d commands are sent to this scheduler :' % (_BUS_COMMANDS_STR, get_section_string(scheduler_name), len(cmds)))
        # Labels are logged in alphabetical order
        for label in sorted(counters):
            logger.info('%s  - %-30s: %-4d commands' % (_BUS_COMMANDS_STR, label, counters[label]))
    
    
    def really_push_external_commands_to_schedulers(self):
        """Dispatch the buffered external commands.

        Without direct routing, the unprocessed commands are only moved to
        self.external_commands.

        With direct routing, each unprocessed command is given to the external
        command manager (presumably it queues the command on the scheduler entry
        in charge of the host, verify against ExternalCommandManager), then the
        commands queued on each scheduler entry are posted to that scheduler.

        A scheduler that is not ready, or whose post fails, keeps its queued
        commands and is skipped for this turn: the other schedulers are still
        served. Only an unexpected exception aborts the whole dispatch.
        """
        # If we are not in a direct routing mode, just bailout after
        # faking resolving the commands
        if not self.direct_routing:
            self.external_commands.extend(self.unprocessed_external_commands)
            self.unprocessed_external_commands = []
            return
        
        # Now get all external commands and put them into the good schedulers
        for ext_cmd in self.unprocessed_external_commands:
            self.external_command.resolve_command(ext_cmd)
            self.external_commands.append(ext_cmd)
        
        # And clean the previous one
        self.unprocessed_external_commands = []
        
        # Now for all alive schedulers, send the commands
        for sched_id in self.schedulers:
            # Per scheduler log state, so the "not ready" messages are not repeated each turn
            status = self.command_status_output.setdefault(sched_id, {})
            status.setdefault('ready', True)
            status.setdefault('command_stored', 0)
            
            sched = self.schedulers[sched_id]
            sched_name = sched['name']
            extcmds = sched['external_commands']
            cmds = [extcmd.cmd_line for extcmd in extcmds]
            con = sched.get('con', None)
            sent = False
            if not con:
                logger.info('The scheduler %s is not connected' % sched_name)
                self._connect_to_scheduler(sched_id)
                con = sched.get('con', None)
            
            # If there are commands and the scheduler is alive
            nb_commands = len(cmds)
            if nb_commands > 0 and con:
                logger.debug('Trying to send %d commands to scheduler %s' % (nb_commands, sched_name))
                try:
                    run_external_commands_ack = con.post('run_external_commands', {'cmds': cmds, 'receiver_name': self.name})
                    if run_external_commands_ack == 'scheduler is not ready':
                        if status['ready']:
                            logger.info('%s The scheduler %s is not ready. The scheduler is waiting for its configuration.' % (_BUS_COMMANDS_STR, sched_name))
                            status['ready'] = False
                        # Here the scheduler is always flagged as not ready: only log again when the backlog did grow
                        if nb_commands > status['command_stored']:
                            status['command_stored'] = nb_commands
                            logger.info('%s %s shinken external commands (like recheck, set acknowledge, etc) received are stored in memory. Scheduler not ready.' % (_BUS_COMMANDS_STR, nb_commands))
                        # Keep the commands queued for this scheduler, but do not
                        # starve the other schedulers: go on with the next one
                        continue
                    status['command_stored'] = 0
                    status['ready'] = True
                    sent = True
                    
                    # For each scheduler we want a clean summary about what was sent this turn
                    self._log_scheduler_sent_summary(sched_name, cmds)
                
                # Not connected or scheduler is gone
                except (HTTPExceptions, KeyError) as exp:
                    if getattr(exp, 'errno', None) == 503:
                        logger.info('The scheduler %s is not ready.' % sched_name)
                    else:
                        logger.error('manage_returns exception:: %s,%s ' % (type(exp), str(exp)))
                        self._connect_to_scheduler(sched_id)
                    # The commands stay queued for this scheduler, try the next one
                    continue
                except AttributeError as exp:  # the scheduler must not be initialized
                    logger.debug('manage_returns exception:: %s,%s ' % (type(exp), str(exp)))
                    continue
                except Exception as exp:
                    logger.error('A satellite raised an unknown exception: %s (%s)' % (exp, type(exp)))
                    raise
            
            # If we sent or not the commands, just clean the scheduler list.
            self.schedulers[sched_id]['external_commands'] = []
            
            # If we sent them, remove the commands of this scheduler from the arbiter
            # list (if not, they would be sent twice)
            if sent:
                for extcmd in extcmds:
                    try:
                        self.external_commands.remove(extcmd)
                    except ValueError:
                        # Not in the list any more: nothing to remove
                        pass
    
    
    def do_loop_turn(self):
        """Run one main loop turn inside the main loop watchdog context."""
        with self.mainloop_watchdog:
            self._do_loop_turn()
    
    
    def _do_loop_turn(self):
        """Do the work of one main loop turn, then sleep the rest of the second.

        The sleep time is kept between 0 and 1 second, whatever the measured
        duration of the turn is.
        """
        start_snap = cpu_stats_helper.get_thread_cpu_snapshot()
        loop_start = time.time()
        
        # Begin to clean modules
        self.check_and_del_zombie_modules()
        
        # Now we check if the arbiter did send us a new configuration.
        # If so, we set it up (connections are reinitialized)
        self.watch_for_new_conf(0.0)
        if self.new_conf:
            self.setup_new_conf()
        
        # Maybe external modules raised 'objects'
        # we should get them
        self.get_objects_from_from_queues()
        
        self.push_external_commands_to_schedulers()
        
        # We need to be sure that our inventory do not keep
        # deleted realms
        self._clean_old_realms_in_inventory()
        
        # If an inventory did change, warn the modules about it
        # so they can update their own inventory about it
        self.assert_module_inventory_are_updated()
        
        # Only logged on the turns that fall on a second multiple of 60
        if int(time.time()) % 60 == 0:
            for (realm_name, inventory) in self._realms_inventory.items():
                logger.info('The realm %s inventory have currently %s elements' % (realm_name, inventory.get_len()))
        
        logger.debug('[PERFS] %s' % start_snap.get_diff())
        
        diff_time = time.time() - loop_start
        # Protect it against time shifting from system: a turn longer than one
        # second would give a negative sleep time, and a system clock going
        # backward would give a sleep far longer than one second
        sleep_time = min(1.0, max(0.0, 1.0 - diff_time))
        self.sleep(sleep_time)
    
    
    #  Main function, will loop forever
    def main(self):
        """Start the daemon: load the configuration file, initialize, wait for the initial
        configuration, then enter the main loop.

        Any exception is logged as critical with its traceback, then raised again.
        """
        try:
            self.load_config_file()
            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()
            
            # Log the daemon header lines
            for line in self.get_header():
                logger.info(line)
            self.daily_log_version()
            
            logger.info('[Receiver] Using working directory: %s' % os.path.abspath(self.workdir))
            
            self.do_daemon_init_and_start()
            
            self.load_modules_manager()
            
            self._register_http_interfaces()
            
            #  We wait for initial conf
            self.wait_for_initial_conf()
            # No configuration after the wait: leave without entering the main loop
            if not self.new_conf:
                return
            
            self.setup_new_conf()
            
            # Now the main loop
            self.do_mainloop()
        
        except Exception:
            # Log the full traceback before letting the exception go up
            logger.critical('The daemon did have an unrecoverable error. It must exit.')
            logger.critical('You can log a bug to your Shinken integrator with the error message:')
            logger.critical('%s' % (traceback.format_exc()))
            raise
