#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#     Gabes Jean, naparuba@gmail.com
#     Gerhard Lausser, Gerhard.Lausser@consol.de
#     Gregory Starck, g.starck@gmail.com
#     Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import imp
import inspect
import os
import sys
import time
import traceback
from threading import RLock, Thread

from shinken.basemodule import BaseModule, ModuleState
from shinken.log import LoggerFactory

# Logger shared by every function and method of this file
logger = LoggerFactory.get_logger('modules-manager')


class INSTANCE_INIT_STATE(object):
    """Outcome of one call to ModulesManager.try_instance_init."""
    
    # OK: init() ran without raising / FAIL: something raised / WAIT: too early to retry
    OK, FAIL, WAIT = 'OK', 'FAIL', 'WAIT'


# We need to manage pre-2.0 module types with _ into the new 2.0 - mode
def uniform_module_type(s):
    """Return the module type *s* with every underscore turned into a dash."""
    return '-'.join(s.split('_'))


# Upper bound of the delay that ModulesManager.try_instance_init enforces between
# two init attempts of the same instance (compared with time.time(), so in seconds)
MAX_MODULE_INIT_TRY_INTERVAL = 60


class ModulesManager(object):
    """This class is used to manage modules and call callbacks"""
    
    class StateCleaner(Thread):
        """Daemon thread that periodically asks a manager to clean its modules states.
        
        Every CLEAN_STEP seconds it calls clean_modules_states() on the manager
        it was built with.
        """
        
        CLEAN_STEP = 60  # seconds slept before each cleanup
        
        
        def __init__(self, manager):
            Thread.__init__(self)
            # run() never returns, so the thread must not prevent the application from closing
            self.daemon = True
            self.__manager = manager
        
        
        def run(self):
            cleaner_class = ModulesManager.StateCleaner
            while True:
                time.sleep(cleaner_class.CLEAN_STEP)
                self.__manager.clean_modules_states()
    
    def __init__(self, modules_type, modules_path, modules, daemon_display_name='UNSET'):
        self._modules = None
        self.modules_path = modules_path
        self.modules_type = modules_type
        self.modules = modules
        self.allowed_types = [uniform_module_type(plug.module_type) for plug in modules if hasattr(plug, 'module_type')]
        self.imported_modules = []
        self.modules_assoc = {}
        self.instances = {}
        self.to_restart = {}
        self.last_restarts = {}
        self.last_restarts_lock = RLock()
        self.last_restarts_keep = 86400  # lasts 24h (86400 seconds) of restarts
        self.max_queue_size = 0
        self.manager_factory = None
        self.errors = []
        self.daemon_display_name = daemon_display_name
        ModulesManager.StateCleaner(self).start()
    
    
    @property
    def modules(self):
        return self._modules
    
    
    @modules.setter
    def modules(self, modules):
        self._modules = modules
        self._set_modules_ordering_indexes(modules)
    
    
    def _set_modules_ordering_indexes(self, modules):
        self._idx_modules_order_by_name = {}
        for index, module in enumerate(modules):
            self._idx_modules_order_by_name[module.module_name] = index
    
    
    def load_manager_factory(self, factory):
        self.manager_factory = factory
    
    
    # Set the modules requested for this manager
    def set_modules(self, modules):
        self.modules = modules
        self.errors = []
        self.allowed_types = [uniform_module_type(mod.module_type) for mod in modules if hasattr(mod, 'module_type')]
    
    
    # We get some new modules, and we must look which one are:
    # * no more there and should be stopped
    # * the new ones, we need to start
    # * the one that did changed (look at hash property) that we need to restart
    def update_modules(self, modules):
        """Apply a new list of module definitions to the instances of this manager.
        
        Names of the current definitions are compared with the names found in *modules*:
        * name no more present: the instance is stopped and removed
        * new name: an instance is created and queued in to_restart, so it is started later
        * name on both sides but with a different hash: the instance is removed, created
          again from the new definition and started right away
        At the end *modules* becomes the current definitions and self.errors only
        holds the errors of this update.
        """
        
        def conf_named(confs, wanted_name):
            # First definition of the list with this name, None when there is none
            for conf in confs:
                if conf.get_name() == wanted_name:
                    return conf
            return None
        
        
        def register(name, instance):
            # Same copy-then-replace update of self.instances as in the rest of the class
            patched_instances = self.instances.copy()
            patched_instances[name] = instance
            self.instances = patched_instances
        
        
        reconfigured = []
        self.errors = []
        
        new_names = set([conf.get_name() for conf in modules])
        previous_names = set([conf.get_name() for conf in self.modules])
        removed_names = previous_names - new_names
        added_names = new_names - previous_names
        
        # Definitions present on both sides: keep the ones whose hash did change
        for name in previous_names & new_names:
            previous_conf = conf_named(self.modules, name)
            new_conf = conf_named(modules, name)
            if previous_conf is None or new_conf is None:
                logger.error('Cannot find previous or new module definition for the module name %s, so cannot update it.' % name)
                continue
            if previous_conf.hash != new_conf.hash:
                logger.info('[%s] Configuration of the module have change so we restart it' % name)
                reconfigured.append(new_conf)
        
        for name in removed_names:
            logger.info('Removing the module %s because it is remove from the configuration' % name)
            self.__remove_instance(name)
        
        for name in added_names:
            message = 'Launching a new module %s as it was added in the daemon configuration' % name
            logger.info(message)
            instance = self.__get_instance_from_modconf(conf_named(modules, name))
            if instance is None:
                continue
            register(name, instance)
            # Queued for a later start; not an error, so it is not registered in the restart history
            self.set_to_restart(instance, message, is_an_error=False)
        
        for new_conf in reconfigured:
            name = new_conf.get_name()
            message = 'The module %s did change its configuration. We restart it to take the new configuration.' % name
            logger.info(message)
            self.__remove_instance(name)
            instance = self.__get_instance_from_modconf(conf_named(modules, name))
            if instance is None:
                continue
            register(name, instance)
            
            logger.debug("[module] I should try to re-init [%s]" % name)
            if instance.is_external:
                self.start_external_instance(instance)
            elif instance.is_worker_based:
                self.start_worker_based_instance(instance)
            else:
                self.start_internal_instance(instance)
        
        # The new definitions are now the reference
        self.modules = modules
    
    
    def set_max_queue_size(self, max_queue_size):
        self.max_queue_size = max_queue_size
    
    
    # Import, instanciate & "init" the modules we have been requested
    def load_and_init(self):
        self.load()
        all_was_started = self._get_instances()
        return all_was_started
    
    
    def load(self):
        self._load_python_modules()
    
    
    # Try to import the requested modules ; put the imported modules in self.imported_modules.
    # The previous imported modules, if any, are cleaned before.
    def _load_python_modules(self):
        """Import the module.py (or module.pyc) of every sub-directory of self.modules_path.
        
        self.imported_modules is emptied first, then receives each imported python module
        that exposes a 'properties' dict whose 'daemons' entry contains self.modules_type.
        self.modules_path and each module directory are added to sys.path (once).
        A failure on one directory is logged as a warning and does not stop the others.
        """
        # Only the directories of modules_path are candidates
        modules_dir_names = [name for name in os.listdir(self.modules_path) if os.path.isdir(os.path.join(self.modules_path, name))]
        
        if self.modules_path not in sys.path:
            sys.path.append(self.modules_path)
        
        # We try to import them, but we keep only the one of our type
        del self.imported_modules[:]
        for module_dir_name in modules_dir_names:
            try:
                module_dir = os.path.join(self.modules_path, module_dir_name)
                # Do not stack the same entry in sys.path each time the modules are loaded
                if module_dir not in sys.path:
                    sys.path.append(module_dir)
                try:
                    # Load the directory itself as a package so module.py can import its own files
                    imp.load_module(module_dir_name, *imp.find_module(module_dir_name, [self.modules_path]))
                except Exception:
                    logger.warning('[%20s] Failed to load the directory [%s] as a python module.' % (module_dir_name, module_dir_name))
                    
                    init_path_file = os.path.join(module_dir, '__init__.py')
                    if not os.path.exists(init_path_file):
                        logger.warning('[%20s]  - because of missing file :[%s].' % (module_dir_name, init_path_file))
                    
                    logger.warning('[%20s]  - "import %s.my_file" will not work in your module.py file' % (module_dir_name, module_dir_name))
                
                # Then we load the module.py inside this directory
                mod_file = os.path.abspath(os.path.join(self.modules_path, module_dir_name, 'module.py'))
                mod_dir = os.path.dirname(mod_file)
                # The absolute directory too, so the module can load local files (often equal to module_dir)
                if mod_dir not in sys.path:
                    sys.path.append(mod_dir)
                if not os.path.exists(mod_file):
                    mod_file = os.path.abspath(os.path.join(self.modules_path, module_dir_name, 'module.pyc'))
                
                if mod_file.endswith('.py'):
                    # important, equivalent to import fname from module.py
                    m = imp.load_source(module_dir_name, mod_file)
                else:
                    m = imp.load_compiled(module_dir_name, mod_file)
                
                # Look if it's a valid module
                if not hasattr(m, 'properties'):
                    logger.warning('[%20s] Bad module file for %s : missing properties dict' % (module_dir_name, mod_file))
                    continue
                
                # We want to keep only the modules of our type
                if self.modules_type in m.properties['daemons']:
                    self.imported_modules.append(m)
            except Exception as exp:
                # Oups, something went wrong here...
                logger.warning('[%20s] Importing module %s: %s' % (module_dir_name, module_dir_name, exp))
    
    
    # For a specific module definition, we want to get the pymodule that match us
    def __get_pymodule_from_mod_conf(self, mod_conf):
        """Return the first imported python module whose properties['type'] matches mod_conf.module_type.
        
        Both types are compared after uniform_module_type(). None is returned when mod_conf
        has no module_type, or when nothing matches; in the latter case the error is
        stored in self.errors and logged as a warning.
        """
        if not hasattr(mod_conf, 'module_type'):
            return None
        
        wanted_type = uniform_module_type(mod_conf.module_type)
        matching = (py_module for py_module in self.imported_modules if uniform_module_type(py_module.properties['type']) == wanted_type)
        found = next(matching, None)
        if found is not None:
            return found
        
        not_available = "The module %s of type %s is not available for the daemon/module %s." % (mod_conf.get_name(), mod_conf.module_type, self.modules_type)
        self.errors.append(not_available)
        logger.warning(not_available)
        return None
    
    
    # Try to "init" the given module instance.
    # If late_start, don't look for last_init_try
    # Returns: INSTANCE_INIT_STATE.OK on successful init, INSTANCE_INIT_STATE.WAIT if the retry delay is not over yet, INSTANCE_INIT_STATE.FAIL if anything raised an Exception.
    def try_instance_init(self, inst, late_start=False):
        """Call init() on a module instance, unless its previous attempt is too recent.
        
        Each call adds 10 to inst.init_try. Unless late_start is set, the call is refused
        while less than min(inst.init_try, MAX_MODULE_INIT_TRY_INTERVAL) seconds have passed
        since inst.last_init_try. A successful init sets inst.init_try back to 1.
        NOTE(review): the counter moves by 10 while the retry test is "> 1" and the reset
        value is 1, and it also moves on the calls that end in WAIT - confirm this is intended.
        
        :param inst: module instance to init
        :param late_start: when True, the delay since the last attempt is not checked
        :return: INSTANCE_INIT_STATE.OK when init() ran, INSTANCE_INIT_STATE.WAIT when the call
                 was refused, INSTANCE_INIT_STATE.FAIL when anything raised (did_crash was called)
        """
        try:
            inst.init_try += 10
            if not late_start and inst.init_try > 1:
                # The more we tried, the longer we wait, up to MAX_MODULE_INIT_TRY_INTERVAL
                delay = min(inst.init_try, MAX_MODULE_INIT_TRY_INTERVAL)
                if time.time() < inst.last_init_try + delay:
                    logger.info('The module %s did fail since not long, waiting before trying to restart it.' % inst.get_name())
                    return INSTANCE_INIT_STATE.WAIT
            
            logger.info("Trying to init module: %s" % inst.get_name())
            inst.last_init_try = time.time()
            
            # External and worker based instances talk through queues: create/update them
            if inst.is_external or inst.is_worker_based:
                inst.create_queues(self.manager_factory)
            
            # Give our display name only to the init() that declare this parameter
            init_kwargs = {}
            init_arg_names = inspect.getargspec(inst.init).args
            if init_arg_names and 'daemon_display_name' in init_arg_names:
                init_kwargs['daemon_display_name'] = self.daemon_display_name
            inst.init(**init_kwargs)
            
            inst.init_try = 1
        except Exception as exp:
            logger.error("The instance %s raised an exception %s, I remove it!" % (inst.get_name(), str(exp)))
            logger.print_stack()
            self.did_crash(inst, reason=str(exp))
            return INSTANCE_INIT_STATE.FAIL
        return INSTANCE_INIT_STATE.OK
    
    
    # Request to "remove" the given instances list or all if not provided
    def clear_instances(self, insts=None):
        """Stop and remove the given instances, or every known instance when *insts* is None."""
        targets = self.get_all_instances() if insts is None else insts
        for instance in targets:
            logger.info('Removing the module %s as we are stopping all modules' % instance.get_name())
            self.__remove_instance(instance.get_name())
    
    
    # A daemon did detect that a module instance did crash and so don't want it anymore.
    # We will need to log it, kill the subprocess if external and set to restart later
    def did_crash(self, inst, reason=""):
        if reason:
            logger.error(reason)
        self.set_to_restart(inst, reason)
    
    
    # Put an instance to the restart queue
    # By default if an instance is set to restart, it means it's an error,
    # but in some case it's not (like new modules)
    def set_to_restart(self, inst, reason='', is_an_error=True):
        # type: (BaseModule, str, bool) -> None
        """Stop the instance and, unless it is in FATAL state, queue it in self.to_restart.
        
        An instance in FATAL state is only flagged (fatal_error_has_been_managed) and stopped.
        
        :param inst: the module instance to restart
        :param reason: why it is restarted (logged and kept in the restart history)
        :param is_an_error: when False, nothing is logged nor registered in the restart history
        """
        if inst.get_internal_state() == ModuleState.FATAL:
            inst.fatal_error_has_been_managed = True
        else:
            if is_an_error:
                logger.warning('The module %s will be restart because : %s' % (inst.get_name(), reason))
                self.__register_module_restart(inst.get_name(), reason)
            self.to_restart[inst.get_name()] = inst
        inst.stop_all()
    
    
    def __get_instance_from_modconf(self, mod_conf):
        """Create the module instance described by *mod_conf*.
        
        The python module is looked up with __get_pymodule_from_mod_conf, a copy of its
        properties is set on mod_conf, then its get_instance() is called (with module_on
        when it declares this parameter).
        
        :param mod_conf: the module definition
        :return: the BaseModule instance, or None when no python module matches, when
                 get_instance() returned None or something else than a BaseModule, or
                 when anything raised (the error is logged)
        """
        mname = mod_conf.get_name()
        py_module = self.__get_pymodule_from_mod_conf(mod_conf)
        # if we cannot find a suitable code to launch, skip it, we already warn about it
        if not py_module:
            return None
        
        try:
            mod_conf.properties = py_module.properties.copy()
            
            argspec = inspect.getargspec(py_module.get_instance)
            if len(argspec.args) > 1 and 'module_on' in argspec.args:
                inst = py_module.get_instance(mod_conf, module_on=self.modules_type)
            else:
                inst = py_module.get_instance(mod_conf)
            if inst is None:  # None = Bad thing happened :)
                logger.info("get_instance for module %s returned None!" % mname)
                return None
            # Explicit check: an assert would be stripped when python runs with -O
            if not isinstance(inst, BaseModule):
                raise TypeError('get_instance returned a %s instead of a BaseModule' % type(inst).__name__)
            return inst
        
        except Exception as exp:
            s = str(exp)
            # bytes is str on python 2, so this only decodes byte strings
            if isinstance(s, bytes):
                s = s.decode('UTF-8', 'replace')
            logger.error("The module %s raised an exception %s, I remove it!" % (mname, s))
            logger.print_stack()
            return None
    
    
    # Actually only arbiter call this method with start_external=False..
    # Create, init and then returns the list of module instances that the caller needs.
    # If an instance can't be created or init'ed then only log is done.
    # That instance is skipped. The previous modules instance(s), if any, are all cleaned.
    def _get_instances(self):
        self.clear_instances()
        
        all_was_start = True
        
        for mod_conf in self.modules:
            inst = self.__get_instance_from_modconf(mod_conf)
            if inst:
                updated_instances = self.instances.copy()
                updated_instances[mod_conf.get_name()] = inst
                self.instances = updated_instances
            else:  # there was an error on this module
                all_was_start = False
        
        # We should init the modules, but not the:
        # * external ones
        # * with workers based
        # because they can be crashed and so it must be done just before forking
        for inst in self.get_all_instances():
            if not inst.is_external and not inst.is_worker_based:
                was_init_ok = self.start_internal_instance(inst)
                if not was_init_ok:
                    all_was_start = False
        
        return all_was_start
    
    
    def start_internal_instance(self, inst, late_start=False):
        """Init an internal instance and queue it for a restart when it could not be done.
        
        :param inst: the module instance
        :param late_start: given to try_instance_init
        :return: False when the init failed or must wait, True otherwise
        """
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
        elif outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to retry: keep it in the restart queue, without logging an error
            self.set_to_restart(inst, is_an_error=False)
        else:
            return True
        return False
    
    
    def start_external_instance(self, inst, late_start=False):
        """Init then start an external instance; queue it for a restart if one of the steps fails.
        
        :param inst: the module instance
        :param late_start: given to try_instance_init
        """
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
        elif outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to retry: keep it in the restart queue, without logging an error
            self.set_to_restart(inst, is_an_error=False)
        else:
            # ok, init succeed
            logger.info("Starting external module %s" % inst.get_name())
            try:
                inst.start(daemon_display_name=self.daemon_display_name)
            except Exception as exp:
                failure = 'The module "%s" failed to start (%s)' % (inst.get_name(), exp)
                logger.error(failure)
                self.set_to_restart(inst, failure)
    
    
    # Launch external instances that are load correctly
    def start_external_instances(self, late_start=False):
        for inst in [inst for inst in self.get_all_instances() if inst.is_external]:
            self.start_external_instance(inst, late_start=late_start)
    
    
    def start_worker_based_instance(self, inst, late_start=False):
        """Init a worker based instance, then start its workers.
        
        :param inst: the module instance
        :param late_start: given to try_instance_init
        :return: False when start_workers() raised, None in every other case
        """
        logger.info('Starting a worker based instance: %s' % inst.get_name())
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The worker based module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
            return None
        if outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to retry: keep it in the restart queue, without logging an error
            self.set_to_restart(inst, is_an_error=False)
            return None
        
        # ok, init succeed
        logger.info('Worker based module %s was started' % inst.get_name())
        try:
            inst.start_workers(daemon_display_name=self.daemon_display_name)
        except Exception as exp:
            error_text = str(exp)
            logger.error("The instance %s raised an exception %s, I remove it!" % (inst.get_name(), error_text))
            logger.error("Back trace of this remove: %s" % (traceback.format_exc()))
            self.did_crash(inst, reason=error_text)
            return False
        return None
    
    
    # Launch the worker based instances that are loaded correctly
    def start_worker_based_instances(self, late_start=False):
        for inst in [inst for inst in self.get_all_instances() if inst.is_worker_based]:
            self.start_worker_based_instance(inst, late_start=late_start)
    
    
    # Request to cleanly remove the given instance.
    # If instance is external also shutdown it cleanly
    def __remove_instance(self, instance_name):
        inst = self.instances.get(instance_name, None)
        if inst is None:
            logger.warning('Trying to remove the module %s but it is not found in the current instances: %s' % (instance_name, self.instances))
            return
        
        inst.stop_all()
        
        # Then do not listen anymore about it
        updated_instances = self.instances.copy()
        del updated_instances[instance_name]
        self.instances = updated_instances
        # SEF-6521: do not forget to clean in all lists:
        if instance_name in self.to_restart:
            del self.to_restart[instance_name]
    
    
    def check_alive_instances(self, skip_external=False):
        for instance_name, inst in self.get_all_instances_with_name():
            # skip already to restart one
            if instance_name in self.to_restart or inst.fatal_error_has_been_managed:
                continue
            
            # The skip_external is used in a child process because there is no possible to set an external module to a subprocess/submodule
            if skip_external and inst.is_external:
                continue
            
            if not inst.is_alive():
                self.did_crash(inst, "The external module %s goes down unexpectedly!" % instance_name)
                inst.stop_process()
                continue
            
            if inst.is_worker_based and not inst.check_worker_processes():
                err = 'The module %s worker goes down unexpectedly!' % instance_name
                self.did_crash(inst, err)
                continue
            
            # Now look for man queue size. If above value, the module should got a huge problem
            # and so bailout. It's not a perfect solution, more a watchdog
            # If max_queue_size is 0, don't check this
            if self.max_queue_size == 0:
                continue
            # Ok, go launch the dog!
            queue_size = 0
            try:
                queue_size = inst.to_q.qsize()
            except Exception:
                pass
            if queue_size > self.max_queue_size:
                self.did_crash(inst, "The external module %s got a too high brok queue size (%s > %s)!" % (instance_name, queue_size, self.max_queue_size))
    
    
    def try_to_restart_deads(self):
        """Try to start again every instance waiting in self.to_restart.
        
        The queue is emptied first; the start methods put back the instances that
        still cannot be started.
        """
        pending, self.to_restart = self.to_restart, {}
        for instance_name, instance in pending.items():
            logger.debug("[module] I should try to re-init [%s]" % instance_name)
            if instance.is_external:
                start_instance = self.start_external_instance
            elif instance.is_worker_based:
                start_instance = self.start_worker_based_instance
            else:
                start_instance = self.start_internal_instance
            start_instance(instance)
    
    
    def __register_module_restart(self, module_name, reason=''):
        with self.last_restarts_lock:
            if module_name not in self.last_restarts:
                self.last_restarts[module_name] = []
            
            self.last_restarts[module_name].append({'timestamp': time.time(), 'reason': reason.rstrip()})
    
    
    # Called from StateCleaner thread
    def clean_modules_states(self):
        with self.last_restarts_lock:
            now_ts = time.time()
            clean_ts = now_ts - self.last_restarts_keep
            
            for last_restarts in self.last_restarts.values():
                delete_count = 0
                
                for restart_ts in last_restarts:
                    if restart_ts < clean_ts:
                        delete_count += 1
                    else:
                        break
                
                del last_restarts[:delete_count]
    
    
    def get_modules_states(self):
        states = []
        for name, inst in self.get_all_instances_with_name():
            status = {"restarts": self.last_restarts.get(name, []), "name": name, "type": inst.myconf.module_type}
            status.update(inst.get_state())
            states.append(status)
        return {"modules": states, "errors": self.errors}
    
    
    def _get_instances_with_condition(self, condition=lambda inst: True):
        return sorted(filter(condition, self.get_all_instances()), key=lambda inst: self._idx_modules_order_by_name[inst.get_name()])
    
    
    # Do not give to others inst that got problems
    def get_internal_instances(self, phase=None):
        """Return the usable non external instances that list *phase* in their phases.
        
        Instances waiting in self.to_restart or in FATAL state are left out.
        The result is ordered like the module definitions.
        """
        
        def is_usable_internal(inst):
            if inst.is_external:
                return False
            if phase not in inst.phases:
                return False
            if inst in self.to_restart.values():
                return False
            return inst.get_internal_state() != ModuleState.FATAL
        
        
        return self._get_instances_with_condition(is_usable_internal)
    
    
    def get_external_instances(self, phase=None):
        """Return the usable external instances that list *phase* in their phases.
        
        Instances waiting in self.to_restart or in FATAL state are left out.
        The result is ordered like the module definitions.
        """
        
        def is_usable_external(inst):
            if not inst.is_external:
                return False
            if phase not in inst.phases:
                return False
            if inst in self.to_restart.values():
                return False
            return inst.get_internal_state() != ModuleState.FATAL
        
        
        return self._get_instances_with_condition(is_usable_external)
    
    
    def get_external_to_queues(self):
        """Return the to_q of every external instance that is neither waiting for a restart nor in FATAL state."""
        queues = []
        for inst in self.get_all_instances():
            if not inst.is_external:
                continue
            if inst in self.to_restart.values():
                continue
            if inst.get_internal_state() != ModuleState.FATAL:
                queues.append(inst.to_q)
        return queues
    
    
    def get_external_modules_and_queues(self):
        """Return (instance, instance.to_q) for every external instance that is neither waiting for a restart nor in FATAL state."""
        modules_and_queues = []
        for inst in self.get_all_instances():
            if not inst.is_external:
                continue
            if inst in self.to_restart.values():
                continue
            if inst.get_internal_state() != ModuleState.FATAL:
                modules_and_queues.append((inst, inst.to_q))
        return modules_and_queues
    
    
    def get_external_from_queues(self):
        """Return the from_module_to_main_daemon_queue of every external or worker based instance that is neither waiting for a restart nor in FATAL state."""
        queues = []
        for inst in self.get_all_instances():
            if not (inst.is_external or inst.is_worker_based):
                continue
            if inst in self.to_restart.values():
                continue
            if inst.get_internal_state() != ModuleState.FATAL:
                queues.append(inst.from_module_to_main_daemon_queue)
        return queues
    
    
    def get_all_alive_instances(self):
        """Return every instance that is neither waiting in self.to_restart nor in FATAL state, ordered like the module definitions."""
        
        def is_usable(inst):
            if inst in self.to_restart.values():
                return False
            return inst.get_internal_state() != ModuleState.FATAL
        
        
        return self._get_instances_with_condition(is_usable)
    
    
    def stop_all(self):
        # Ask internal to quit if they can
        for inst in self.get_internal_instances():
            inst.stop_all()
        
        # Clear/stop all external & worker based instances
        self.clear_instances([inst for inst in self.get_all_instances() if inst.is_external])
        self.clear_instances([inst for inst in self.get_all_instances() if inst.is_worker_based])
    
    
    def get_all_instances(self):
        return self.instances.values()
    
    
    def get_all_instances_name(self):
        return self.instances.keys()
    
    
    def get_all_instances_with_name(self):
        return self.instances.items()
