#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2009-2012:
#     Gabes Jean, naparuba@gmail.com
#     Gerhard Lausser, Gerhard.Lausser@consol.de
#     Gregory Starck, g.starck@gmail.com
#     Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import imp
import inspect
import os
import sys
import time
import traceback
from threading import RLock, Thread

from shinken.basemodule import BaseModule, ModuleState
from shinken.log import LoggerFactory, get_chapter_string, get_section_string
from shinken.log import logger
from shinken.misc.type_hint import TYPE_CHECKING

if TYPE_CHECKING:
    from shinken.misc.type_hint import Bool, NoReturn

# Logger dedicated to this file.
# NOTE: this rebinds the `logger` name imported from shinken.log just above.
logger = LoggerFactory.get_logger('modules-manager')

# Log prefixes ("chapters") put in front of the messages of this file, one per kind of operation:
# loading of the modules code, creation/init of the instances, changes in the modules list.
CODE_LOADING_CHAPTER = get_chapter_string('MODULE-CODE-LOADING')
INSTANCE_CREATION = get_chapter_string('MODULE-INSTANCE-CREATION')
MODULE_CHANGE_CHAPTER = get_chapter_string('MODULES-CHANGE')


class INSTANCE_INIT_STATE(object):
    """Possible results of ModulesManager.try_instance_init()."""
    OK = 'OK'  # the init method of the instance was called and did not raise
    FAIL = 'FAIL'  # an exception was raised during the init try
    WAIT = 'WAIT'  # the init was not tried: the previous try is too recent


# Log prefix used by update_modules() for the messages about module configuration changes
CHAPTER_CONFIGURATION = get_chapter_string('CONFIGURATION')


# We need to manage pre-2.0 module types with _ into the new 2.0 - mode
def uniform_module_type(s):
    """Return the module type `s` with every underscore turned into a dash."""
    chunks = s.split('_')
    return '-'.join(chunks)


# Upper bound, in seconds, of the delay that try_instance_init() enforces between two init tries of a module
MAX_MODULE_INIT_TRY_INTERVAL = 60


class ModulesManager(object):
    """This class is use to manage modules and call callback"""
    
    class StateCleaner(Thread):
        """Background thread used to cleanup the modules state (self.last_restarts).

        Every CLEAN_STEP seconds it calls clean_modules_states() on the manager
        it was created with, forever.
        """

        CLEAN_STEP = 60  # every 60 seconds


        def __init__(self, manager):
            Thread.__init__(self)
            # Daemon thread: will be killed at application close.
            # The `daemon` attribute replaces the deprecated setDaemon() call.
            self.daemon = True
            self.__manager = manager


        def run(self):
            # Endless loop: the thread only ends with the process (daemon thread)
            while True:
                time.sleep(self.CLEAN_STEP)
                self.__manager.clean_modules_states()
    
    def __init__(self, modules_type, modules_path, modules, daemon_display_name='UNSET', with_thread=True):
        """Prepare an empty manager for the given module configurations.

        modules_type: type looked up in the 'daemons' property of the modules code.
        modules_path: directory whose sub directories contain the modules code.
        modules: module configurations requested for this manager.
        daemon_display_name: name given to the modules at init/start time.
        with_thread: when True, start the StateCleaner background thread.
        """
        # Who we are and where the code is
        self.daemon_display_name = daemon_display_name
        self.modules_type = modules_type
        self.modules_path = modules_path

        # Requested configurations (the property setter also builds the ordering index)
        self._modules = None
        self.modules = modules
        self.allowed_types = [uniform_module_type(conf.module_type) for conf in modules if hasattr(conf, 'module_type')]

        # Loaded code and living instances
        self.imported_modules = []
        self.modules_assoc = {}
        self.instances = {}
        self.instances_lock = RLock()
        self.manager_factory = None
        self.max_queue_size = 0
        self.errors = []

        # Restart queue and restart history
        self.to_restart = {}
        self.last_restarts = {}
        self.last_restarts_lock = RLock()
        self.last_restarts_keep = 86400  # lasts 24h (86400 seconds) of restarts

        if with_thread:
            cleaner = ModulesManager.StateCleaner(self)
            cleaner.start()
    
    
    @property
    def modules(self):
        """The module configurations currently set on this manager."""
        return self._modules
    
    
    @modules.setter
    def modules(self, modules):
        """Store the module configurations and rebuild the name -> position index."""
        self._modules = modules
        self._set_modules_ordering_indexes(modules)
    
    
    def _set_modules_ordering_indexes(self, modules):
        """Rebuild the index giving, for each module name, its position in `modules`."""
        positions = {}
        self._idx_modules_order_by_name = positions
        # If a name is present more than once, its last position is kept
        positions.update((conf.module_name, position) for position, conf in enumerate(modules))
    
    
    def load_manager_factory(self, factory):
        """Keep the factory that try_instance_init() hands to create_queues() of external/worker based instances."""
        self.manager_factory = factory
    
    
    # Set the modules requested for this manager
    def set_modules(self, modules):
        """Replace the requested module configurations, reset the errors and log the new list."""
        self.modules = modules
        self.errors = []

        known_types = []
        for conf in modules:
            if hasattr(conf, 'module_type'):
                known_types.append(uniform_module_type(conf.module_type))
        self.allowed_types = known_types

        # Every requested module is displayed as a new one
        requested_names = [conf.get_name() for conf in self.modules]
        self._display_module_tree(added_modules_names=requested_names, updated_modules_names=[], deleted_modules_names=[])
    
    
    def _find_module_by_name(self, module_name):
        """Return the first configuration of self.modules named `module_name`, or None if there is none."""
        same_name = (conf for conf in self.modules if conf.get_name() == module_name)
        return next(same_name, None)
    
    
    # When module does changes in a way, we print what we currently have as module
    def _display_module_tree(self, added_modules_names, updated_modules_names, deleted_modules_names):
        """Log the current modules, plus the removed ones, with what happened to each.

        Nothing is logged when there is neither a current nor a removed module,
        or when the three name collections are all empty.
        Sub modules are listed at DEBUG level, in their configured order.
        """
        # No module at all, not even a removed one: nothing to display
        if len(self.modules) + len(deleted_modules_names) == 0:
            return

        # No change: nothing to display
        if len(added_modules_names) + len(updated_modules_names) + len(deleted_modules_names) == 0:
            return

        # Names to display = the current ones + the deleted ones, without duplicate
        names_to_display = set(conf.get_name() for conf in self.modules)
        names_to_display.update(deleted_modules_names)

        logger.info('%s Modules did changed:' % MODULE_CHANGE_CHAPTER)
        for name in sorted(names_to_display):
            conf = self._find_module_by_name(name)
            if conf is None:
                status = '-> stopped   (removed)'
            elif name in added_modules_names:
                status = '-> started   (new)'
            elif name in updated_modules_names:
                status = '-> restarted (configuration change)'
            else:  # untouched
                status = ''
            logger.info('%s   - %-20s %s' % (MODULE_CHANGE_CHAPTER, name, status))

            # A deleted module has no configuration any more, and maybe there is no sub module
            if conf is None or len(conf.modules) == 0:
                continue

            # IMPORTANT: sub modules are NOT sorted, their order matters
            #            (for the webui auth modules for example)
            for sub_module in conf.modules:
                logger.debug('%s      * %s' % (MODULE_CHANGE_CHAPTER, sub_module.get_name()))
    
    
    def get_module_by_name(self, module_name):
        """Return the instance registered under `module_name`, or None if there is none."""
        return self.instances.get(module_name)
    
    
    # We get some new modules, and we must look which one are:
    # * no more there and should be stopped
    # * the new ones, we need to start
    # * the one that did changed (look at hash property) that we need to restart
    def update_modules(self, modules):
        with self.instances_lock:
            changed_to_restart = []
            self.errors = []
            
            # First try to look which one are no more need
            new_modules = set([m.get_name() for m in modules])
            previous_ones = set([m.get_name() for m in self.modules])
            
            old_to_delete = previous_ones - new_modules
            new_to_start = new_modules - previous_ones
            
            # In the common ones, look at the one that did changed, and so we will have to delete
            common_ones = previous_ones & new_modules
            
            for inst_name in common_ones:
                old_one = self._find_module_by_name(inst_name)
                new_one = None
                for m in modules:
                    if m.get_name() == inst_name:
                        new_one = m
                        break
                if old_one is None or new_one is None:
                    logger.error('Cannot find previous or new module definition for the module name %s, so cannot update it.' % inst_name)
                    continue
                # we compare hash to know if some thing did change, if so we will need to kill/restart it
                if old_one.hash != new_one.hash:
                    logger.info('%s %s Configuration of the module did change so we restart it' % (CHAPTER_CONFIGURATION, get_section_string(inst_name)))
                    changed_to_restart.append(new_one)
            
            for inst_name in old_to_delete:
                logger.info('%s %s Removing the module because it is remove from the configuration' % (CHAPTER_CONFIGURATION, get_section_string(inst_name)))
                self.__remove_instance(inst_name)
            
            for inst_name in new_to_start:
                message = 'Launching a new module as it was added in the daemon configuration'
                logger.info('%s %s Launching a new module as it was added in the daemon configuration' % (CHAPTER_CONFIGURATION, get_section_string(inst_name)))
                mod_conf = None
                for m in modules:
                    if m.get_name() == inst_name:
                        mod_conf = m
                        break
                # Got a new module configuration, need to start it
                inst = self.__get_instance_from_modconf(mod_conf)
                if inst is None:
                    continue
                updated_instances = self.instances.copy()
                updated_instances[inst_name] = inst
                self.instances = updated_instances
                # Set this module to be restart so it will start
                self.set_to_restart(inst, message, is_an_error=False)  # here it's not a problem, so don't log it as AT RISK in healtcheck
            
            for new_instance in changed_to_restart:
                inst_name = new_instance.get_name()
                logger.debug('The module %s did change its configuration. We restart it to take the new configuration.' % inst_name)
                self.__remove_instance(inst_name)
                mod_conf = None
                for m in modules:
                    if m.get_name() == inst_name:
                        mod_conf = m
                        break
                # Got a new module configuration, need to start it
                inst = self.__get_instance_from_modconf(mod_conf)
                if inst is None:
                    continue
                updated_instances = self.instances.copy()
                updated_instances[inst_name] = inst
                self.instances = updated_instances
                
                logger.debug("[module] I should try to re-init [%s]" % inst_name)
                if inst.is_external:
                    self.start_external_instance(inst)
                elif inst.is_worker_based:
                    self.start_worker_based_instance(inst)
                else:
                    self.start_internal_instance(inst)
            
            # save the new modules value
            self.modules = modules
            
            # Now display what did changed
            updated_instances_names = [module.get_name() for module in changed_to_restart]
            self._display_module_tree(added_modules_names=new_to_start, updated_modules_names=updated_instances_names, deleted_modules_names=old_to_delete)
    
    
    def set_max_queue_size(self, max_queue_size):
        """Set the queue size above which check_alive_instances() declares a module crashed (0 = no check)."""
        self.max_queue_size = max_queue_size
    
    
    # Import, instanciate & "init" the modules we have been requested
    def load_and_init(self):
        """Load the modules code, then create the instances; return what _get_instances() returns."""
        self.load()
        return self._get_instances()
    
    
    def load(self):
        """Import the python code of the modules (see _load_python_modules)."""
        self._load_python_modules()
    
    
    # We want to know which sys.modules are new after the modules load, but beware of duplicate ones :)
    @staticmethod
    def _get_only_new_sys_modules(sys_modules_names_before, sys_modules_code_before):
        # Look at sys.modules and look which modules are now loaded when they was not before:
        # WARNING: there will be duplicate: entries that match the same lib object
        sys_modules_names_after = set(sys.modules.keys())
        sys_modules_names_added = list(sys_modules_names_after - sys_modules_names_before)
        sys_modules_names_added.sort()
        
        for sys_module_name in list(sys_modules_names_added):  # copy because we will remove elements inside
            sys_module = sys.modules.get(sys_module_name, None)  # maybe a thread did remove it?
            if sys_module in sys_modules_code_before:  # was already loaded in fact
                sys_modules_names_added.remove(sys_module_name)
        return sys_modules_names_added
    
    
    # Try to import the requested modules ; put the imported modules in self.imported_modules.
    def _load_python_modules(self):
        """Import the code of every module directory found under self.modules_path.

        For each sub directory:
        * try to import the directory itself as a python module (a failure is only a warning),
        * load its module.py (or module.pyc if there is no module.py),
        * check that the loaded code has a `properties` dict with a 'type' entry,
        * keep it in self.imported_modules if self.modules_type is in properties['daemons'].
        Any exception raised for a directory is logged as a warning and the directory is skipped.
        Does nothing if self.imported_modules is already filled.
        """
        # Was already done, don't load it again
        # NOTE(review): if no module was kept, imported_modules is still empty and the scan is done again
        if self.imported_modules:
            return
        
        # And directories
        modules_dirs = [module_dir_name for module_dir_name in os.listdir(self.modules_path) if os.path.isdir(os.path.join(self.modules_path, module_dir_name))]
        
        # Now we try to load them
        # So first we add their dir into the sys.path
        if self.modules_path not in sys.path:
            sys.path.append(self.modules_path)
        
        before = time.time()
        # NOTE(review): failed_modules_dir is filled below but never read in this method
        failed_modules_dir = []
        available_modules_names = []
        # We try to import them, but we keep only the one of
        # our type
        for module_dir_name in modules_dirs:
            module_start = time.time()
            # Snapshot of sys.modules, to know after the load which libraries this module did import
            sys_modules_names_before = set(sys.modules.keys())
            sys_modules_code_before = set(sys.modules.values())
            
            try:
                module_dir = os.path.join(self.modules_path, module_dir_name)
                logger.debug('%s [directory=%-20s] Starting to load the module code directory %s.' % (CODE_LOADING_CHAPTER, module_dir_name, module_dir))
                # NOTE(review): appended without checking if the path is already in sys.path
                sys.path.append(module_dir)
                try:
                    imp.load_module(module_dir_name, *imp.find_module(module_dir_name, [self.modules_path]))
                except Exception:
                    # Not fatal: the module.py file is still loaded below
                    logger.warning('%s [directory=%-20s] Failed to load the directory [%s] as a python module.' % (CODE_LOADING_CHAPTER, module_dir_name, module_dir_name))
                    
                    init_path_file = os.path.join(module_dir, '__init__.py')
                    if not os.path.exists(init_path_file):
                        logger.warning('%s [directory=%-20s]  - because of missing file :[%s].' % (CODE_LOADING_CHAPTER, module_dir_name, init_path_file))
                    
                    # logger.warning('The shinken module can be load in shinken but import other python file of the module will fail. To do this you need to load the python module. ')
                    logger.warning('%s [directory=%-20s]  - "import %s.my_file" will not work in your module.py file' % (CODE_LOADING_CHAPTER, module_dir_name, module_dir_name))
                
                # Then we load the module.py inside this directory
                mod_file = os.path.abspath(os.path.join(self.modules_path, module_dir_name, 'module.py'))
                mod_dir = os.path.dirname(mod_file)
                # We add this dir to sys.path so the module can load local files too
                # NOTE(review): this is the absolute form of module_dir, also appended without duplicate check
                sys.path.append(mod_dir)
                # No module.py: fall back on the compiled file
                if not os.path.exists(mod_file):
                    mod_file = os.path.abspath(os.path.join(self.modules_path, module_dir_name, 'module.pyc'))
                
                if mod_file.endswith('.py'):
                    # important, equivalent to import fname from module.py
                    m = imp.load_source(module_dir_name, mod_file)
                else:
                    m = imp.load_compiled(module_dir_name, mod_file)
                
                # Look if it's a valid module
                if not hasattr(m, 'properties'):
                    logger.warning('%s [directory=%-20s] Bad module file for %s : missing properties dict' % (CODE_LOADING_CHAPTER, module_dir_name, mod_file))
                    failed_modules_dir.append(module_dir_name)
                    continue
                
                if 'type' not in m.properties:
                    logger.warning('%s [directory=%-20s] Bad module file for %s : missing type entry in properties dict' % (CODE_LOADING_CHAPTER, module_dir_name, mod_file))
                    failed_modules_dir.append(module_dir_name)
                    continue
                
                # Look at sys.modules and look which modules are now loaded when they was not before:
                # WARNING: there will be duplicate: entries that match the same lib object
                sys_modules_names_added = self._get_only_new_sys_modules(sys_modules_names_before, sys_modules_code_before)
                
                imported_libs_log = ''
                if sys_modules_names_added:
                    imported_libs_log = 'Did import %s python new librairies (%s).' % (len(sys_modules_names_added), ','.join(sys_modules_names_added))
                logger.debug('%s [directory=%-20s] [%.3fs] Module code was loaded. %s' % (CODE_LOADING_CHAPTER, module_dir_name, time.time() - module_start, imported_libs_log))
                
                # A missing 'daemons' entry raises a KeyError, managed by the except below
                is_available = self.modules_type in m.properties['daemons']
                # We want to keep only the modules of our type
                if is_available:
                    mod_type = uniform_module_type(m.properties['type'])
                    self.imported_modules.append(m)
                    available_modules_names.append(mod_type)
            except Exception as exp:
                # Oups, something went wrong here...
                logger.warning('%s [directory=%-20s] Importing module %s: %s' % (CODE_LOADING_CHAPTER, module_dir_name, module_dir_name, exp))
                failed_modules_dir.append(module_dir_name)
        
        logger.info('%s A total of %s Shinken Enterprise modules are available for this daemon/module (%s): %s (on a total of %s, loaded in %.3fs)' % (
            CODE_LOADING_CHAPTER, len(self.imported_modules), self.modules_type, ', '.join(available_modules_names), len(modules_dirs), time.time() - before))
    
    
    # For a specific module definition, we want to get the pymodule that match us
    def __get_pymodule_from_mod_conf(self, mod_conf):
        """Return the first imported python module whose type matches `mod_conf`, or None.

        None is returned silently if `mod_conf` has no module_type attribute.
        If no imported code has this type, the problem is added to self.errors and logged.
        """
        if not hasattr(mod_conf, 'module_type'):
            return None

        wanted_type = uniform_module_type(mod_conf.module_type)
        for candidate in self.imported_modules:
            candidate_type = uniform_module_type(candidate.properties['type'])
            if candidate_type == wanted_type:
                return candidate

        # No code is available for this type: keep the error and warn about it
        _error = "The module %s of type %s is not available for the daemon/module %s." % (mod_conf.get_name(), mod_conf.module_type, self.modules_type)
        self.errors.append(_error)
        logger.warning(_error)
        return None
    
    
    # Try to "init" the given module instance.
    # If late_start, don't look for last_init_try
    # Returns: an INSTANCE_INIT_STATE value: OK on successful init, FAIL if an Exception was raised, WAIT if the last try is too recent.
    def try_instance_init(self, inst, late_start=False):
        """Call the init method of `inst`, unless the previous try is too recent.

        inst: the module instance to init.
        late_start: when True, the delay since the last try is not checked.
        Returns INSTANCE_INIT_STATE.OK if init did not raise, INSTANCE_INIT_STATE.WAIT
        if the try was skipped, INSTANCE_INIT_STATE.FAIL if any exception was raised
        (the instance is then given to did_crash()).
        """
        started_at = time.time()
        try:
            # NOTE(review): with a step of 10 the `> 1` check below is true from the first call
            # (if the counter starts at 0) - confirm that this step is the wanted one
            inst.init_try += 10
            looks_like_a_retry = inst.init_try > 1
            if looks_like_a_retry and not late_start:
                # as much as we fail to start, increase the time, but not more than
                # MAX_MODULE_INIT_TRY_INTERVAL
                retry_interval = min(inst.init_try, MAX_MODULE_INIT_TRY_INTERVAL)
                next_allowed_try = inst.last_init_try + retry_interval
                if time.time() < next_allowed_try:
                    logger.info('The module %s did fail since not long, waiting before trying to restart it.' % inst.get_name())
                    return INSTANCE_INIT_STATE.WAIT

            logger.info("%s [%-20s] Trying to initialize module by calling it's init method." % (INSTANCE_CREATION, inst.get_name()))
            inst.last_init_try = time.time()

            # External and worker based instances need their Queues() to be created/updated first
            if inst.is_external or inst.is_worker_based:
                inst.create_queues(self.manager_factory)

            # The daemon name is only given to the init methods that accept it
            init_args = inspect.getargspec(inst.init).args
            if init_args and 'daemon_display_name' in init_args:
                inst.init(daemon_display_name=self.daemon_display_name)
            else:
                inst.init()

            logger.info("%s [%-20s] [%.3fs] SUCCESS The module was initialized successfully." % (INSTANCE_CREATION, inst.get_name(), time.time() - started_at))
            inst.init_try = 1
        except Exception as exc:
            failure_reason = str(exc)
            logger.error("%s [%-20s] [%.3fs] FAIL The module raised an exception %s. Removing it and we will try to restart it in the future if possible." % (INSTANCE_CREATION, inst.get_name(), time.time() - started_at, failure_reason))
            logger.print_stack()
            self.did_crash(inst, reason=failure_reason)
            return INSTANCE_INIT_STATE.FAIL
        return INSTANCE_INIT_STATE.OK
    
    
    # Request to "remove" the given instances list or all if not provided
    def clear_instances(self, insts=None):
        """Remove the given instances, or every instance of the manager if `insts` is None."""
        # get_all_instances() is expected to give a copy, the instances are removed while we loop
        doomed_instances = self.get_all_instances() if insts is None else insts
        for doomed in doomed_instances:
            logger.info('Removing the module %s as we are stopping all modules' % doomed.get_name())
            self.__remove_instance(doomed.get_name())
    
    
    # A daemon did detect that a module instance did crash and so don't want it anymore.
    # We will need to log it, kill the subprocess if external and set to restart later
    def did_crash(self, inst, reason="", do_log=True):
        """Log the crash `reason` (if any, and if `do_log`) and put `inst` in the restart queue."""
        must_log = bool(reason) and do_log
        if must_log:
            logger.error(reason)
        self.set_to_restart(inst, reason)
    
    
    # Put an instance to the restart queue
    # By default if an instance is set to restart, it means it's an error,
    # but in some case it's not (like new modules)
    def set_to_restart(self, inst, reason='', is_an_error=True):
        # type: (BaseModule, str, bool) -> None
        """Stop `inst` and queue it for a restart, unless it is in FATAL state.

        An instance in FATAL state is only flagged as managed and stopped.
        When `is_an_error` is True the restart is also logged and kept in the restart history.
        """
        in_fatal_state = inst.get_internal_state() == ModuleState.FATAL
        if in_fatal_state:
            # Fatal error: flag it, it is not queued for a restart
            inst.fatal_error_has_been_managed = True
        else:
            if is_an_error:
                logger.warning('The module %s will be restart because : %s' % (inst.get_name(), reason))
                self.__register_module_restart(inst.get_name(), reason)
            self.to_restart[inst.get_name()] = inst
        # In both cases the instance is stopped last
        inst.stop_all()
    
    
    def __get_instance_from_modconf(self, mod_conf):
        """Create the module instance described by `mod_conf`.

        Returns the instance, or None if no code matches the configuration,
        if get_instance() returned None or if any exception was raised (it is then logged).
        """
        mod_name = mod_conf.get_name()
        mod_type = mod_conf.get_type()
        py_module = self.__get_pymodule_from_mod_conf(mod_conf)
        # No suitable code to launch: skip it, the warning was already done
        if not py_module:
            return None

        creation_start = time.time()
        try:
            logger.debug('%s [module type=%-20s] [name=%-20s] Start to create the module instance' % (INSTANCE_CREATION, mod_type, mod_name))

            # The configuration gets its own copy of the properties of the code
            mod_conf.properties = py_module.properties.copy()

            # The module_on argument is only given to the get_instance functions that accept it
            factory = py_module.get_instance
            factory_args = inspect.getargspec(factory).args
            if len(factory_args) > 1 and 'module_on' in factory_args:
                inst = factory(mod_conf, module_on=self.modules_type)
            else:
                inst = factory(mod_conf)

            if inst is None:  # None = Bad thing happened :)
                logger.error("%s [module type=%-20s] [name=%-20s] FAIL The module get_instance() call did not return any instance. " % (INSTANCE_CREATION, mod_type, mod_name))
                return None

            # A failed assert is managed like any other exception by the except below
            assert isinstance(inst, BaseModule)
            logger.info("%s [module type=%-20s] [name=%-20s] [%.3fs] SUCCESS The module instance is created. " % (INSTANCE_CREATION, mod_type, mod_name, time.time() - creation_start))
            return inst
        except Exception as exp:
            error_text = unicode(exp)
            logger.error("%s [module type=%-20s] [name=%-20s] [%.3fs] FAIL The module creation did fail with an exception: %s. Remove this module currently, will try to recreate it in the future." % (
                INSTANCE_CREATION, mod_type, mod_name, time.time() - creation_start, error_text))
            logger.print_stack()
            return None
    
    
    # Actually only arbiter call this method with start_external=False..
    # Create and init the module instances that the caller needs, then returns True if all of them were created and started (False otherwise).
    # If an instance can't be created or init'ed then only log is done.
    # That instance is skipped. The previous modules instance(s), if any, are all cleaned.
    def _get_instances(self):
        """Drop the current instances, recreate one per configuration and init the internal ones.

        Returns True if every instance was created and every internal instance was started,
        False as soon as one creation or one internal init did fail.
        """
        with self.instances_lock:
            self.clear_instances()

            all_was_start = True
            for mod_conf in self.modules:
                inst = self.__get_instance_from_modconf(mod_conf)
                if not inst:  # there was an error on this module
                    all_was_start = False
                    continue
                # The instances dict is replaced by an updated copy, never modified in place
                refreshed = self.instances.copy()
                refreshed[mod_conf.get_name()] = inst
                self.instances = refreshed

            # External and worker based instances are NOT init here:
            # they can be crashed and so it must be done just before forking
            for inst in self.get_all_instances():
                if inst.is_external or inst.is_worker_based:
                    continue
                if not self.start_internal_instance(inst):
                    all_was_start = False

            return all_was_start
    
    
    def start_internal_instance(self, inst, late_start=False):
        """Try to init an internal instance; return True if it is initialized, False if it was queued for a restart."""
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
            return False
        elif outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to try again: queue it without counting it as an error
            self.set_to_restart(inst, is_an_error=False)
            return False
        return True
    
    
    def start_external_instance(self, inst, late_start=False):
        """Try to init, then start, an external instance; on any failure it is queued for a restart."""
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
            return
        elif outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to try again: queue it without counting it as an error
            self.set_to_restart(inst, is_an_error=False)
            return

        # ok, init succeed
        logger.info("Starting external module %s" % inst.get_name())
        try:
            inst.start(daemon_display_name=self.daemon_display_name)
        except Exception as exp:
            failure = 'The module "%s" failed to start (%s)' % (inst.get_name(), exp)
            logger.error(failure)
            self.set_to_restart(inst, failure)
    
    
    # Launch external instances that are load correctly
    def start_external_instances(self, late_start=False):
        """Start every instance flagged as external."""
        external_instances = [candidate for candidate in self.get_all_instances() if candidate.is_external]
        for external in external_instances:
            self.start_external_instance(external, late_start=late_start)
    
    
    def start_worker_based_instance(self, inst, late_start=False):
        """Try to init a worker based instance, then start its workers.

        On any failure the instance is queued for a restart.
        NOTE(review): False is returned only when start_workers() raises, every other path returns None.
        """
        logger.info('Starting a worker based instance: %s' % inst.get_name())
        outcome = self.try_instance_init(inst, late_start=late_start)
        if outcome == INSTANCE_INIT_STATE.FAIL:
            failure = "The worker based module '%s' failed to init" % inst.get_name()
            logger.error(failure)
            self.set_to_restart(inst, failure)
            return
        elif outcome == INSTANCE_INIT_STATE.WAIT:
            # Too early to try again: queue it without counting it as an error
            self.set_to_restart(inst, is_an_error=False)
            return

        # ok, init succeed
        logger.info('Worker based module %s was started' % inst.get_name())
        try:
            inst.start_workers(daemon_display_name=self.daemon_display_name)
        except Exception as exc:
            crash_reason = str(exc)
            logger.error("The instance %s raised an exception %s, I remove it!" % (inst.get_name(), crash_reason))
            logger.error("Back trace of this remove: %s" % (traceback.format_exc()))
            self.did_crash(inst, reason=crash_reason)
            return False
    
    
    # Launch worker based instances that are load correctly
    def start_worker_based_instances(self, late_start=False):
        """Start every instance flagged as worker based."""
        worker_based_instances = [candidate for candidate in self.get_all_instances() if candidate.is_worker_based]
        for worker_based in worker_based_instances:
            self.start_worker_based_instance(worker_based, late_start=late_start)
    
    
    # Request to cleanly remove the given instance.
    # If instance is external also shutdown it cleanly
    def __remove_instance(self, instance_name):
        with self.instances_lock:
            inst = self.instances.get(instance_name, None)
            if inst is None:
                logger.warning('Trying to remove the module %s but it is not found in the current instances: %s' % (instance_name, self.instances))
                return
            
            inst.stop_all()
            
            # Then do not listen anymore about it
            updated_instances = self.instances.copy()
            del updated_instances[instance_name]
            self.instances = updated_instances
            # SEF-6521: do not forget to clean in all lists:
            if instance_name in self.to_restart:
                del self.to_restart[instance_name]
    
    
    def check_alive_instances(self, skip_external=False):
        # type: (Bool) -> NoReturn
        """Declare crashed every instance that is dead, lost a worker or has a too big queue.

        Instances already in the restart queue, or whose fatal error was managed, are skipped.
        skip_external: when True the external instances are skipped too.
        """
        for instance_name, inst in self.get_all_instances_with_name():
            # skip already to restart one
            already_managed = instance_name in self.to_restart or inst.fatal_error_has_been_managed
            if already_managed:
                continue

            # The skip_external is used in a child process because there is no possible to set an external module to a subprocess/submodule
            if skip_external and inst.is_external:
                continue

            if not inst.is_alive():
                self.did_crash(inst, "The external module %s goes down unexpectedly!" % instance_name)
                inst.stop_process()
                continue

            if inst.is_worker_based and not inst.check_worker_processes():
                self.did_crash(inst, 'The module %s worker goes down unexpectedly!' % instance_name)
                continue

            # Queue size watchdog: above max_queue_size the module should have a huge problem,
            # so bailout. It's not a perfect solution. A max_queue_size of 0 disables the check
            if self.max_queue_size == 0:
                continue
            try:
                queue_size = inst.to_q.qsize()
            except Exception:
                # Best effort: a queue that cannot give its size is seen as empty
                queue_size = 0
            if queue_size > self.max_queue_size:
                self.did_crash(inst, "The external module %s got a too high brok queue size (%s > %s)!" % (instance_name, queue_size, self.max_queue_size))
    
    
    def is_instance_set_to_restart(self, instance):
        """Tell if `instance` is currently waiting in the restart queue."""
        waiting_instances = self.to_restart.values()
        return instance in waiting_instances
    
    
    def try_to_restart_deads(self):
        """Try to start again every instance currently registered in to_restart.

        to_restart is replaced by an empty dict before the start attempts, then
        each instance is handed to the start method matching its kind
        (external, worker based, or internal).
        """
        # Take the pending restarts and reset the registry in one go
        pending_restarts, self.to_restart = self.to_restart, {}
        for module_name in pending_restarts:
            module = pending_restarts[module_name]
            logger.debug("[module] I should try to re-init [%s]" % module_name)
            if module.is_external:
                start_instance = self.start_external_instance
            elif module.is_worker_based:
                start_instance = self.start_worker_based_instance
            else:
                start_instance = self.start_internal_instance
            start_instance(module)
    
    
    def __register_module_restart(self, module_name, reason=''):
        with self.last_restarts_lock:
            if module_name not in self.last_restarts:
                self.last_restarts[module_name] = []
            
            self.last_restarts[module_name].append({'timestamp': time.time(), 'reason': reason.rstrip()})
    
    
    # Called from StateCleaner thread
    def clean_modules_states(self):
        with self.last_restarts_lock:
            now_ts = time.time()
            clean_ts = now_ts - self.last_restarts_keep
            
            for last_restarts in self.last_restarts.values():
                delete_count = 0
                
                for restart_ts in last_restarts:
                    if restart_ts < clean_ts:
                        delete_count += 1
                    else:
                        break
                
                del last_restarts[:delete_count]
    
    
    def get_modules_states(self):
        states = []
        for name, inst in self.get_all_instances_with_name():
            status = {"restarts": self.last_restarts.get(name, []), "name": name, "type": inst.myconf.module_type}
            status.update(inst.get_state())
            states.append(status)
        return {"modules": states, "errors": self.errors}
    
    
    def _get_instances_with_condition(self, condition=lambda inst: True):
        return sorted(filter(condition, self.get_all_instances()), key=lambda inst: self._idx_modules_order_by_name[inst.get_name()])
    
    
    # Do not hand out instances that have problems (waiting for a restart or in FATAL state)
    def get_internal_instances(self, phase=None):
        return self._get_instances_with_condition(lambda inst: not inst.is_external and phase in inst.phases and inst not in self.to_restart.values() and inst.get_internal_state() != ModuleState.FATAL)
    
    
    def get_external_instances(self, phase=None):
        return self._get_instances_with_condition(lambda inst: inst.is_external and phase in inst.phases and inst not in self.to_restart.values() and inst.get_internal_state() != ModuleState.FATAL)
    
    
    def get_external_to_queues(self):
        return [inst.to_q for inst in self.get_all_instances() if inst.is_external and inst not in self.to_restart.values() and inst.get_internal_state() != ModuleState.FATAL]
    
    
    def get_external_modules_and_queues(self):
        return [(inst, inst.to_q) for inst in self.get_all_instances() if inst.is_external and inst not in self.to_restart.values() and inst.get_internal_state() != ModuleState.FATAL]
    
    
    def get_external_modules_and_from_queues(self):
        return [(inst, inst.from_module_to_main_daemon_queue) for inst in self.get_all_alive_instances() if (inst.is_external or inst.is_worker_based)]
    
    
    def get_external_from_queues(self):
        return [inst.from_module_to_main_daemon_queue for inst in self.get_all_instances() if (inst.is_external or inst.is_worker_based) and inst not in self.to_restart.values() and inst.get_internal_state() != ModuleState.FATAL]
    
    
    def get_all_alive_instances(self):
        """Return, in module order, the instances that are neither a value of to_restart nor in FATAL state.

        NOTE(review): this class defines get_all_alive_instances a second time
        further down; the later definition replaces this one when the class
        body is executed, so this version is never the one called.
        """
        
        def _is_alive(module):
            if module in self.to_restart.values():
                return False
            return module.get_internal_state() != ModuleState.FATAL
        
        
        return self._get_instances_with_condition(_is_alive)
    
    
    def stop_all(self):
        """Stop the modules: internal ones are asked to stop, external and worker based ones are cleared.

        NOTE(review): get_internal_instances() is called with its default
        phase=None, so only instances listing None in their phases are asked
        to stop here — confirm this is the intended selection.
        """
        logger.debug('Asking to stop all modules')
        # Internal modules that are not worker based are asked to quit by themselves
        for module in self.get_internal_instances():
            if module.is_worker_based:
                continue
            module.stop_all()
        
        # External instances first, then worker based ones: both go through clear_instances
        for kind_flag in ('is_external', 'is_worker_based'):
            self.clear_instances([module for module in self.get_all_instances() if getattr(module, kind_flag)])
    
    
    def get_all_instances(self):
        with self.instances_lock:
            return self.instances.values()
    
    
    def get_all_alive_instances(self):
        with self.instances_lock:
            return [instance for instance in self.instances.values() if instance not in self.to_restart]
    
    
    def get_all_instances_name(self):
        with self.instances_lock:
            return self.instances.keys()
    
    
    def get_all_instances_with_name(self):
        with self.instances_lock:
            return self.instances.items()
