#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2022:
#    Gabes Jean, naparuba@gmail.com
#    Gerhard Lausser, Gerhard.Lausser@consol.de
#    Gregory Starck, g.starck@gmail.com
#    Hartmut Goebel, h.goebel@goebel-consult.de
#
# This file is part of Shinken.
#
# Shinken is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Shinken is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Shinken.  If not, see <http://www.gnu.org/licenses/>.

import base64
import cPickle
import ctypes
import gc
import os
import signal
import sys
import time
from datetime import datetime, timedelta
from multiprocessing import Process, cpu_count

from pymongo.errors import BulkWriteError

from shinken.basesubprocess import LookAtMyFatherThread
from shinken.log import get_section_string
from shinken.macroresolver import MacroResolver
from shinken.misc.type_hint import TYPE_CHECKING
from shinken.modules.base_module.basemodule import BaseModule, ModuleState
from shinken.objects.proxyitem import proxyitemsmgr, proxyitemsgraph
from shinken.safepickle import SafeUnpickler
from shinken.util import malloc_trim, mem_wait_for_fork_possible, get_memory_consumption, split_list_by_pack
from shinkensolutions.ssh_mongodb.mongo_client import MongoClient
from shinkensolutions.ssh_mongodb.mongo_conf import MongoConf
from shinkensolutions.ssh_mongodb.mongo_error import ShinkenMongoException
from shinkensolutions.ssh_mongodb.sshtunnelmongomgr import mongo_by_ssh_mgr

if TYPE_CHECKING:
    from shinken.log import PartLogger
    from shinken.misc.type_hint import Optional, Dict, Any
    from shinken.objects.module import Module as ShinkenModuleDefinition
    from shinken.scheduler import Scheduler
    from shinkensolutions.ssh_mongodb.mongo_collection import MongoCollection

# Descriptor of this module: it targets the scheduler daemon and is of type 'mongodb_retention'.
# NOTE(review): presumably read by the Shinken module loader to match the module definition - confirm in the loader.
properties = {
    u'daemons' : [u'scheduler'],
    u'type'    : u'mongodb_retention',
    u'external': False,
}

# True when the platform name starts with 'linux'. manage_signal() only logs the signal and returns when it is False.
ON_LINUX = sys.platform.startswith(u'linux')

# Maximum number of documents given to one replace_many() call when saving ( see _save_into_mongo ).
CHUNKS_SIZE = 1000


def get_instance(plugin):
    """
    Build the retention module object for a module definition.
    
    :param plugin: the module definition, handed over unchanged to MongodbRetentionScheduler
    :return: the newly created MongodbRetentionScheduler
    """
    return MongodbRetentionScheduler(plugin)


def chunks(_list, n):
    """
    Yield successive slices of _list holding at most n items each.
    
    The last slice is shorter when len(_list) is not a multiple of n, and nothing is yielded for an empty _list.
    """
    total = len(_list)
    for start in xrange(0, total, n):
        stop = start + n
        yield _list[start:stop]


# This is a scheduler module to save host/service retention data into a mongodb database
class MongodbRetentionScheduler(BaseModule):
    
    def __init__(self, modconf):
        # type: (ShinkenModuleDefinition) -> None
        """
        Read the module options from modconf and prepare the mongo configuration, the mongo client and the loggers.
        
        The two retention collections stay at None here, they are resolved by create_connections().
        """
        BaseModule.__init__(self, modconf)
        
        def _read_int(option_name, default_value):
            # type: (unicode, unicode) -> int
            # The options are read as text ( the defaults are written as text too ) then converted to integers
            return int(getattr(modconf, option_name, default_value))
        
        self.mongo_conf = MongoConf(modconf, logger=self.logger, prefix_module_property=self.get_module_type())
        self.mongo_conf.log_configuration(log_properties=True, show_values_as_in_conf_file=True)
        self.mongo_client = MongoClient(self.mongo_conf, logger=self.logger)
        
        # Only a start value: hook_save_retention() computes the real one before each save
        self.nb_workers = 4
        
        # Multiplied by 1000 because it is given to mongo as $maxTimeMS
        self.mongo_timeout = _read_int(u'mongo_timeout', u'10') * 1000
        self.size_chunk_to_load = _read_int(u'size_chunk_to_load', u'1000')
        self.size_chunk_to_delete = _read_int(u'size_chunk_to_delete', u'1000')
        
        self.hosts_retention_collection = None  # type: Optional[MongoCollection]
        self.services_retention_collection = None  # type: Optional[MongoCollection]
        
        # The daemon is kept because, once forked, its HTTP sockets must be stopped:
        # this way a scheduler going down does not keep the socket ports locked
        self.scheduler_daemon = None
        
        # worker_timeout bounds the whole save, worker_one_try_timeout bounds one try of one worker
        self.worker_timeout = _read_int(u'worker_timeout', u'120')
        self.worker_one_try_timeout = _read_int(u'worker_one_try_timeout', u'30')
        
        # Options below are not public, they are for debug use only
        protection_is_enabled = getattr(modconf, u'scheduler__retention_mongo__enable_sub_processes_memory_usage_protection', u'1')
        self.enable_sub_processes_memory_usage_protection = protection_is_enabled == u'1'
        self.sub_process_memory_usage_system_reserved_memory = _read_int(u'scheduler__retention_mongo__sub_process_memory_usage_system_reserved_memory', u'0')
        self.sub_processes_memory_usage_protection_max_retry_time = _read_int(u'scheduler__retention_mongo__sub_processes_memory_usage_protection_max_retry_time', u'5')
        
        self.nb_of_retention_day = _read_int(u'nb_of_max_retention_day', u'7')
        
        self.max_number_of_workers = _read_int(u'max_number_of_workers', u'4')
        
        self.all_data = None
        self.in_debug_mode = False
        
        # One logger by activity of the module
        self.save_workers_logger = self.logger.get_sub_part(u'SAVE WORKERS')
        self.delete_logger = self.logger.get_sub_part(u'DELETE OLD RETENTION')
        
        self.load_logger = self.logger.get_sub_part(u'LOAD RETENTION')
        # Both load sub loggers use the size of the longest part name
        host_clusters_part = u'HOSTS/CLUSTERS'
        load_part_name_size = len(host_clusters_part)
        self.hosts_clusters_load_logger = self.load_logger.get_sub_part(host_clusters_part, part_name_size=load_part_name_size)
        self.checks_load_logger = self.load_logger.get_sub_part(u'CHECKS', part_name_size=load_part_name_size)
    
    
    def get_state(self):
        # type: () -> Dict[unicode, Any]
        """
        Report the state of the module from the state of the mongo connections.
        
        :return: a dict with 'status' ( CRITICAL as soon as one entry is not connected, OK otherwise ), 'output' ( text of the status )
                 and 'mongodb_stats' ( the raw entries returned by mongo_by_ssh_mgr.check_connexion_mongodb )
        """
        mongo_details = mongo_by_ssh_mgr.check_connexion_mongodb(self.mongo_conf.uri)
        # Only an explicit False counts as disconnected
        unreachable_urls = [detail[u'url'] for detail in mongo_details if detail[u'is_connected'] is False]
        
        if unreachable_urls:
            status = ModuleState.CRITICAL
            output = u'No connection to Mongo Database : %s' % u', '.join(unreachable_urls)
        else:
            status = ModuleState.OK
            output = u'OK'
        
        state = {u'status': status, u'output': output, u'mongodb_stats': mongo_details}
        return state
    
    
    def create_connections(self, logger, verbose=True):
        # type: (PartLogger, bool) -> None
        # In save mode the timeout is handled by the daemon, so we force the timeout to the daemon value.
        if self.daemon_display_name and self.daemon_display_name != u'UNSET':
            requestor = u'%s ] [ %s ] [ %s' % (self.daemon_display_name, self.name, logger.name)
        else:
            requestor = u'%s ] [ %s' % (self.name, logger.name)
        try:
            self.mongo_client.try_connection(requestor, logger, verbose=verbose)
            self.hosts_retention_collection = self.mongo_client.get_collection(u'retention_hosts_raw')
            self.services_retention_collection = self.mongo_client.get_collection(u'retention_services_raw')
        except TypeError:
            raise Exception(u'Multiples urls were found in the module\'s configuration file')
    
    
    def check_connection(self):
        # type: () -> bool
        return self.hosts_retention_collection.is_connection_available()
    
    
    # When exiting, don't care about the threads, we just exit whatever can happen
    def _forced_system_exit(self, rc):
        # type: (int) -> None
        """
        End the current process right now with os._exit().
        
        :param rc: exit status given to os._exit()
        """
        os._exit(rc)  # noqa : need to call this
    
    
    def init(self):
        # type: () -> None
        self.logger.get_sub_part(u'INITIALIZATION').debug(u'Initialization of the module')
    
    
    def manage_signal(self, sig, frame):
        """
        Signal handler of the save workers ( installed by save_job for SIGINT, SIGTERM and SIGUSR1 ).
        
        The signal is always logged. Then, on Linux only:
        * SIGUSR1 asks for a memory dump: guppy is used when the python minor version is 6, meliae when it is 7
        * any other signal disconnects from mongo, closes the ssh tunnels and exits the process with code 0
        
        :param sig: number of the received signal
        :param frame: not used
        """
        current_pid = os.getpid()
        self.logger.get_sub_part(u'MANAGE SIGNAL').info(u'The worker with the pid %d received a signal %s' % (current_pid, sig))
        dump_logger = self.logger.get_sub_part(u'WORKER pid=%d' % current_pid)
        if not ON_LINUX:
            return
        
        if sig != signal.SIGUSR1:
            # DO NOT FSYNC, if you are fsyncing, it will block database
            self.mongo_client.disconnect()
            # The worker is exiting, so all the ssh tunnels are closed by hand
            # NOTE: in worker, at exit calls won't be executed (thanks multiprocessing...)
            mongo_by_ssh_mgr.close_all_tunnels()
            self._forced_system_exit(0)
            return
        
        # SIGUSR1: a memory dump is asked, the tool depends on the python version
        python_minor_version = sys.version_info[1]
        if python_minor_version == 6:  # python 2.6
            try:
                from guppy import hpy  # noqa : If the import failed, we don't want continue
                heap_inspector = hpy()
                dump_logger.error(u'(support-only) MEMORY DUMP (to be sent to the support):\n%s' % heap_inspector.heap())
                return
            except ImportError:
                dump_logger.error(u'(support-only) MEMORY DUMP: FAIL check if guppy lib is installed')
        if python_minor_version == 7:  # python 2.7
            try:
                import meliae.scanner
                import meliae.loader
                dump_file = u'/tmp/memorydump-%s.json' % self.name
                meliae.scanner.dump_all_objects(dump_file)
                dump_logger.error(u'(support-only) Memory information dumped to file %s (to be sent to the support)' % dump_file)
            except ImportError:
                dump_logger.error(u'(support-only) MEMORY DUMP: FAIL check if meliae lib is installed')
    
    
    def _look_at_father(self, worker_id, fatherpid):
        # type: (int, int) -> None
        """
        Endless loop checking every 10 seconds that the father process still exists, and exiting this process ( code 0 ) when it does not.
        
        :param worker_id: id of the worker, only used in the logger name
        :param fatherpid: pid of the process to look at
        """
        worker_logger = self.logger.get_sub_part(u'WORKER:%d' % worker_id)
        worker_logger.debug(u'Starting father process lookup loop')
        
        def _father_is_gone():
            # type: () -> bool
            try:
                os.kill(fatherpid, 0)  # fake kill: signal 0 only checks the process
            except:  # noqa : whatever the error is, it is read as "no more father"
                return True
            return False
        
        while True:
            if _father_is_gone():
                worker_logger.error(u'I am a worker with pid: %d and my master process %s is dead, I exit.' % (os.getpid(), fatherpid))
                self._forced_system_exit(0)  # in a thread, raw kill
            time.sleep(10)
    
    
    def _massive_delete_in_mongo(self, collection, nb_item_to_del, where):
        # type: (MongoCollection, int, Dict[unicode, Any]) -> int
        try:
            group_ids = collection.find(where, {u'_id': 1}, modifiers={u'$maxTimeMS': self.mongo_timeout}, limit=self.size_chunk_to_delete)
            self.delete_logger.debug(u'Requesting a chunk of %s to delete, received %s' % (self.size_chunk_to_delete, len(group_ids)))
            # Important: #SEF-7855: another scheduler can delete them too! so if no more, exit (and force exit the while loop outside)
            if len(group_ids) == 0:
                return 0
            batch = [i[u'_id'] for i in group_ids if i is not None]
            collection.remove({u'_id': {u'$in': batch}}, modifiers={u'$maxTimeMS': self.mongo_timeout})
            nb_item_to_del -= len(group_ids)
        except ShinkenMongoException as e:
            self.delete_logger.error(u'After %s tries, we could not connect to mongo :[%s]' % (self.mongo_conf.auto_reconnect_max_try, e))
            self.delete_logger.print_stack()
            raise
        except Exception as e:
            self.delete_logger.error(u'We have an error:[%s]' % e)
            self.delete_logger.print_stack()
            raise
        return nb_item_to_del
    
    
    #################################
    # SAVE WORKER CODE
    #################################
    def _worker_exit_process(self, exit_code):
        # type: (int) -> None
        """
        End a save worker process: all the ssh tunnels are closed, then the process is ended with exit_code.
        
        :param exit_code: exit status of the process, it must be a POSITIVE integer between 0 and 255 ( 0 means SUCCESS )
        """
        
        # On the worker exit, we ask to close all ssh tunnel
        # NOTE: in worker, at exit calls won't be executed (thanks multiprocessing...)
        mongo_by_ssh_mgr.close_all_tunnels()
        self._forced_system_exit(exit_code)
    
    
    def save_job(self, worker_id, father_pid, worker_name, _retry=0):
        # type: (int, int, unicode, int) -> None
        """
        Entry point of a save worker process ( target of the Process built by _prepare_worker_process ).
        
        It prepares the process ( logger, signal handlers, father lookup thread, process title ) then runs _save_job().
        When _save_job() fails, the process is ended with code 2 for a ShinkenMongoException and with code 1 for any other error.
        
        :param worker_id: index of this worker
        :param father_pid: pid of the process that did start this worker
        :param worker_name: title to give to the process
        :param _retry: not used
        """
        actual_pid = os.getpid()
        gc.disable()
        ctypes.CDLL('libc.so.6').malloc_trim(0)
        
        # We are in a new process, so the logger can be changed and given to the MongoClient
        self.logger = self.logger.get_sub_part(u'SAVE WORKER %d' % worker_id)
        self.mongo_client.set_logger(self.logger)
        handled_signals = (signal.SIGINT, signal.SIGTERM, signal.SIGUSR1)
        for handled_signal in handled_signals:
            signal.signal(handled_signal, self.manage_signal)
        
        # This thread does a raw exit of the worker process if the master process dies
        father_lookup_thread = LookAtMyFatherThread(father_pid, self.daemon_display_name, u'[MongodbRetention][worker:%d]' % worker_id, loop_speed=10)
        father_lookup_thread.start_thread()
        
        # The process title is only a comfort, any failure here is ignored
        try:
            from setproctitle import setproctitle
            setproctitle(worker_name)
        except:
            pass
        
        exit_code = None
        try:
            self._save_job(worker_id)
        except ShinkenMongoException as mongo_error:
            self.logger.error(u'After %s tries, worker could not connect to mongo :[%s]' % (self.mongo_conf.auto_reconnect_max_try, mongo_error))
            exit_code = 2
        except BulkWriteError as bulk_error:
            self.logger.error(u'Worker has an error:[%s]' % bulk_error)
            self.logger.error(u'The detail of this error is : %s' % bulk_error.details)
            exit_code = 1
        except Exception as error:
            self.logger.error(u'Worker has an error:[%s]' % error)
            self.logger.print_stack(prefix=u'(pid=%d) ' % actual_pid)
            exit_code = 1
        
        if exit_code is not None:
            self._worker_exit_process(exit_code)
    
    
    def _save_job(self, worker_id):
        # type: (int) -> int
        """
        Body of a save worker: encode the share of the retention data of this worker, save it into mongo, then end the process with code 0.
        
        The hosts and the services of self.all_data are shared between the workers by position:
        a worker takes the elements whose position modulo self.nb_workers is its worker_id.
        
        :param worker_id: index of this worker
        :return: 0, but only if the final process exit did not stop the process ( os._exit never returns )
        :raise: the errors of the encoding and of mongo are not caught here, save_job manages them
        """
        worker_start = time.time()
        all_data = self.all_data
        self.logger.debug(u'Worker spawned as process with pid %d' % os.getpid())
        
        os.nice(1)
        
        date = datetime.utcnow()
        
        self.logger.info(u'Preparing elements to save')
        hosts_to_save = self._encode_worker_share(all_data[u'hosts'], worker_id, u'HOST-%s', date)
        # space are not allowed in a service key.. so they are changed by the SPACE token
        services_to_save = self._encode_worker_share(all_data[u'services'], worker_id, u'SERVICE-%s', date, escape_spaces=True)
        nb_host_cluster_to_save = len(hosts_to_save)
        nb_service_to_save = len(services_to_save)
        
        serialize_end_time = time.time()
        self.logger.info(u'Took %s to prepare %d hosts/clusters and %d checks' % (self.logger.format_duration(serialize_end_time - worker_start), nb_host_cluster_to_save, nb_service_to_save))
        
        # Re init the mongodb connexion for new process
        self.create_connections(self.logger, verbose=False)
        
        self.logger.info(u'Took %s to connect to Mongo' % self.logger.format_duration(time.time() - serialize_end_time))
        
        self._save_into_mongo(nb_host_cluster_to_save, hosts_to_save, u'hosts/clusters', self.hosts_retention_collection)
        self._save_into_mongo(nb_service_to_save, services_to_save, u'checks', self.services_retention_collection)
        
        # Do not fsync the data, as it will block other workers. Disk write will wait that the mongo
        # will automatically do it, by default every 60s.
        
        self.mongo_client.disconnect()
        self.hosts_retention_collection = self.services_retention_collection = None
        if self.in_debug_mode:
            self.logger.debug(u'Memory usage after saving :[%s]Mo' % get_memory_consumption()[0])
        
        # On the worker exit, we ask to close all ssh tunnel
        # NOTE: in worker, at exit calls won't be executed (thanks multiprocessing...)
        mongo_by_ssh_mgr.close_all_tunnels()
        
        self.logger.info(u'Worker ended in %s' % (self.logger.format_duration(time.time() - worker_start)))
        # Same raw exit as the other exits of the module
        self._forced_system_exit(0)
        return 0
    
    
    def _encode_worker_share(self, elements, worker_id, key_template, date, escape_spaces=False):
        # type: (Dict[unicode, Any], int, unicode, datetime, bool) -> Dict[unicode, Dict[unicode, Any]]
        """
        Build the mongo documents of the elements managed by one worker.
        
        :param elements: retention data by element key
        :param worker_id: index of the worker, it takes the elements whose position modulo self.nb_workers is worker_id
        :param key_template: format of the document id, it receives the element key ( u'HOST-%s' or u'SERVICE-%s' )
        :param date: date written in each document
        :param escape_spaces: if True, the spaces of the document id are replaced by the SPACE token
        :return: the documents by document id, each one holds '_id', 'value' ( pickled data encoded in base64 ) and 'date'
        """
        nb_workers = self.nb_workers
        documents = {}
        for position, element_key in enumerate(elements):
            if (position % nb_workers) != worker_id:
                continue
            document_id = key_template % element_key
            if escape_spaces:
                document_id = document_id.replace(u' ', u'SPACE')
            pickled_value = cPickle.dumps(elements[element_key], protocol=cPickle.HIGHEST_PROTOCOL)
            documents[document_id] = {u'_id': document_id, u'value': base64.b64encode(pickled_value), u'date': date}
        return documents
    
    
    def _save_into_mongo(self, nb_elements_to_save, elements, element_type, collection):
        start_time = time.time()
        nb_elements_saved = 0
        if nb_elements_to_save != 0:
            self.logger.info(u'%s will be saved in groups of maximum %s' % (element_type, CHUNKS_SIZE))
            elements_stacks = list(chunks(elements.values(), CHUNKS_SIZE))
            for stack in elements_stacks:
                bulk_start_time = time.time()
                collection.replace_many(stack, upsert=True)
                nb_elements_saved += len(stack)
                self.logger.info(u'Saved %d/%d %s ( took %s )' % (nb_elements_saved, nb_elements_to_save, element_type, self.logger.format_duration(time.time() - bulk_start_time)))
        self.logger.info(u'Took %s to save %d %s' % (self.logger.format_duration(time.time() - start_time), nb_elements_saved, element_type))
    
    
    #################################
    # END SAVE WORKER CODE
    #################################
    
    def _kill_process(self, proc_entry, reason):
        # type: (Dict[unicode, Any], unicode) -> None
        process = proc_entry[u'process']
        if process is None:  # already did
            return
        proc_pid = process.pid
        worker_id = proc_entry[u'worker_id']
        try_nb = proc_entry[u'try']
        self.save_workers_logger.warning(u'The worker %d (pid:%d | try:%d) is restarting : %s' % (worker_id, proc_pid, try_nb, reason))
        process.terminate()
        # Give a small time to exit
        time.sleep(0.5)
        # And in all case: ATOMISE THIS
        try:
            os.kill(proc_pid, 9)
        except Exception:  # maybe was not exist
            pass
        process.join(0.1)
        proc_entry[u'process'] = None
    
    
    # We are just creating the process without launching it
    def _prepare_worker_process(self, worker_id, worker_name_template):
        # type: (int, str) -> Process
        
        proc = Process(target=self.save_job, args=(worker_id, os.getpid(), worker_name_template % worker_id))
        return proc
    
    
    def launch_and_check_workers(self):
        # type: () -> None
        
        try:
            self._really_launch_and_check_workers()
        except Exception:
            self.save_workers_logger.print_stack()
            sys.exit(2)
    
    
    def _really_launch_and_check_workers(self):
        # type: () -> None
        """
        Free the memory taken by the scheduler objects in this process, then start the save workers and follow them until the end.
        
        It is called by launch_and_check_workers(), which hook_save_retention() runs in its own process.
        
        The method returns when the self.nb_workers workers did exit with the code 0.
        In the other cases the process is ended with the exit code 2:
        * the global timeout ( self.worker_timeout ) is reached
        * a worker needs more tries than self.mongo_conf.auto_reconnect_max_try
        * there is not enough memory to fork a worker ( only if the memory protection is enabled )
        * a worker process cannot be started
        A worker that did not succeed ( exit code not 0, or one try longer than self.worker_one_try_timeout ) is killed and started again.
        """
        gc.disable()
        self.scheduler_daemon.sched_daemon.http_daemon.shutdown(quiet=True)
        self.save_workers_logger.debug(u'closing self.scheduler_daemon.sched_daemon.http_daemon')
        # State of each worker, by worker id ( the entries are created just before the main loop )
        processes = {}
        # Start of the global timeout
        start = time.time()
        
        # ATOMIZATION
        # here before launch any worker we clean all our mapped memory from scheduler
        # If you don't clean mem here the copy on write will duplicate the scheduler memory because scheduler will continue this work
        # We do not exec do remove all because we want keep self.all_data with all data to save in retention
        self.in_debug_mode = self.scheduler_daemon.sched_daemon.debug
        # NOTE: we are creating this template BEFORE _prepare_worker_process because we will delete self.scheduler_daemon just after
        # The last %s receives u'%d', so the template still has a %d for the worker id
        worker_name_template = u'%s [ - %s - worker %s ]' % (self.scheduler_daemon.sched_daemon.daemon_display_name, self.get_name(), u'%d')
        
        if self.in_debug_mode:
            self.save_workers_logger.debug(u'[prepare worker] memory usage before cleaning memory :[%s]Mo' % get_memory_consumption()[0])
        start_atomization = time.time()
        
        # Empty the state shared by the MacroResolver instances
        mr = MacroResolver()
        for k in mr._Borg__shared_state.keys():
            del mr._Borg__shared_state[k]
        
        # Remove the attributes of every object of the scheduler
        # NOTE: map() must be eager here ( it is in python 2 ), as its result is not read
        for t in (u'hostgroups', u'services', u'hosts', u'notificationways', u'checkmodulations', u'macromodulations', u'contacts', u'contactgroups', u'servicegroups', u'timeperiods', u'commands'):
            map(self.atomize, (i for i in getattr(self.scheduler_daemon, t)))
        
        proxyitemsmgr.refresh_items([])
        proxyitemsgraph.reset()
        
        # Then the configuration, the daemon and the scheduler themselves.
        # The order is important: each object is reached through the attributes of the next one
        c4 = self.scheduler_daemon.sched_daemon.conf
        self.atomize(c4)
        self.atomize(self.scheduler_daemon.sched_daemon)
        self.atomize(self.scheduler_daemon)
        
        malloc_trim(False)
        
        self.save_workers_logger.log_perf(start_atomization, u'MongodbRetention', u'atomization duration')
        if self.in_debug_mode:
            self.save_workers_logger.debug(u'[prepare worker] memory usage after cleaning memory :[%s]Mo' % (get_memory_consumption()[0]))
        
        # Ids of the workers that did exit with the code 0
        finished_workers = set()
        
        # 'process' is None when the worker has no running process, 'try' counts the starts of the worker
        for worker_id in xrange(self.nb_workers):
            processes[worker_id] = {u'worker_id': worker_id, u'process': None, u'pid': -1, u'start_time': 0.0, u'try': 0}
        
        # Each turn: check the end and the global timeout, join the ended workers, start the missing ones, kill the too long ones
        while True:  # will be timeout or void
            now = time.time()
            
            # Did finish well
            if len(finished_workers) == self.nb_workers:  # No more process: we did finish
                return
            
            # Not good, Global timeout reach too late
            if now > start + self.worker_timeout:
                for (worker_id, proc_entry) in processes.items():
                    self._kill_process(proc_entry, u'worker did not exit on time ( global timeout %ds ).' % self.worker_timeout)
                self.save_workers_logger.error(u'some workers did fail to exit or encountered an error. The retention save can be incomplete.')
                self._forced_system_exit(2)
            
            # Join the process that did finished
            for (worker_id, proc_entry) in processes.items():
                if worker_id in finished_workers:
                    continue
                process = proc_entry[u'process']
                if process is None:  # was not started, will be done after
                    continue
                # Ok was started, try to join it
                process.join(0.1)
                if process.is_alive():  # if the process is still alive, timeout will manage it
                    continue
                return_code = process.exitcode
                if return_code == 0:  # all was well, we can skip this worker now
                    finished_workers.add(worker_id)
                    self.save_workers_logger.info(u'The worker %d successfully ended ( after %d tries )' % (worker_id, proc_entry[u'try']))
                
                # 2 is the exit code save_job uses when the worker could not connect to mongo.
                # _kill_process sets the process of the entry to None, so the worker is started again below
                elif return_code == 2:
                    self._kill_process(proc_entry, u'worker cannot connect to Mongo')
                
                else:  # ok was not a success, retry it
                    self._kill_process(proc_entry, u'worker %d FAILED' % worker_id)
            
            # Start the process that need it
            for (worker_id, proc_entry) in processes.items():
                if worker_id in finished_workers:  # already finished, skip it
                    continue
                if proc_entry[u'process'] is None:  # No process, start one
                    proc_entry[u'try'] += 1
                    # The number of tries of a worker is limited by the mongo option auto_reconnect_max_try
                    if proc_entry[u'try'] > self.mongo_conf.auto_reconnect_max_try:
                        self.save_workers_logger.error(u'Too many tries failed')
                        self._forced_system_exit(2)
                    if self.enable_sub_processes_memory_usage_protection:
                        is_fork_possible = mem_wait_for_fork_possible(u'%s' % self.get_name(), reserved_memory=self.sub_process_memory_usage_system_reserved_memory, retry_time=self.sub_processes_memory_usage_protection_max_retry_time)
                        if not is_fork_possible:
                            for _proc_entry in processes.values():
                                self._kill_process(_proc_entry, u'not enough memory to start worker')
                            # Will be catch by create_queues => try_instance_init
                            self.save_workers_logger.error(u'Cannot start the %s worker process as there is not enough memory' % self.get_name())
                            self._forced_system_exit(2)
                    
                    try:
                        proc = self._prepare_worker_process(worker_id, worker_name_template)
                        if self.in_debug_mode:
                            self.save_workers_logger.debug(u'[prepare worker] memory usage before worker fork :[%s]Mo' % (get_memory_consumption()[0]))
                        proc.start()
                        proc_entry[u'process'] = proc
                        proc_entry[u'pid'] = proc.pid
                        proc_entry[u'start_time'] = time.time()
                        self.save_workers_logger.info(u'Starting worker %d with pid %d. Try: [ %d ], max time allowed [ %ds ]' % (worker_id, proc_entry[u'pid'], proc_entry[u'try'], self.worker_one_try_timeout))
                    except Exception as exp:
                        self.save_workers_logger.error(u'Cannot start the worker %d process: %s. Exiting the retention save, killing all currently launched workers' % (worker_id, exp))
                        for _proc_entry in processes.values():
                            self._kill_process(_proc_entry, u'cannot start worker')
                        self._forced_system_exit(2)
                    
            
            # worker timeout, not great
            # NOTE: now was read at the start of the turn, so a worker started in this turn cannot be over its time here
            for (worker_id, proc_entry) in processes.items():
                if worker_id in finished_workers:
                    continue
                if now > proc_entry[u'start_time'] + self.worker_one_try_timeout:
                    self._kill_process(proc_entry, u'worker did not exit on time ( %ds ).' % self.worker_one_try_timeout)
            
            time.sleep(0.1)
    
    
    @staticmethod
    def atomize(obj):
        # type: (Any) -> None
        if hasattr(obj, u'properties'):
            for k in obj.properties.keys():
                try:
                    delattr(obj, k)
                except:
                    pass
        if hasattr(obj, u'running_properties'):
            for k in obj.running_properties.keys():
                try:
                    delattr(obj, k)
                except:
                    pass
        for k in obj.__dict__.keys():
            delattr(obj, k)
    
    
    #################################
    # HOOK PART
    #################################
    def hook_save_retention(self, daemon):
        # type: (Scheduler) -> bool
        """
        main function that is called in the retention creation pass
        
        The retention data of the daemon are saved by worker processes. They are started by a dedicated process
        ( launch_and_check_workers ), and this method waits for the end of this process.
        
        :param daemon: the scheduler, it gives the data to save with get_retention_data()
        :return: True when the retention was saved
        :raise Exception: when the dedicated process cannot be started, or when it did not exit with the code 0
        """
        # Save the scheduler daemon as we will need it in the sub-process
        self.scheduler_daemon = daemon
        try:
            _cpu_count = cpu_count()
        except NotImplementedError:
            _cpu_count = self.max_number_of_workers
        
        # At least one worker: with 0 worker nothing is saved but the save is seen as a success
        self.nb_workers = max(1, min(self.max_number_of_workers, _cpu_count))
        
        t0 = time.time()
        all_data = daemon.get_retention_data()
        save_global_logger = self.logger.get_sub_part(u'SAVE GLOBAL')
        save_global_logger.info(u'Starting to save retention with %s worker(s). [ %d:hosts/clusters ] [ %d:checks ] ( Database used = %s, use ssh = %s ), max time allowed for the save %d seconds' % (
            self.nb_workers, len(all_data[u'hosts']), len(all_data[u'services']), self.mongo_conf.uri, self.mongo_conf.use_ssh_tunnel, self.worker_timeout))
        _section_string = get_section_string(u'SAVE GLOBAL')
        
        self.all_data = all_data  # so worker process will be able to get it
        try:
            # We make a process here to clean the memory before launch worker
            try:
                p = Process(target=self.launch_and_check_workers)
                p.start()
                p.join()
            except OSError as exc:
                raise Exception(u'%s FAILED Retention could not be saved in mongodb : %s' % (_section_string, exc.strerror))
            
            if p.exitcode != 0:
                # This process may never have been connected to mongo ( the workers use their own connection ),
                # so the collection can still be None here: it is read as "mongo is unreachable"
                mongo_is_reachable = self.hosts_retention_collection is not None and self.check_connection()
                mongo_connection_message = u'' if mongo_is_reachable else u' because mongo is unreachable'
                raise Exception(u'%s FAILED Retention could not be saved in mongodb%s. Total time %.2fs' % (_section_string, mongo_connection_message, time.time() - t0))
            
            save_global_logger.info(u'Retention was saved into mongodb. Total time %.2fs' % (time.time() - t0))
            # The save was a success, allow the other module to run too
            return True
        finally:
            # In all cases the data are not kept in this process after the save
            self.all_data = None
    
    
    # Should return if it succeed in the retention load or not
    def hook_load_retention(self, daemon):
        # type: (Scheduler) -> bool
        
        if self.hosts_retention_collection is None or not self.check_connection():
            try:
                self.create_connections(self.load_logger)
            except (ShinkenMongoException, OSError) as exc:
                self.load_logger.error(u'FAILED Retention could not be loaded from mongodb: %s' % (getattr(exc, u'message', unicode(exc))))
                return False
        
        retention_start_time = time.time()
        # The scheduler can already have a retention cache (if it already have element before) so maybe we won't have to load all elements
        instances_uuids_to_get = daemon.get_instances_uuids_to_restore_retention()
        
        # Hosts / clusters
        host_clusters_uuids_to_load = instances_uuids_to_get[u'hosts'][u'to_load']
        nb_hosts_clusters_in_scheduler = instances_uuids_to_get[u'hosts'][u'total']
        data_hosts_clusters = {}
        nb_hosts_clusters_to_load = len(host_clusters_uuids_to_load)
        nb_hosts_clusters_loaded = 0
        
        self.hosts_clusters_load_logger.info(u'Scheduler has %d/%d hosts/clusters in its cache and need load retention for %d/%d' % (
            nb_hosts_clusters_in_scheduler - nb_hosts_clusters_to_load, nb_hosts_clusters_in_scheduler, nb_hosts_clusters_to_load, nb_hosts_clusters_in_scheduler))
        
        if nb_hosts_clusters_to_load != 0:
            
            _host_packs_list = split_list_by_pack(host_clusters_uuids_to_load, self.size_chunk_to_load)
            for _host_pack in _host_packs_list:
                try:
                    tmp_found_hosts = self.hosts_retention_collection.find({u'_id': {u'$in': [u'HOST-%s' % _host_uuid for _host_uuid in _host_pack]}}, modifiers={u'$maxTimeMS': self.mongo_timeout})
                except Exception as exp:
                    self.hosts_clusters_load_logger.error(u'error querying hosts/clusters entries: %s. Module exiting.' % exp)
                    return False
                
                for item in tmp_found_hosts:
                    val = item.get(u'value', None)
                    if val is None:
                        continue
                    val = base64.b64decode(val)
                    val = SafeUnpickler.loads(val, u'retention value from mongodb: host/cluster %s' % item[u'_id'])
                    data_hosts_clusters[item[u'_id'][5:]] = val  # to remove the HOST-
                    nb_hosts_clusters_loaded += 1
            missing_retention_message = u' : %d hosts/clusters have no retention ( may be new elements )' % (nb_hosts_clusters_to_load - nb_hosts_clusters_loaded) if nb_hosts_clusters_loaded < nb_hosts_clusters_to_load else u''
            self.hosts_clusters_load_logger.info(
                u'Took %s to load %d/%d hosts/clusters%s' % (self.hosts_clusters_load_logger.format_duration(time.time() - retention_start_time), nb_hosts_clusters_loaded, nb_hosts_clusters_to_load, missing_retention_message))
        host_loaded_time = time.time()
        
        # Services/Checks:
        service_uuids_to_get = instances_uuids_to_get[u'services'][u'to_load']
        nb_services_in_scheduler = instances_uuids_to_get[u'services'][u'total']
        data_services = {}
        nb_services_to_load = len(service_uuids_to_get)
        nb_services_loaded = 0
        
        self.checks_load_logger.info(u'Scheduler has %d/%d checks in its cache and need load retention for %d/%d' % (nb_services_in_scheduler - nb_services_to_load, nb_services_in_scheduler, nb_services_to_load, nb_services_in_scheduler))
        
        if nb_services_to_load != 0:
            _service_packs_list = split_list_by_pack(service_uuids_to_get, self.size_chunk_to_load)
            
            for _service_pack in _service_packs_list:
                try:
                    tmp_found_services = self.services_retention_collection.find({u'_id': {u'$in': [u'SERVICE-%s' % _service_id for _service_id in _service_pack]}}, modifiers={u'$maxTimeMS': self.mongo_timeout})
                except Exception as exp:
                    self.checks_load_logger.error(u'error querying checks entries: %s. Module exiting.' % exp)
                    return False
                
                for item in tmp_found_services:
                    val = item.get(u'value', None)
                    if val is not None:
                        val = base64.b64decode(val)
                        val = SafeUnpickler.loads(val, u'retention value from mongodb: check %s' % item[u'_id'])
                        data_services[item[u'_id'][8:]] = val  # to remove the SERVICE- part
                        nb_services_loaded += 1
            missing_retention_message = u' : %d checks have no retention ( may be new elements )' % (nb_services_to_load - nb_services_loaded) if nb_services_loaded < nb_services_to_load else u''
            self.checks_load_logger.info(u'Took %s to load %d/%d checks%s' % (self.checks_load_logger.format_duration(time.time() - host_loaded_time), nb_services_loaded, nb_services_to_load, missing_retention_message))
        
        retention_loaded_time = time.time()
        nb_elements_loaded = nb_hosts_clusters_loaded + nb_services_loaded
        nb_elements_to_load = nb_hosts_clusters_to_load + nb_services_to_load
        missing_retention_message = u' : %d elements have no retention ( may be new elements )' % (nb_elements_to_load - nb_elements_loaded) if nb_elements_loaded < nb_elements_to_load else u''
        self.load_logger.info(u'Took %s to load %d/%d elements %s' % (self.load_logger.format_duration(retention_loaded_time - retention_start_time), nb_elements_loaded, nb_elements_to_load, missing_retention_message))
        
        daemon.restore_retention_data({u'hosts': data_hosts_clusters, u'services': data_services})
        
        self.load_logger.info(u'Took %s to restore data to Scheduler' % self.load_logger.format_duration(time.time() - retention_loaded_time))
        return True
    
    
    def hook_delete_old_retention(self, _daemon):
        # type: (Scheduler) -> None
        """
        Remove outdated retention entries from the hosts/clusters and checks mongodb collections.

        An entry is outdated when its 'date' field is at or before (utcnow - self.nb_of_retention_day days).
        For each collection the matching entries are counted first, then self._massive_delete_in_mongo is called
        repeatedly until it reports nothing left to delete. Counts and timings are reported on self.delete_logger.

        :param _daemon: the calling daemon (not used by this hook)
        """
        # (Re)create the connections when there is no collection handle yet or the connection check fails
        if self.hosts_retention_collection is None or not self.check_connection():
            self.create_connections(self.delete_logger)
        
        purge_start_time = time.time()
        # Limit date (UTC): everything dated at or before it is selected for deletion
        oldest_date_kept = datetime.utcnow() - timedelta(days=self.nb_of_retention_day)
        outdated_filter = {u'date': {u'$lte': oldest_date_kept}}
        self.delete_logger.info(
            u'Checking old elements ( hosts/clusters/checks ) not updated since %d days -> %s UTC' % (self.nb_of_retention_day, oldest_date_kept.strftime(u'%Y-%m-%d %H:%M'))
        )
        
        # Hosts / clusters
        total_hosts_to_del = self.hosts_retention_collection.find(outdated_filter, {u'_id': 1}, modifiers={u'$maxTimeMS': self.mongo_timeout}, only_count=True)
        if total_hosts_to_del != 0:
            self.delete_logger.debug(
                u' - Deleting %d hosts/clusters from old retention [ %d by %d ]' % (total_hosts_to_del, self.size_chunk_to_delete, self.size_chunk_to_delete)
            )
            # The helper returns the number of entries still to delete: loop until nothing remains
            remaining_hosts = total_hosts_to_del
            while remaining_hosts > 0:
                remaining_hosts = self._massive_delete_in_mongo(self.hosts_retention_collection, remaining_hosts, outdated_filter)
            # The hosts/clusters duration is measured from the very start of the purge
            self.delete_logger.info(
                u' - %d hosts/clusters deleted in %s' % (total_hosts_to_del, self.delete_logger.format_duration(time.time() - purge_start_time))
            )
        
        checks_start_time = time.time()
        
        # Services / checks
        total_services_to_del = self.services_retention_collection.find(outdated_filter, {u'_id': 1}, modifiers={u'$maxTimeMS': self.mongo_timeout}, only_count=True)
        if total_services_to_del != 0:
            self.delete_logger.debug(
                u' - Deleting %d checks from old retention [ %d by %d ]' % (total_services_to_del, self.size_chunk_to_delete, self.size_chunk_to_delete)
            )
            remaining_services = total_services_to_del
            while remaining_services > 0:
                remaining_services = self._massive_delete_in_mongo(self.services_retention_collection, remaining_services, outdated_filter)
            # The checks duration only covers the time spent after the hosts/clusters step
            self.delete_logger.info(
                u' - %d checks deleted in %s' % (total_services_to_del, self.delete_logger.format_duration(time.time() - checks_start_time))
            )
        
        nothing_to_delete = total_services_to_del == 0 and total_hosts_to_del == 0
        if nothing_to_delete:
            self.delete_logger.info(u' - There is no data to delete')
        
        self.delete_logger.info(
            u'Total time for deleting %d old elements = %s' % (total_services_to_del + total_hosts_to_del, self.delete_logger.format_duration(time.time() - purge_start_time))
        )
    
    
    def do_loop_turn(self):
        # type: () -> None
        # This module adds nothing of its own here: the call is forwarded unchanged to the parent class implementation.
        super(MongodbRetentionScheduler, self).do_loop_turn()
    
    
    def loop_turn(self):
        # type: () -> None
        # This module adds nothing of its own here: the call is forwarded unchanged to the parent class implementation.
        super(MongodbRetentionScheduler, self).loop_turn()
