#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2019
# This file is part of Shinken Enterprise, all rights reserved.

import datetime
import httplib
import json
import optparse
import random
import ssl
import sys
import time
from optparse import Option, _builtin_cvt, check_choice, OptionParser

from shinken.log import DEFAULT_LONG_FLUSH_STATS
from shinken.misc.type_hint import Dict, Any, Tuple, Optional, NoReturn, List
from shinken.modules.base_module.basemodule import ModuleState
from shinken.runtime_stats.threads_dumper import watchdog_fatal_status, WATCH_DOG_STATUS_CODE
from shinken.safepickle import SERIALIZATION_SECURITY_STATUS_CODE, serialization_security_container
from shinkensolutions.shinken_time_helper import print_human_readable_period, DisplayFormat as TimeDisplayFormat

# Version string of this check API.
# NOTE(review): no consumer of this value is visible in this part of the file.
API_VERSION = '2.12'

# Separators used when assembling the check outputs.
LONG_OUTPUT_BREAK = u'\n'  # put by Result.exit() between the output and the long outputs
NEW_LINE = u'<br/><br/>'
BREAK_LINE = u'<br/>'  # appended by Result.add_check() to each output unless no_new_line is set
NEW_LINE_OUTPUT_AFTER_TABLE = u'<br/>'

# Threshold read by HTMLTag.load_tag(): a load strictly above it is tagged 'No more CPU usable'.
# NOTE(review): the value is a ratio (0.95) although the trailing comment labels it '%'.
EXECUTOR_LOAD_LIMIT = 0.95  # %
# Module-level copy of the last text printed by Result.exit().
to_print = u''

# Seed the pseudo-random generator; random is used by HTMLTable.generate_class_uuid().
random.seed()


def check_builtin(option, opt, value):
    """Convert the raw command line `value` of a builtin-typed option.

    The converter and the human readable type label are looked up in
    optparse's `_builtin_cvt` table using `option.type`.

    :param option: the option being parsed (its `type` and `default` are read)
    :param opt: the option string as typed by the user, only used in the error message
    :param value: the raw value; an empty unicode string means "use the option default"
    :return: `option.default` when `value` is empty, the converted value otherwise
    :raise ParseOptionError: when the converter rejects the value with a ValueError
    """
    converter, type_label = _builtin_cvt[option.type]
    try:
        return option.default if value == u'' else converter(value)
    except ValueError:
        message = u'option %s: invalid %s value: %r' % (opt, type_label, value)
        raise ParseOptionError(message)


class ShinkenOption(Option):
    """optparse Option whose numeric types are validated by this file's check_builtin."""
    # Maps an option type name to its checker function. The numeric types use check_builtin
    # (which returns the option default for an empty value), 'choice' keeps optparse's check_choice.
    TYPE_CHECKER = {
        u'int'    : check_builtin,
        u'long'   : check_builtin,
        u'float'  : check_builtin,
        u'complex': check_builtin,
        u'choice' : check_choice,
    }


# Allow \n in the parser output
class ShinkenParser(optparse.OptionParser):
    """OptionParser whose default `option_class` is ShinkenOption.

    The constructor accepts the same parameters as optparse.OptionParser.
    """
    def __init__(self,
                 usage=None,
                 option_list=None,
                 option_class=ShinkenOption,
                 version=None,
                 conflict_handler=u'error',
                 description=None,
                 formatter=None,
                 add_help_option=True,
                 prog=None,
                 epilog=None
                 ):
        # Every argument is forwarded unchanged, positionally, to the stock parser constructor
        OptionParser.__init__(self, usage, option_list, option_class, version, conflict_handler, description, formatter, add_help_option, prog, epilog)


class ParseOptionError(SyntaxError):
    """Raised by check_builtin on an invalid option value and by RaiseOnExitOptionParser.exit()."""
    pass


class NotFoundException(BaseException):
    """Raised by Utils._request_get when the server answers with a HTTP 404 status.

    It derives from BaseException, so an `except Exception` clause does NOT catch it:
    it must be caught by name.
    NOTE(review): confirm with the callers before moving it under Exception.
    """
    pass


class RaiseOnExitOptionParser(ShinkenParser):
    """ShinkenParser that raises instead of leaving the process when the parser wants to exit."""
    def exit(self, status=0, msg=None):
        # `status` is ignored: whatever the requested exit code, the message is raised to the caller
        raise ParseOptionError(msg)


class COLOR(object):
    """Hexadecimal CSS colors used by the HTML helpers of this file."""
    BLACK = u'#000000'
    GREEN = u'#2A9A3D'
    RED = u'#FF0000'
    ORANGE = u'#f57700'
    GRAY = u'#808080'
    BLUE = u'#4242DB'
    
    # Fallback used by HTMLTag.state_tag() for a state without a dedicated color
    DEFAULT_COLOR = BLACK


class HYPERVISOR(object):
    """Identifiers of the known hypervisors; they are the keys of HYPERVISOR_DISPLAY_NAME."""
    HYPERV = 'hyper-v'
    VMWARE = 'vmware'
    KVM = 'kvm'


# NOTE: don't know how to make it a class/union without breaking the HYPERVISOR class
# Maps each HYPERVISOR identifier to the name to display for it
HYPERVISOR_DISPLAY_NAME = {
    HYPERVISOR.HYPERV: 'HyperV',
    HYPERVISOR.VMWARE: 'VMWare',
    HYPERVISOR.KVM   : 'Kvm'
}


class CPU_STOLEN_NAME(object):
    """String identifiers of CPU "stolen time" metrics.

    NOTE(review): no user of these constants is visible in this part of the file;
    presumably each name belongs to one hypervisor family - confirm against the callers.
    """
    VPTR = 'vptr'
    LPTR = 'lptr'
    CPU_READY = 'vmware_ready_stat'
    CPU_STEAL = 'cpu_steal'


class EXIT_STATUS(object):
    """Check statuses, used as the process exit code by Result.exit().

    Result.add_check() compares them with `<` and keeps the highest value it has seen.
    """
    OK = 0
    WARNING = 1
    CRITICAL = 2
    UNKNOWN = 3


class Result(object):
    """Accumulates the status, outputs and perf data of a check, then prints them and exits.

    Typical usage: call add_check() / add_title() / add_perf_data() while checking, then exit().
    hard_exit() drops everything collected so far and leaves with a single output.
    """
    
    def __init__(self):
        self.status = EXIT_STATUS.OK  # highest status given to add_check()
        self.warnings = []  # WARNING outputs
        self.criticals = []  # CRITICAL outputs
        self.outputs = []  # outputs of any other status
        self.outputs_no_sort = []  # every non-title output, in insertion order
        self.titles = []
        self.long_outputs = []
        self.perf_data = {}
        self.spare_info = ''  # HTML badge computed by set_spare_info(), printed first by exit()
    
    
    def set_perf_data(self, perf_data):
        """Merge the `perf_data` dict into the current perf data; None or empty is ignored."""
        if not perf_data:
            return
        self.perf_data.update(perf_data)
    
    
    def add_perf_data(self, name, value):
        """Set (or overwrite) the perf data entry `name`."""
        self.perf_data[name] = value
    
    
    def set_logger_stats(self, data):
        """Add a WARNING when the 'logger_stats' entry of `data` reports too slow log writes.

        :param data: dict of the daemon; its 'logger_stats' entry (default DEFAULT_LONG_FLUSH_STATS)
                     provides 'is_too_long', 'write_duration' and 'log_path'. Falsy data is ignored.
        """
        if not data:
            return
        logger_stats = data.get('logger_stats', DEFAULT_LONG_FLUSH_STATS)
        # If the logger is going well, bail out
        if not logger_stats.get('is_too_long', False):
            return
        
        write_duration = logger_stats.get('write_duration', 0.0)
        log_path = logger_stats.get('log_path', '')
        self.add_check(
            EXIT_STATUS.WARNING,
            '%s - Writing logs on disk took too much time ( worst time was %.1fs during the last minute)<br>Path: "%s"' % (HTMLTag.color_text('WARNING', COLOR.ORANGE), write_duration, log_path)
        )
    
    
    def set_spare_info(self, data, daemon_type=None):
        """Compute the SPARE badge of a spare daemon, and leave the check when the spare is not running.

        Nothing is done when `data` is falsy or when its 'spare' entry is not set.
        For a spare daemon:
        * 'master_daemon' is an empty string: the spare is unused, CRITICAL hard exit
        * 'activated' is set: only `self.spare_info` is filled (SPARE + RUNNING badges)
        * otherwise the spare is idle: OK hard exit with an explanation

        :param data: dict of the daemon ('spare', 'master_daemon', 'activated', 'daemon_type' are read)
        :param daemon_type: name used in the idle message, defaults to data['daemon_type'] then 'Daemon'
        """
        if not data:
            return
        daemon_type = daemon_type or data.get('daemon_type', 'Daemon')
        spare_message = u'<div style="' \
                        u'margin: 5px 5px 5px 1px;' \
                        u'background: #0095da;' \
                        u'color: #fff;' \
                        u'padding: 1px 7px;' \
                        u'border-radius: 5px;' \
                        u'box-sizing: border-box;' \
                        u'display: inline-block;' \
                        u'display: inline-block;' \
                        u'">SPARE%s</div>'
        active_message = u'<div style="' \
                         u'margin: 5px 5px 5px 1px;' \
                         u'background: #2a9a3d;' \
                         u'color: #fff;' \
                         u'padding: 1px 7px;' \
                         u'border-radius: 5px;' \
                         u'box-sizing: border-box;' \
                         u'display: inline-block;' \
                         u'display: inline-block;' \
                         u'">RUNNING</div><br>'
        
        master_daemon = data.get('master_daemon')
        
        # NOTE: the master case is less important, and only put results entry
        if not data.get('spare', False):
            return
        
        if master_daemon is None:  # old format daemon
            idle_message = u"This {0} is currently idle because it is configured as a Spare and its main daemon is running well.<br>It will take over another {0} when a main {0} stops working".format(
                daemon_type.title())
        elif master_daemon != '':  # new Broker
            idle_message = u"I am the SPARE of the master daemon → {0}".format(HTMLTag.tag_value(master_daemon, COLOR.BLUE))
        else:  # useless
            useless_message = u"No master is using this spare daemon  → {0}".format(HTMLTag.tag_value(u'UNUSED', COLOR.RED))
            self.spare_info = spare_message % ''
            self.hard_exit(EXIT_STATUS.CRITICAL, useless_message)
            return
        
        # Ok now running or not?
        if data.get('activated', False):  # means running
            # If we have the name of the master daemon, put it
            # (a plain ascii text is used here: the utf8 arrow was reported as not working at this place)
            master_suffix = (u' of [ %s ]' % master_daemon) if master_daemon else u''
            self.spare_info = u''.join((spare_message % master_suffix, active_message))
        else:  # idle
            self.spare_info = spare_message % ''
            self.hard_exit(EXIT_STATUS.OK, idle_message)
    
    
    def add_check(self, status=EXIT_STATUS.OK, output='', long_output='', title=False, no_new_line=False):
        # type: (int, unicode, unicode, bool, bool) -> None
        """Register the result of one verification.

        :param status: EXIT_STATUS of this verification; the result keeps the highest one
        :param output: text to display; stored by level (critical / warning / other), or as a title
        :param long_output: text added to the long outputs
        :param title: store `output` in the titles instead of the outputs
        :param no_new_line: do not append BREAK_LINE to `output`
        """
        if self.status < status:
            self.status = status
        
        if output:
            if not no_new_line:
                output = ''.join((output, BREAK_LINE))
            
            if title:
                self.titles.append(output)
            else:
                if status == EXIT_STATUS.CRITICAL:
                    self.criticals.append(output)
                elif status == EXIT_STATUS.WARNING:
                    self.warnings.append(output)
                else:
                    self.outputs.append(output)
                
                self.outputs_no_sort.append(output)
        if long_output:
            self.long_outputs.append(long_output)
    
    
    def add_title(self, title):
        """Register `title` as a title line (status OK)."""
        self.add_check(output=title, title=True)
    
    
    def hard_exit(self, status, output, long_output=u''):
        # type: (int, unicode, unicode) -> NoReturn
        """Forget everything collected so far (perf data included) and exit with this single output."""
        self.status = status
        self.outputs = [output]
        self.outputs_no_sort = []
        self.titles = []
        self.criticals = []
        self.warnings = []
        self.long_outputs = [long_output]
        self.perf_data = {}
        self.exit()
    
    
    @staticmethod
    def _bordered_block(entries, color):
        """Return `entries` as '=> ' prefixed lines inside a border of `color` ('' when empty)."""
        if not entries:
            return ''
        lines = u''.join((u'<div class="skn-ln">%s%s</div>' % (HTMLTag.color_text(u'=> ', color), entry) for entry in entries))
        return HTMLTag.tag_border(lines, color)
    
    
    def exit(self, sorted_by_level=True):
        """Print the final output (status tag, outputs, long outputs, perf data) and exit the process.

        :param sorted_by_level: when True the criticals then the warnings are displayed first, each
                                group in a colored border; when False the outputs keep their insertion order
        The process exit code is `self.status`.
        """
        global to_print
        
        if not self.titles:
            self.titles.append(BREAK_LINE)
        if sorted_by_level:
            criticals = Result._bordered_block(self.criticals, COLOR.RED)
            warnings = Result._bordered_block(self.warnings, COLOR.ORANGE)
            output = u''.join((u''.join(self.titles), criticals, warnings, u''.join(self.outputs)))
        else:
            output = u''.join((u''.join(self.titles), u''.join(self.outputs_no_sort)))
        if self.long_outputs:
            output = u''.join((output, LONG_OUTPUT_BREAK, u''.join(self.long_outputs)))
        
        status_tags = {
            EXIT_STATUS.OK      : (u'[OK]', COLOR.GREEN),
            EXIT_STATUS.WARNING : (u'[WARNING]', COLOR.ORANGE),
            EXIT_STATUS.CRITICAL: (u'[CRITICAL]', COLOR.RED),
            EXIT_STATUS.UNKNOWN : (u'[UNKNOWN]', COLOR.BLACK),
        }
        tag = ''  # an unexpected status is printed without tag
        if self.status in status_tags:
            tag_text, tag_color = status_tags[self.status]
            tag = HTMLTag.color_text(tag_text, tag_color)
        
        # perf_data may have been set to None directly on the instance
        if self.perf_data is None:
            self.perf_data = {}
        
        perfdata = Result._do_perfdata(self.perf_data)
        output = Utils.add_style(output)
        to_print = u'%s%s %s' % (self.spare_info, tag, output.strip())
        if perfdata:
            to_print = u'%s| %s' % (to_print, perfdata)
        try:
            to_print = to_print.encode('utf8', 'ignore')
        except UnicodeDecodeError:
            # Best effort: print the text as it is
            pass
        print(to_print)
        sys.exit(self.status)
    
    
    @staticmethod
    def _do_perfdata(performances):
        """Format the perf data dict as space separated 'name=value' entries."""
        return u' '.join([u'%s=%s' % (k, v) for (k, v) in performances.items()])


class HTMLTag(object):
    """Builds the small HTML elements (colored text, bordered block, value tag) of the check outputs.

    `used` is set as soon as an element needing the CSS of `STYLE` is generated; Utils.add_style()
    reads it to know if the style must be inserted, then resets it.
    """
    used = False
    STYLE = u'''.skn-ctg{margin:1px;background:#DDD;padding:1px 7px;border-radius:5px;box-sizing:border-box;display:inline-block;border:1px solid} .skn-brd{margin:7px 2px;padding:5px 7px;border-radius:7px;box-sizing:border-box;border:2px solid;width:99%;word-break: break-word;}'''
    EXTRA_STYLE = u''
    EXTRA_CLASS = u''  # extra CSS class added on the bordered blocks and value tags
    
    STATE_CRITICAL = u'CRITICAL'
    STATE_OK = u'OK'
    STATE_WARNING = u'WARNING'
    
    CRITICAL = u''
    OK = u''
    WARNING = u''
    
    # Color of a state name, see state_tag()
    STATE_COLOR = {
        u'FATAL'   : COLOR.RED,
        u'CRITICAL': COLOR.RED,
        u'OK'      : COLOR.GREEN,
        u'WARNING' : COLOR.ORANGE,
    }
    
    # Color of an EXIT_STATUS value
    EXIT_STATUS_COLOR = {
        EXIT_STATUS.OK      : COLOR.GREEN,
        EXIT_STATUS.WARNING : COLOR.ORANGE,
        EXIT_STATUS.CRITICAL: COLOR.RED,
        EXIT_STATUS.UNKNOWN : COLOR.GRAY
    }
    
    
    @staticmethod
    def color_text(value, color=COLOR.BLACK, bold=True, italic=False):
        # type: (unicode, unicode, bool, bool) -> unicode
        """Return `value` in a <span> of the given color, optionally bold and/or italic."""
        _span = u'<span style="color:%(color)s;%(bold)s%(italic)s">%(value)s</span>'
        _info = {
            u'color' : color,
            u'value' : value,
            u'bold'  : u'font-weight:bold;' if bold else u'',
            u'italic': u'font-style:italic;' if italic else u'',
        }
        return _span % _info
    
    
    @staticmethod
    def tag_border(value, color=COLOR.BLACK):
        """Return `value` inside a one cell table with a border of the given color."""
        HTMLTag.used = True
        # The closing tags must mirror the opening order: <tr><td> ... </td></tr>
        _span = u'<table class="skn-brd %(extra_class)s" style="border-color:%(color)s; "><tr><td>%(value)s</td></tr></table>'
        return _span % {u'color': color, u'value': value, u'extra_class': HTMLTag.EXTRA_CLASS}
    
    
    @staticmethod
    def tag_value(value, color=COLOR.BLACK):
        """Return `value` as a rounded tag whose text and border have the given color."""
        HTMLTag.used = True
        _span = u'<span class="skn-ctg %(extra_class)s" style="color:%(color)s;border-color:%(color)s;">%(value)s</span>'
        return _span % {u'color': color, u'value': value, u'extra_class': HTMLTag.EXTRA_CLASS}
    
    
    @staticmethod
    def state_tag(state):
        """Return the tag of a state name, colored from STATE_COLOR (COLOR.DEFAULT_COLOR if unknown)."""
        return HTMLTag.tag_value(state, HTMLTag.STATE_COLOR.get(state, COLOR.DEFAULT_COLOR))
    
    
    @staticmethod
    def _missing_value_tag(value):
        """Return the tag of the special values -1 ('unavailable') and -2 ('unreachable'), else None."""
        if value == -1:
            return HTMLTag.tag_value('unavailable', COLOR.ORANGE)
        if value == -2:
            return HTMLTag.tag_value('unreachable', COLOR.RED)
        return None
    
    
    @staticmethod
    def load_tag(load):
        """Return the tag of a load value: special values first, then compared to EXECUTOR_LOAD_LIMIT."""
        missing_tag = HTMLTag._missing_value_tag(load)
        if missing_tag is not None:
            return missing_tag
        if load > EXECUTOR_LOAD_LIMIT:
            return HTMLTag.tag_value('No more CPU usable', COLOR.ORANGE)
        return HTMLTag.tag_value('Resources available', COLOR.BLACK)
    
    
    @staticmethod
    def ram_tag(ram_usage, max_ram_usage):
        """Return the tag of a RAM usage: 'Limit reached' when strictly above `max_ram_usage`."""
        missing_tag = HTMLTag._missing_value_tag(ram_usage)
        if missing_tag is not None:
            return missing_tag
        if ram_usage > max_ram_usage:
            return HTMLTag.tag_value('Limit reached', COLOR.RED)
        return HTMLTag.tag_value('normal', COLOR.BLACK)
    
    
    @staticmethod
    def cpu_queue_tag(cpu_running_queue, max_cpu_queue_per_cpu, nb_cpus):
        """Return the tag of a CPU running queue: 'Limit reached' when strictly above max_cpu_queue_per_cpu * nb_cpus."""
        missing_tag = HTMLTag._missing_value_tag(cpu_running_queue)
        if missing_tag is not None:
            return missing_tag
        if cpu_running_queue > (max_cpu_queue_per_cpu * nb_cpus):
            return HTMLTag.tag_value('Limit reached', COLOR.RED)
        return HTMLTag.tag_value('normal', COLOR.BLACK)


class HTMLList(object):
    """Builds HTML bullet lists (<ul> / <li>) for the check outputs.

    `used` is raised when a list needing the CSS of `STYLE` is generated; Utils.add_style()
    reads it to know if the style must be inserted, then resets it.
    """
    used = False
    STYLE = u'.skn-ul{padding: 0; margin: 0 0 10px 25px;} .skn-ul-compact{margin-bottom: 0px;padding-left: 0px;margin-left: 25px;}'
    EXTRA_STYLE = u''
    
    
    @staticmethod
    def _list_item(item):
        """Wrap one entry in a <li> element."""
        return u"<li>%s</li>" % item
    
    
    @staticmethod
    def _list_header(header, special_class=''):
        """Return the opening <ul>, preceded by 'header :' when a header is given.

        A non empty `special_class` also flags the CSS as needed.
        """
        if special_class != '':
            HTMLList.used = True
        if header is not None:
            return "%s&nbsp;:<ul class='skn-ul %s'>" % (header, special_class)
        return u"<ul class='skn-ul %s'>" % special_class
    
    
    @staticmethod
    def _list_footer():
        """Return the closing </ul>."""
        return u"</ul>"
    
    
    @staticmethod
    def header_list(header, items, compact=False):
        # type: (Optional[unicode], list, bool) -> unicode
        """Return a bullet list of `items`, with an optional `header` and an optional compact style."""
        css_class = 'skn-ul-compact' if compact else ''
        HTMLList.used = True
        opening = HTMLList._list_header(header, special_class=css_class)
        bullets = "".join([HTMLList._list_item(entry) for entry in items])
        closing = HTMLList._list_footer()
        return u'%s%s%s' % (opening, bullets, closing)
    
    
    @staticmethod
    def simple_list(items):
        """Return a bullet list of `items` without header."""
        return HTMLList.header_list(None, items)
    
    
    @staticmethod
    def one_bullet_list(s):
        # type: (Optional[str, unicode]) -> unicode
        """Return a compact list made of the single bullet `s`."""
        HTMLList.used = True  # so the CSS will be inserted
        opening = HTMLList._list_header(None, special_class='skn-ul-compact')
        return u'%s%s%s' % (opening, HTMLList._list_item(s), HTMLList._list_footer())


class HTMLTable(object):
    """Builds the HTML tables of the check outputs.

    `used` is raised when a table is generated; Utils.add_style() reads it to know if `STYLE` and
    `EXTRA_STYLE` must be inserted, then resets it. `EXTRA_STYLE` accumulates the CSS rules added
    by each call to table().
    """
    used = False
    STYLE = u'''.skn-ict,.skn-ict td,.skn-ict th{border:1px solid #000000 !important;border-collapse:collapse !important;word-break: break-all !important;color:#000000 !important} .skn-ict{width:100% !important;} .skn-ict th{background-color:#DDDDDD !important;padding:2px !important;word-break:break-word !important} .skn-ict td{padding:2px !important;width:auto !important;font-weight:normal !important;word-break:break-word !important;background-color:#FFFFFF !important}'''
    EXTRA_STYLE = u''
    
    
    @staticmethod
    def add_extra_style(extra_style):
        """Append the CSS text `extra_style` to the class level EXTRA_STYLE."""
        HTMLTable.EXTRA_STYLE = '%s %s' % (HTMLTable.EXTRA_STYLE, extra_style)
    
    
    @staticmethod
    def generate_class_uuid():
        """Return a pseudo-random CSS class name: 'skn-tbl-' followed by 5 digits."""
        return 'skn-tbl-%05d' % int(random.random() * 100000)
    
    
    @staticmethod
    def table(headers, lines, title=None, left_headers=None, compact_title=False, extra_tags='', all_col_same_width=True, class_uuid='', extra_style=''):
        """Return the HTML of a table and register its CSS rules in EXTRA_STYLE.

        :param headers: column headers; no header row is generated when they give an empty text
        :param lines: iterable of rows, each row being an iterable of cell values
        :param title: optional title displayed before the table, followed by ':'
        :param left_headers: optional row headers, indexed like `lines`
        :param compact_title: do not add a line break between the title and the table
        :param extra_tags: raw attributes added in the <table> tag
        :param all_col_same_width: fixed table layout when True, else the left headers take 25% of the width
        :param class_uuid: CSS class of the wrapping <div>; generated when empty
        :param extra_style: extra CSS rules, as one string or as an iterable of strings
        :return: the HTML of the table, as unicode
        """
        HTMLTable.used = True
        class_uuid = class_uuid or HTMLTable.generate_class_uuid()
        _table = []
        
        if extra_style:
            # A plain string must be taken as a whole: joining it would put a space between
            # each of its characters and break the CSS
            if not isinstance(extra_style, (str, type(u''))):
                extra_style = ' '.join(extra_style)
            HTMLTable.add_extra_style(extra_style)
        
        if all_col_same_width:
            _extra_style = [
                '.%s .skn-ict{table-layout: fixed;}' % class_uuid,
            ]
        else:
            _extra_style = [
                '.%s .skn-ict .skn-lfh {width: 25%%;}' % class_uuid,
            ]
        
        HTMLTable.add_extra_style(u' '.join(_extra_style))
        
        header_string = u''.join((u'<th>%s</th>' % header for header in headers))
        
        values_string = []
        for i, line in enumerate(lines):
            line_string = u''.join((u'<td>%s</td>' % cell for cell in line))
            if left_headers:
                line_string = u'<th class="skn-lfh">%s</th>%s' % (left_headers[i], line_string)
            values_string.append(u'<tr>%s</tr>' % line_string)
        values_string = u''.join(values_string)
        
        if title:
            _table.append(u'<div class="skn-ich">%s:</div>' % title)
            if not compact_title:
                _table.append(BREAK_LINE)
        
        _table.append(u'<div class="%s"><table class="skn-ict" %s>' % (class_uuid, extra_tags))
        if header_string:
            _table.append(u'<tr>%s</tr>' % header_string)
        _table.append(u'%s' % values_string)
        _table.append(u'</table></div>')
        
        return u''.join(_table)


class Utils(object):
    """Generic helpers of the checks: HTTP(S) GET requests and human readable formatting."""
    
    @staticmethod
    def _http_get_conn(full_uri, timeout, use_ssl, ssl_version=ssl.PROTOCOL_TLSv1):
        """Return a (not yet connected) httplib connection to `full_uri` ('host' or 'host:port').

        In SSL mode the certificate and the host name of the server are NOT verified.
        NOTE(review): the default `ssl_version` only allows TLS 1.0; confirm this is still what
        the daemons expect before changing it.
        """
        if not use_ssl:
            return httplib.HTTPConnection(full_uri, timeout=timeout)
        
        args = {}
        # If we are in SSL mode, do not look at certificate too much
        # NOTE: ssl.SSLContext is only available on last python 2.7 versions
        if hasattr(ssl, 'SSLContext'):
            ssl_context = ssl.SSLContext(ssl_version)
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            args['context'] = ssl_context
        return httplib.HTTPSConnection(full_uri, timeout=timeout, **args)
    
    
    # First try in HTTP and if fail from the server, retry in HTTPs
    @staticmethod
    def _request_get(base_uri, uri, use_ssl=False, timeout=3):
        """GET `uri` on the server `base_uri` and return (body, elapsed seconds).

        :param base_uri: 'host:port', optionally prefixed by 'http://' or 'https://' (the scheme then
                         overrides `use_ssl`) and optionally followed by '/'
        :param uri: path to request
        :param use_ssl: use HTTPS; a plain HTTP request refused with a status 400 mentioning
                        'sent a plain HTTP request' is retried once in HTTPS
        :param timeout: socket timeout, in seconds
        :raise NotFoundException: on a status 404
        :raise Exception: with the body as message, on any other status than 200
        """
        for scheme, scheme_use_ssl in (('http://', False), ('https://', True)):
            if base_uri.startswith(scheme):
                use_ssl = scheme_use_ssl
                # Only drop the scheme and the trailing '/' (if any), never a character of the host or port
                base_uri = base_uri[len(scheme):].rstrip('/')
                break
        
        start_time = time.time()
        conn = Utils._http_get_conn(base_uri, timeout=timeout, use_ssl=use_ssl)
        try:
            conn.request("GET", uri)
            response = conn.getresponse()
            status = response.status
            buf = response.read()
        finally:
            # The body is fully read (or the request failed): always release the socket
            conn.close()
        
        if status == 400 and not use_ssl and 'sent a plain HTTP request' in buf:
            return Utils._request_get(base_uri, uri, use_ssl=True, timeout=timeout)
        if status == 404:
            raise NotFoundException()
        if status != 200:
            raise Exception(buf)
        return buf, (time.time() - start_time)
    
    
    # First try in HTTP and if fail from the server, retry in HTTPs
    @staticmethod
    def request_get(result, base_uri, uri, use_ssl=False, raise_exp=False, timeout=3):
        """Same as _request_get(), but by default an error ends the check with a CRITICAL status.

        :param result: object whose hard_exit(status, output) is called on error
        :param raise_exp: re-raise the error instead of calling result.hard_exit()
        :raise NotFoundException: on a status 404, whatever `raise_exp`
        """
        try:
            return Utils._request_get(base_uri, uri, use_ssl, timeout=timeout)
        except NotFoundException:
            # Always given back to the caller, with its original traceback
            raise
        except Exception as e:
            if raise_exp:
                raise
            msg = str(e)
            if e.__class__.__name__ == 'timeout':
                msg = 'The request timed out (%ss)' % timeout
            result.hard_exit(EXIT_STATUS.CRITICAL, 'Cannot connect to %s%s with exception : <br/> %s' % (base_uri, uri, msg))
    
    
    @staticmethod
    def print_time(_time):
        """Format the epoch timestamp `_time` as a local date and time ('%x %X')."""
        return datetime.datetime.fromtimestamp(_time).strftime('%x %X')
    
    
    @staticmethod
    def add_style(output):
        """Prefix `output` with the <style> of every HTML helper used since the last call.

        The `used` flag of each helper (HTMLList, HTMLTable, HTMLTag) is reset.
        """
        html_items = (HTMLList, HTMLTable, HTMLTag)
        
        active_item = []
        for html_item in html_items:
            if html_item.used:
                active_item.append(html_item.STYLE)
                active_item.append(html_item.EXTRA_STYLE)
                html_item.used = False
        
        if active_item:
            output = u'<style type="text/css">%s</style>%s' % (''.join(active_item), output)
        return output
    
    
    @staticmethod
    def print_human_readable_number(number):
        """Return the integer part of `number` with a space as thousands separator (python >= 2.7)."""
        if sys.version_info < (2, 7):
            return str(int(number))
        else:
            return '{:,}'.format(int(number)).replace(',', ' ')
    
    
    # This is just a relay to print_human_readable_period with a specific format for checks, with integers
    @staticmethod
    def print_period(period):
        return print_human_readable_period(period, time_format='auto', display_format=TimeDisplayFormat('%dms', '%ds', '%dm', '%dh', '%d days'), separator=' ')
    
    
    @staticmethod
    def print_human_readable_size(size):
        """Return a size in bytes as a text in octets / Ko / Mo / Go (2 decimals above 1024).

        A string is first converted to int. A value that is not a number (or a string that is
        not an integer) is returned unchanged.
        """
        if isinstance(size, (str, type(u''))):  # str and unicode
            try:
                size = int(size)
            except ValueError:
                return size
        
        if not isinstance(size, (float, int)):
            return size
        
        # The divisions are done with floats: an integer division would drop the decimals
        kilo = 1024.0
        if size < kilo:
            return '%s octets' % size
        elif size < kilo ** 2:
            return '%.2f Ko' % (size / kilo)
        elif size < kilo ** 3:
            return '%.2f Mo' % (size / kilo ** 2)
        else:
            return '%.2f Go' % (size / kilo ** 3)
    
    
    @staticmethod
    def print_human_readable_period(time_period, time_format='auto'):
        """Relay to the print_human_readable_period function of shinken_time_helper."""
        return print_human_readable_period(time_period, time_format)
    
    
    @staticmethod
    def print_percent(_value, _total):
        """Return `_value` as a percentage of `_total`, with 2 decimals.

        :raise ZeroDivisionError: when `_total` is 0
        """
        # float(): with two integers the division would be an integer division
        return '%0.2f' % ((float(_value) / _total) * 100.0)


# Object that will do all the logic/verification about arbiters traces:
# * in the healthcheck
# * in a daemon check
class ArbiterTraceVerificationTree(object):
    """Analyses the traces left on a daemon by the arbiters that contacted it.

    analyse_arbiter_traces() groups the traces by master arbiter uuid (one uuid = one architecture),
    detects the conflicts and computes a state for each arbiter; the get_* methods give the results.
    """
    
    def __init__(self):
        # type: () -> None
        self._arbiters_tree = {}  # what to display: master arbiter uuid -> architecture name + arbiters
        self._state = EXIT_STATUS.OK  # overall state of the analysis
        self._in_architecture_names_conflict = False  # do two distinct architectures share the same name?
        self._in_conflict = False  # was the daemon contacted by 2 distinct architectures (NOT by 2 arbiters)?
    
    
    def __detect_architecture_name_conflicts(self, master_arbiter_uuids):
        # type: (Dict) -> None
        """Flag the case where one architecture name is declared under several master arbiter uuids."""
        uuids_by_architecture_name = {}
        for (master_uuid, arbiter_traces) in master_arbiter_uuids.items():
            for trace in arbiter_traces:
                declared_name = trace.get('architecture_name', '')
                if not declared_name:  # old versions do not give their architecture name
                    continue
                uuids_by_architecture_name.setdefault(declared_name, set()).add(master_uuid)
        
        # Two distinct architectures (identified by their Arbiter MASTER server) with the same name
        # will be a problem in detection
        self._in_architecture_names_conflict = any(len(uuid_set) >= 2 for uuid_set in uuids_by_architecture_name.values())
    
    
    def __check_arbiter_connection__get_uuids_and_conflicts(self, traces):
        # type: (List) -> Dict
        """Group the traces by master arbiter uuid and update both conflict flags.

        * an arbiter MASTER and its (optional) SPARE share the same master arbiter uuid: this is
          the normal behavior of switching, NOT a problem
        * more than one master arbiter uuid means several architectures: conflict
        """
        traces_by_master_uuid = {}
        for trace in traces:
            master_uuid = trace[u'master_arbiter_uuid'].strip()  # old versions have \n in it
            traces_by_master_uuid.setdefault(master_uuid, []).append(trace)
        
        self.__detect_architecture_name_conflicts(traces_by_master_uuid)
        self._in_conflict = len(traces_by_master_uuid) > 1
        
        return traces_by_master_uuid
    
    
    def analyse_arbiter_traces(self, traces):
        # type: (List) -> None
        """Compute the arbiters tree and the overall state from the arbiter traces.

        Each arbiter gets a (name, message, state) entry under its master arbiter uuid:
        * CRITICAL if its time shift with the daemon is above 30 (also makes the overall state CRITICAL)
        * WARNING if its trace is expired (the overall state becomes WARNING only if it was OK)
        * OK otherwise
        An architecture conflict makes the overall state CRITICAL.
        """
        traces_by_master_uuid = self.__check_arbiter_connection__get_uuids_and_conflicts(traces)
        
        overall_state = EXIT_STATUS.CRITICAL if self._in_conflict else EXIT_STATUS.OK
        
        self._arbiters_tree = {}
        
        for (master_uuid, arbiter_traces) in traces_by_master_uuid.items():
            entry = {u'architecture_name': u'', u'arbiters': []}
            self._arbiters_tree[master_uuid] = entry
            
            known_architecture_name = ''
            for trace in arbiter_traces:
                arbiter_name = trace[u'name']
                declared_name = trace.get(u'architecture_name', '')
                if declared_name:
                    known_architecture_name = declared_name
                
                insert_time = trace[u'insert_time']
                time_shift = abs(trace[u'diff_time_with_arbiter'])  # abs: never believe a diff time
                expire_period = trace[u'expire_period']
                # NOTE: the time of the server is preferred, so the local time of the check (that can be
                #       wrong) is only used for the daemons that do not give the 'now' entry
                #       (entry missing before v02.07.06-Patched-08, may 2021)
                server_now = trace.get(u'now', int(time.time()))
                remaining_before_expiration = (insert_time + expire_period) - server_now
                since_last_connection = server_now - insert_time
                if since_last_connection >= 0:
                    last_connection_str = u'last connection %s ago' % Utils.print_period(since_last_connection)
                else:
                    last_connection_str = u''
                
                if time_shift > 30:
                    message = u'server times are different, time shift of %s' % (Utils.print_period(time_shift))
                    arbiter_state = EXIT_STATUS.CRITICAL
                    overall_state = EXIT_STATUS.CRITICAL
                elif remaining_before_expiration < 0:
                    message = u'Missed connection from arbiter since %s ( > daemon check_interval * max_check_attempts )' % (Utils.print_period(since_last_connection))
                    arbiter_state = EXIT_STATUS.WARNING
                    if overall_state == EXIT_STATUS.OK:  # do not change if was CRITICAL
                        overall_state = EXIT_STATUS.WARNING
                else:
                    message = u'%s' % (last_connection_str)
                    arbiter_state = EXIT_STATUS.OK
                entry[u'arbiters'].append((arbiter_name, message, arbiter_state))
            
            entry[u'architecture_name'] = known_architecture_name or u'( too old Shinken Enterprise version, cannot have architecture name )'
        
        self._state = overall_state
    
    
    def get_in_conflict(self):
        """True if the last analysed traces come from more than one architecture."""
        return self._in_conflict
    
    
    def get_in_architecture_names_conflict(self):
        """True if, in the last analysed traces, two architectures share the same name."""
        return self._in_architecture_names_conflict
    
    
    def get_state(self):
        """Overall EXIT_STATUS of the last analysis."""
        return self._state
    
    
    def get_arbiters_tree(self):
        """Result of the last analysis: master arbiter uuid -> {'architecture_name', 'arbiters'}."""
        return self._arbiters_tree


class ShinkenUtils(object):
    
    @staticmethod
    def request_get_daemon(result, daemon_type, base_uri, uri, use_ssl=False, timeout=3):
        """Call Utils.request_get on `base_uri` + `uri` and return its result unchanged.
        
        A NotFoundException, or any other Exception, raised by the request is reported as CRITICAL through
        result.hard_exit; `daemon_type` is only used to build that error message.
        NOTE(review): presumably result.hard_exit ends the check; if it ever returns, this function returns None.
        """
        try:
            response = Utils.request_get(result, base_uri, uri, use_ssl, timeout=timeout)
        except NotFoundException:
            not_found_output = 'Cannot connect to uri : "%s%s". %sThe uri does not exists (error 404)' % (base_uri, uri, BREAK_LINE)
            result.hard_exit(EXIT_STATUS.CRITICAL, not_found_output)
        except Exception as error:
            unreachable_output = 'Cannot connect to %s daemon at %s' % (daemon_type, base_uri)
            result.hard_exit(EXIT_STATUS.CRITICAL, unreachable_output, 'cause by : %s' % error)
        else:
            return response
    
    
    @staticmethod
    def add_module_info(result, data):
        module_infos = data.get('modules_info', {})
        if not module_infos:
            return ''
        
        lines = []
        status = EXIT_STATUS.OK
        for module_info in module_infos:
            submodules = module_info.get('modules', [])
            submodules_info = '-'
            if submodules:
                sub = ['%s : %s' % (i['name'], HTMLTag.state_tag(i['status'])) for i in submodules]
                submodules_info = HTMLList.simple_list(sub)
            
            last_restart = module_info.get('last_restart', {})
            last_restart = Utils.print_time(last_restart['timestamp']) if last_restart else ''
            module_state = HTMLTag.STATE_WARNING if module_info.get('nb_restart', 0) and module_info['status'] == ModuleState.OK else module_info['status']
            
            if module_info['status'] != 'OK':
                status = EXIT_STATUS.WARNING
            
            lines.append((module_info['name'], module_info['type'], HTMLTag.state_tag(module_state), module_info.get('nb_restart', 0), last_restart, submodules_info))
        
        long_output = HTMLTable.table(('Name', 'Type', 'Status', 'Restart in the last 2h', 'Last restart date', 'Submodules'), lines, 'Module info')
        result.add_check(status=status, long_output=long_output)
    
    
    @staticmethod
    def add_http_error_count_message(result, data):
        http_errors_count = data.get('http_errors_count', {})
        if http_errors_count:
            result.add_check(EXIT_STATUS.WARNING, "Some API calls between daemons failed in the last 24 hours (%d errors). Please look at your daemon logs for more details about these errors.%s" % (sum(http_errors_count.values()), BREAK_LINE))
    
    
    @staticmethod
    def add_warning_module_restart(result, data):
        modules_info = data.get('modules_info', [])
        
        restart_module = []
        for module_info in modules_info:
            module_restarts = module_info.get('restarts', [])
            if len(module_restarts) <= 0:
                continue
            
            # Manage restarts stored in the old format : [ts1, ts2, ts3]
            if isinstance(module_restarts[-1], float):
                module_restarts = [{'timestamp': ts, 'reason': "The reason for this restart was not saved"} for ts in module_restarts]
            
            limit_dt = datetime.datetime.now() - datetime.timedelta(minutes=120)
            restart_count = len(filter(lambda i: datetime.datetime.fromtimestamp(i['timestamp']) > limit_dt, module_restarts))
            module_info['nb_restart'] = restart_count
            module_info['last_restart'] = module_restarts[-1]
            if restart_count > 0:
                restart_module.append('The module %s has restarted [%s times]. A restart is removed from count after 2h.' % (module_info['name'], restart_count))
        
        if restart_module:
            result.add_check(EXIT_STATUS.WARNING, output=HTMLList.header_list('Some modules have restarted since the last 2h', restart_module))
    
    
    @staticmethod
    def check_arbiter_connection(result, uri, daemon_type, timeout=3):
        # type: (Result, unicode, unicode, int) -> None
        """Fetch '/arbiter_traces_get' from the daemon and report arbiter connection problems.
        
        An empty trace list is reported as WARNING through result.hard_exit. Otherwise the traces are analysed by an
        ArbiterTraceVerificationTree and the HTML summary is added to `result` only when the analysed state is not OK.
        """
        raw_traces, _ = ShinkenUtils.request_get_daemon(result, daemon_type, uri, '/arbiter_traces_get', timeout=timeout)
        arbiter_traces = json.loads(raw_traces)
        if len(arbiter_traces) == 0:
            # Not yet contacted by an arbiter.
            result.hard_exit(EXIT_STATUS.WARNING, u'Daemon has not been contacted by an arbiter for now.')
            return
        
        trace_tree = ArbiterTraceVerificationTree()
        trace_tree.analyse_arbiter_traces(arbiter_traces)
        
        trees_by_master_uuid = trace_tree.get_arbiters_tree()
        has_conflict = trace_tree.get_in_conflict()
        has_names_conflict = trace_tree.get_in_architecture_names_conflict()
        analysed_state = trace_tree.get_state()
        
        # One HTML block per architecture (one architecture per master arbiter uuid)
        architecture_blocks = []
        for master_uuid, architecture_entry in trees_by_master_uuid.items():
            architecture_name = architecture_entry['architecture_name']
            
            bullets = []
            for arbiter_entry in architecture_entry['arbiters']:
                # arbiter_entry layout => 0: name, 1: txt, 2: status
                if arbiter_entry[2] != EXIT_STATUS.OK:
                    prefix = '%s ' % HTMLTag.tag_value(u'ERROR', COLOR.RED)
                else:
                    prefix = ''
                bullets.append(u'%s%s => <span style="color:purple">%s</span>' % (prefix, arbiter_entry[0], arbiter_entry[1]))
            if has_names_conflict:  # If the name is not unique, help the user to find it
                bullets.append(u'defined by the arbiter MASTER on the server with uuid %s' % master_uuid)
            
            header = u'Architecture <i>%s</i>' % architecture_name
            architecture_blocks.append(HTMLList.header_list(header, bullets) + '<br/>')
        
        if has_conflict:
            title = HTMLTag.tag_value(u'Arbiters CONFLICT ( contacted by Arbiters from different architectures )', COLOR.RED)
        else:
            title = u'Arbiters connection:'
        summary = HTMLList.header_list(title, architecture_blocks)
        
        # We have architecture_name conflict, we need to help the user about how to find the servers
        if has_names_conflict:
            summary += u'NOTE:<i><ul><li>Some Arbiter MASTER on different servers have the same architecture name in their architecture_export module.</li><li>Look at "/var/lib/shinken/server.uuid" to identify them</li></ul></i>'
        
        # The architectures are only displayed when something is wrong
        if analysed_state != EXIT_STATUS.OK:
            result.add_check(analysed_state, summary)
    
    
    @staticmethod
    def _check_versions(daemon_type, daemon_api_version, daemon_version, arbiter_version, shinken_supervisor_version):
        # type: (unicode, unicode, unicode, unicode, unicode) -> Optional[unicode]
        # Let's check first the API version
        if daemon_api_version and daemon_api_version != API_VERSION:
            if daemon_version and shinken_supervisor_version:
                return u'Your %s is alive but this daemon (%s) and the Shinken installation (%s) that monitors it are not in the same version.' % (
                    daemon_type, get_version_for_output(daemon_version), get_version_for_output(shinken_supervisor_version))
            return u'Your %s is alive but not up to date. Please update.' % daemon_type
        
        if not daemon_version or not daemon_api_version:
            return u'Your %s is alive but not up to date. Please update.' % daemon_type
        
        if daemon_type != u'synchronizer' and not arbiter_version:
            return u'Your %s is alive but not up to date. Please update.' % daemon_type
        
        # the synchronizer has no arbiter_version because it doesn't use 'put_conf' but it's always on the same machine as the arbiter, so we don't need to test it
        if daemon_type != u'synchronizer':
            if daemon_version != arbiter_version:
                return u'Your %s is alive but its version (%s) is not the same as its arbiter (%s). Please update.' % (daemon_type, get_version_for_output(daemon_version), get_version_for_output(arbiter_version))
        
        return None
    
    
    @staticmethod
    def minimal_check(result, data, daemon_type, shinken_supervisor_version):
        """Run the checks shared by the daemon checks on the raw stats `data`.
        
        In order: configuration received, daemon type, versions (each one reports a WARNING through result.hard_exit
        when it fails), then the spare info and logger stats are set on `result`, and finally the dead lock and
        serialization security checks are run.
        """
        if not data.get('have_conf', False):
            result.hard_exit(EXIT_STATUS.WARNING, 'No configuration given by an Arbiter for now.')
        
        # If the daemon gives its type in get_raw_stats, we compare it with the wanted daemon_type.
        # Otherwise we let it pass, because daemons with a lower API_VERSION can be checked too.
        reported_daemon_type = data.get('daemon_type', None)
        if reported_daemon_type and reported_daemon_type != daemon_type:
            wrong_type_output = 'The daemon being checked is not a %s. This daemon is a %s. Please check the connection information (address, port) in the check configuration.' % (daemon_type, reported_daemon_type)
            result.hard_exit(EXIT_STATUS.WARNING, wrong_type_output)
        
        version_problem = ShinkenUtils._check_versions(
            daemon_type,
            data.get('api_version', None),
            data.get('daemon_version', None),
            data.get('arbiter_version', None),
            shinken_supervisor_version,
        )
        if version_problem:
            result.hard_exit(EXIT_STATUS.WARNING, version_problem)
        
        result.set_spare_info(data, daemon_type)
        result.set_logger_stats(data)  # if logger is too slow, will be WARNING
        
        # Before any other test, check that the daemon is not dead locked: if it is, all the tests are meaningless
        assert_daemon_is_not_deadlocked_from_monitoring_check(result, data)
        check_daemon_serialization_security_errors(result, data)
    
    
    @staticmethod
    def _check_cpu_stolen(result, hypervisor_type, dict_value_cpu, cpu_steal_threshold_warning, cpu_steal_threshold_critical, output_in_li):
        # type: (Result, str, Dict, int, int, bool) -> None
        """Compute the CPU stolen status and push it into `result`.
        
        The perfdatas are always added to `result`; the check itself is only added when a status was computed.
        """
        computed = ShinkenUtils._compute_status_for_cpu_stolen(hypervisor_type, dict_value_cpu, cpu_steal_threshold_warning, cpu_steal_threshold_critical, output_in_li)
        message, status, perfdatas = computed
        
        # In all cases, stack the perfdatas
        for perf_name, perf_value in perfdatas.items():  # items => manage both py2 and py3
            result.add_perf_data(perf_name, perf_value)
        
        if status is not None:
            # if we are asking a list output, do not force a new line too
            result.add_check(status, message, no_new_line=output_in_li)
    
    
    @staticmethod
    def _compute_status_for_cpu_stolen(hypervisor_type, dict_value_cpu, cpu_steal_threshold_warning, cpu_steal_threshold_critical, output_in_li=False):
        # type: (str, Dict, int, int, bool) -> Tuple[Optional[unicode], Optional[int], Dict[unicode, unicode]]
        """Build the (message, exit status, perfdatas) of the CPU stolen check for one hypervisor type.
        
        `dict_value_cpu` must hold the CPU_STOLEN_NAME entries read for `hypervisor_type`:
        CPU_READY for VMWARE, VPTR and LPTR for HYPERV (the checked value is LPTR - VPTR), CPU_STEAL for KVM.
        For any other hypervisor type the result is (None, None, {}).
        Only the VMWARE branch produces a perfdata ('cpu_stolen__vmware', already formatted as a string).
        When `output_in_li` is set, an OK message is wrapped with HTMLList.one_bullet_list.
        """
        exit_status = None
        return_msg = None
        perfdatas = {}
        
        _hypervisor_display = HYPERVISOR_DISPLAY_NAME.get(hypervisor_type, 'UNKNOWN')
        if hypervisor_type == HYPERVISOR.VMWARE:
            cpu_ready = dict_value_cpu[CPU_STOLEN_NAME.CPU_READY]
            message_by_status, exit_status = ShinkenUtils._compute_status_code_for_cpu_stolen(hypervisor_type, cpu_ready, cpu_steal_threshold_warning, cpu_steal_threshold_critical)
            # Plain u'' literal: the text has no backslash so the value is the same as with ur'', and ur'' is a SyntaxError on py3
            return_msg = message_by_status + u'<br/>&nbsp;&nbsp;&nbsp;→ On the VCenter search the data <b>CPU %ready + %costop</b><br/>&nbsp;&nbsp;&nbsp;→ Please have a look at the Shinken Enterprise documentation about advices to reduce it'
            perfdatas[u'cpu_stolen__vmware'] = u'%.2f%%' % cpu_ready
        elif hypervisor_type == HYPERVISOR.HYPERV:
            vptr = dict_value_cpu[CPU_STOLEN_NAME.VPTR]
            lptr = dict_value_cpu[CPU_STOLEN_NAME.LPTR]
            cpu_stolen_value = lptr - vptr
            message_by_status, exit_status = ShinkenUtils._compute_status_code_for_cpu_stolen(hypervisor_type, cpu_stolen_value, cpu_steal_threshold_warning, cpu_steal_threshold_critical)
            # NOTE(review): '%2.f' is width 2 / precision 0 (no decimals); if two decimals were intended it should be '%.2f' -- confirm
            return_msg = message_by_status + u'%s LPTR: %2.f&#37; VPTR: %2.f&#37; ' % (_hypervisor_display, lptr, vptr)
        elif hypervisor_type == HYPERVISOR.KVM:
            cpu_steal = dict_value_cpu[CPU_STOLEN_NAME.CPU_STEAL]
            message_by_status, exit_status = ShinkenUtils._compute_status_code_for_cpu_stolen(hypervisor_type, cpu_steal, cpu_steal_threshold_warning, cpu_steal_threshold_critical)
            # '&#37;' is the HTML character reference of '%': it must be closed by ';' to be well formed
            return_msg = message_by_status + u'%s CPU steal%% : %2.f&#37;.' % (_hypervisor_display, cpu_steal)
        
        if output_in_li and exit_status == EXIT_STATUS.OK:
            # we only want the <li> for ok, because on the other status we want the message in a full bloc
            return_msg = HTMLList.one_bullet_list(return_msg)
        return return_msg, exit_status, perfdatas
    
    
    @staticmethod
    def _compute_status_code_for_cpu_stolen(hypervisor_type, cpu_stolen_value, cpu_steal_threshold_warning, cpu_steal_threshold_critical):
        # type: (unicode, int, int, int) -> Tuple[Optional[unicode], Optional[int]]
        """Return the (message, exit status) matching `cpu_stolen_value` against the two thresholds.
        
        A value of exactly 0 is always OK. Otherwise the critical threshold is tested first, then the warning one;
        a value under both thresholds is OK.
        """
        hypervisor_label = HYPERVISOR_DISPLAY_NAME.get(hypervisor_type, 'UNKNOWN')
        no_steal_message = u'You don\'t have a stolen cpu on your machine → '
        
        # Only the VMWARE message displays the stolen value itself
        if hypervisor_type == HYPERVISOR.VMWARE:
            steal_message = u'Your machine got <b>%d%% of CPU STOLEN</b> from the Hypervisor ( <i>Type %s</i> )' % (cpu_stolen_value, hypervisor_label)
        else:
            steal_message = u'Your machine got <b>CPU STOLEN</b> from the Hypervisor ( <i>Type %s</i> ) → ' % hypervisor_label
        
        if cpu_stolen_value != 0:
            if cpu_stolen_value >= cpu_steal_threshold_critical:
                return steal_message, EXIT_STATUS.CRITICAL
            if cpu_stolen_value >= cpu_steal_threshold_warning:
                return steal_message, EXIT_STATUS.WARNING
        
        return no_steal_message, EXIT_STATUS.OK
    
    
    # NOTE: result != None => call from a check
    #       result == None => ?
    @staticmethod
    def check_hypervisor_vm_cpu_stolen(data, cpu_steal_threshold_warning, cpu_steal_threshold_critical, result=None, output_in_li=False):
        # type: (Dict[str, Any], int, int, Optional[Result], bool) -> Tuple[Optional[str], Optional[int], Dict]
        """Check the CPU stolen by the hypervisor, based on the hypervisor flags found in `data`.
        
        The hypervisor is detected in this order: VMWARE (needs 'vmware_stats_enabled', the VMWARE flag and
        'vmware_vm'), HYPERV, KVM. When none is detected nothing is checked.
        
          * result is not None: the status and perfdatas are pushed into `result`, (None, None, {}) is returned
          * result is None    : nothing is pushed, (message, exit status, perfdatas) is returned
        
        The return value is always a 3-tuple. It is (None, None, {}) when there is nothing to give back to the caller.
        NOTE(review): `output_in_li` is only used when `result` is given -- confirm this is wanted.
        """
        if data.get('vmware_stats_enabled') and data.get(HYPERVISOR.VMWARE, False) and data.get('vmware_vm', False):
            hypervisor_type = HYPERVISOR.VMWARE
            cpu_value_names = (CPU_STOLEN_NAME.CPU_READY,)
        elif data.get(HYPERVISOR.HYPERV, False):
            hypervisor_type = HYPERVISOR.HYPERV
            cpu_value_names = (CPU_STOLEN_NAME.VPTR, CPU_STOLEN_NAME.LPTR)
        elif data.get(HYPERVISOR.KVM, False):
            hypervisor_type = HYPERVISOR.KVM
            cpu_value_names = (CPU_STOLEN_NAME.CPU_STEAL,)
        else:
            return None, None, {}
        
        # Keep only the values needed for the detected hypervisor (a missing value raises KeyError)
        dict_value_cpu = dict((value_name, data[value_name]) for value_name in cpu_value_names)
        
        # Explicit None test: a result object must be used even if it evaluates as false
        if result is not None:
            ShinkenUtils._check_cpu_stolen(result, hypervisor_type, dict_value_cpu, cpu_steal_threshold_warning, cpu_steal_threshold_critical, output_in_li=output_in_li)
            # Everything was pushed into result: keep the declared tuple shape instead of an implicit None
            return None, None, {}
        
        return ShinkenUtils._compute_status_for_cpu_stolen(hypervisor_type, dict_value_cpu, cpu_steal_threshold_warning, cpu_steal_threshold_critical)
    
    
    # Look at raw data about the spare information:
    # is_spare yes/no? look at spare_daemon, master_daemon (if existing!)
    # NOTE: the spare case is managed in the minimal check, because it can break/stop all results
    @staticmethod
    def check_master_information(data, result):
        # type: (Dict[str, Any], Result) -> Tuple[Optional[str], Optional[int]]
        
        # check only daemons that manage it(currently Brokers)
        if 'spare' not in data or 'master_daemon' not in data or 'spare_daemon' not in data:
            return None, None
        
        is_spare = data.get('spare')  # we did assert it exists
        spare_daemon = data.get('spare_daemon')
        if not is_spare:
            if spare_daemon != '':
                return_msg = u'The %s daemon is %s' % (HTMLTag.tag_value(u'SPARE', COLOR.BLUE), HTMLTag.tag_value(spare_daemon, COLOR.GREEN))
                if not data.get('spare_must_have_the_same_list_of_module_type', True):
                    return_msg = u"%s ( the broker configuration doesn't require the same list of modules on its spare )" % return_msg
            else:
                return_msg = u'This daemon do not have any spare defined'
            result.add_check(EXIT_STATUS.OK, HTMLList.one_bullet_list(return_msg), no_new_line=True)
    
    
    @staticmethod
    def get_satellites_connection(result, base_uri, timeout, raise_excepton=False):
        # type: (Result, unicode, int, Optional[bool]) -> (List, float)
        """Request '/check_satellites_connexion' on `base_uri` and return (decoded JSON body, connexion time).
        
        `timeout` is used both as the request timeout and as the `timeout` query parameter; `raise_excepton` is
        forwarded to Utils.request_get as `raise_exp`.
        """
        satellites_uri = u'/check_satellites_connexion?timeout=%s' % timeout
        response = Utils.request_get(result, base_uri, satellites_uri, timeout=timeout, raise_exp=raise_excepton)
        raw_body, connexion_time = response
        satellites = json.loads(raw_body)
        return satellites, connexion_time


# Render the state tag of the CRITICAL / WARNING / OK states once, when this module is loaded,
# and expose the results as HTMLTag.CRITICAL / HTMLTag.WARNING / HTMLTag.OK
HTMLTag.CRITICAL = HTMLTag.state_tag(HTMLTag.STATE_CRITICAL)
HTMLTag.WARNING = HTMLTag.state_tag(HTMLTag.STATE_WARNING)
HTMLTag.OK = HTMLTag.state_tag(HTMLTag.STATE_OK)


### Some specific tests; some can be called by checks AND/OR healthcheck
# Before all tests, we must check that the daemon is NOT deadlocked, because if it is,
# all the tests are meaningless. In that case, stop the tests and get out
def assert_daemon_is_not_deadlocked_from_monitoring_check(result, raw_stats):
    # type: (Result, Dict) -> None
    """Report a CRITICAL through result.hard_exit when raw_stats[u'dead_lock'] is a critical watch dog status.
    
    Nothing is done when the u'dead_lock' entry is missing or None: old daemons (before 001.9, december 2021) did
    not have this information, so the test is skipped for them.
    """
    dead_lock_status = raw_stats.get(u'dead_lock')
    if dead_lock_status is not None:
        # Let the watch dog fatal object check itself if it's ok or not
        status_code, html_output = watchdog_fatal_status.check_status_and_output_html(dead_lock_status)
        if status_code == WATCH_DOG_STATUS_CODE.CRITICAL:
            result.hard_exit(EXIT_STATUS.CRITICAL, html_output, u'')


def check_daemon_serialization_security_errors(result, raw_stats):
    # type: (Result, Dict) -> None
    """Add a WARNING to `result` when raw_stats[u'serialization_security_errors'] is a critical security status.
    
    Nothing is done when the entry is missing or None: old daemons (before feb 2022) did not have this
    information, so the check is dropped for them.
    """
    security_errors = raw_stats.get(u'serialization_security_errors')
    if security_errors is not None:
        # Checking if we got security errors for the daemon
        status_code, html_output = serialization_security_container.check_status_and_output_html(security_errors)
        if status_code == SERIALIZATION_SECURITY_STATUS_CODE.CRITICAL:
            result.add_check(EXIT_STATUS.WARNING, html_output, u'')


def get_version_for_output(version):
    # type: (unicode) -> unicode
    """Return `version` prefixed with u'V', unless it already starts with an upper case u'V'."""
    if version.startswith(u'V'):
        return version
    return u'V%s' % version
