#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2019
# This file is part of Shinken Enterprise, all rights reserved.

# Version of this check script; appended to the option parser's version string.
VERSION = '0.1'
# Daemon type handed to the ShinkenUtils request / minimal-check helpers.
DAEMON_TYPE = 'broker'

import json

from shinkensolutions.lib_checks.common import BREAK_LINE, COLOR, EXIT_STATUS, HTMLList, HTMLTable, HTMLTag, ParseOptionError, RaiseOnExitOptionParser, Result, ShinkenUtils
from shinkensolutions.lib_checks.graphite import CheckGraphite, TAG_FOR_STATE, TAG_CRITICAL, TAG_OK, TAG_WARNING, GRAPHITE_API_VERSION, GRAPHITE_STATS_KEY, GRAPHITE_STATS_FILE_IS_TOO_OLD, NB_METRICS_COUNT_FILE
from shinken.util import format_t_into_dhms_format

# Module-wide Result: argument parsing, both meta-checks and main() report into it.
result = Result()

# Command line definition. The options are registered in the same order as they
# are meant to appear in the generated help.
parser = RaiseOnExitOptionParser(
    '%prog [options] [--help]',
    version='%prog ' + VERSION
)

# --- Daemon to query -------------------------------------------------------
parser.add_option(
    '-H', '--hostname',
    dest='hostname',
    help='The hostname of the shinken daemon'
)
parser.add_option(
    '-p', '--port',
    type='int',
    dest='port',
    help='The port of the shinken daemon'
)
parser.add_option(
    '-t', '--timeout',
    type='int',
    default=3,
    dest='timeout',
    help='timeout to connect to the shinken daemon. Default : 3'
)
parser.add_option(
    '-w', '--webui',
    default='',
    dest='webui',
    help='Name of the webui to check. Default : the first one'
)

# --- SSH settings (forwarded to CheckGraphiteForReader) ----------------------
parser.add_option(
    '-P', '--ssh-port',
    type='int',
    default=22,
    dest='ssh_port',
    help='SSH port to connect to. Default : 22'
)
parser.add_option(
    '-i', '--ssh-key',
    default='~/.ssh/id_rsa',
    dest='ssh_key_file',
    help='SSH key file to use. By default it will take ~/.ssh/id_rsa.'
)
parser.add_option(
    '-u', '--ssh-user',
    default='shinken',
    dest='user',
    help='remote user to use. By default shinken.'
)
parser.add_option(
    '-r', '--passphrase',
    default='',
    dest='passphrase',
    help='SSH key passphrase. By default will use void.'
)

# --- Version comparison ------------------------------------------------------
parser.add_option(
    '--shinkenversion',
    default='',
    dest='shinken_supervisor_version',
    help='This shinken version number to compare with the monitored shinken. Mandatory if shinken mode.'
)


def _parse_args():
    """Parse and validate the command line options.
    
    Positional arguments are refused and both the hostname and the port options
    are mandatory; violations are reported through ``parser.error``.
    
    A ``ParseOptionError`` carrying a message is turned into a CRITICAL hard
    exit. Without a message the script exits with status 0.
    NOTE(review): presumably the message-less case is the parser ending after
    --help / --version -- confirm against RaiseOnExitOptionParser.
    
    :return: the parsed option values
    """
    # Function-local import: sys.exit() is used instead of the exit() helper that
    # the site module injects for interactive sessions (absent with ``python -S``).
    import sys
    
    opts = None
    try:
        opts, args = parser.parse_args()
        if args:
            parser.error('Does not accept any argument.')
        
        if opts and not opts.hostname:
            parser.error('Missing parameter hostname (-H/--hostname)')
        if opts and not opts.port:
            parser.error('Missing parameter port (-p/--port)')
    
    except ParseOptionError as e:
        if e.msg:
            # Each line of the parser message is put on its own line of the check output
            result.hard_exit(EXIT_STATUS.CRITICAL, 'Fail to parse command argument : %s %s' % (BREAK_LINE, BREAK_LINE.join(e.msg.split('\n'))))
        sys.exit(0)
    
    return opts


class CheckGraphiteForReader(CheckGraphite):
    """CheckGraphite variant built only from a hostname and SSH connection settings.
    
    Every other constructor parameter of CheckGraphite is handed over as None.
    """
    
    def __init__(self, graphite_hostname, ssh_user, ssh_passphrase, ssh_key_file, ssh_port):
        # Parent parameters for which this check has no value: all passed as None
        unset_parameters = dict.fromkeys((
            'graphite_location',
            'graphite_user',
            'storage_usage_warning',
            'storage_usage_critical',
            'graphite_port',
            'graphite_conf_file',
            'graphite_process_name',
        ))
        super(CheckGraphiteForReader, self).__init__(
            graphite_hostname=graphite_hostname,
            user=ssh_user,
            passphrase=ssh_passphrase,
            ssh_key_file=ssh_key_file,
            ssh_port=ssh_port,
            **unset_parameters
        )
    
    
    def result_add_check(self, status, output, step_index=1, step_name='STEP'):
        """Append '<step_name> : <output>' to self.summary and record status on self.result.
        
        step_index is accepted but not used by this implementation.
        """
        summary_line = '%s : %s' % (step_name, output)
        self.summary.append(summary_line)
        self.result.add_check(status)


def check_graphite_server_read(server, opts):
    """Evaluate the read status reported for one graphite backend.
    
    Checks performed, in order:
      * the backend is reachable (CRITICAL otherwise);
      * its API version equals GRAPHITE_API_VERSION (WARNING otherwise);
      * a metrics count is available (WARNING otherwise);
      * the stats file is not too old / its timestamps are available
        (WARNING otherwise, the metrics count is still reported).
    
    :param server: dict describing one backend; keys read are 'version',
                   'reachable', 'nb_metrics', GRAPHITE_STATS_KEY.TIME_READ and
                   GRAPHITE_STATS_KEY.LOCAL_TIME
    :param opts: parsed command line options (hostname, SSH settings and
                 shinken_supervisor_version are read)
    :return: tuple (Result with the status and perf data of this backend,
             one-element list holding the HTML summary)
    """
    check_graphite = CheckGraphiteForReader(opts.hostname, opts.user, opts.passphrase, opts.ssh_key_file, opts.ssh_port)
    check_graphite.result = Result()
    check_graphite.summary = []
    
    _step_name = 'HTTP API status'
    _stats_step_name = 'HTTP API Stats'
    _version = server.get('version', None)
    _reachable = server.get('reachable', False)
    _nb_metrics = server.get('nb_metrics', None)
    _read_time = server.get(GRAPHITE_STATS_KEY.TIME_READ, -1)
    _server_time = server.get(GRAPHITE_STATS_KEY.LOCAL_TIME, None)
    
    if _reachable:
        check_graphite.result_add_check(EXIT_STATUS.OK, '%s - Can connect to graphite API.' % (TAG_OK), step_name=_step_name)
        if not _version or _version != GRAPHITE_API_VERSION:
            check_graphite.result_add_check(EXIT_STATUS.WARNING, '%s - Graphite backend is not up to date. Please update graphite backend with shinken version %s.' % (TAG_WARNING, opts.shinken_supervisor_version), step_name=_stats_step_name)
        elif _nb_metrics is not None:
            metric_file_is_too_old = ''
            # Missing timestamps (None, or the -1 default of the read time) mean the
            # age can not be computed: handled like a too old file.
            if _server_time is None or _read_time is None or _read_time == -1:
                metric_file_is_too_old = 'The graphite stats file "%s" seems to be too old. You must look at the Gatherer log (/var/log/shinken/gatherer.log). ' \
                                         'Then, only if need, you can restart the Gatherer with "service shinken-gatherer restart".' % (NB_METRICS_COUNT_FILE)
            else:
                stats_file_age = _server_time - _read_time
                if stats_file_age > GRAPHITE_STATS_FILE_IS_TOO_OLD:
                    metric_file_is_too_old = 'The graphite stats file "%s" seems to be too old (not update since %s > %ss). You must look at the Gatherer log (/var/log/shinken/gatherer.log). ' \
                                             'Then, only if need, you can restart the Gatherer with "service shinken-gatherer restart".' % (
                                                 NB_METRICS_COUNT_FILE,
                                                 format_t_into_dhms_format(stats_file_age),
                                                 GRAPHITE_STATS_FILE_IS_TOO_OLD)
            
            # Emit the warning whenever a message was built. The former guard on
            # "_server_time is not None" silently dropped the missing-timestamp case.
            if metric_file_is_too_old:
                check_graphite.result_add_check(EXIT_STATUS.WARNING, metric_file_is_too_old, step_name=_stats_step_name)
            
            check_graphite.result_add_check(EXIT_STATUS.OK, '%s - %s metrics found.' % (TAG_OK, _nb_metrics), step_name=_stats_step_name)
            check_graphite.result.add_perf_data('nb_metrics', _nb_metrics)
        else:
            check_graphite.result_add_check(EXIT_STATUS.WARNING, '%s - Can not get stats about metrics and hosts/clusters. The graphite backend may be slow. Please check the logs on graphite backend.' % (TAG_WARNING), step_name=_stats_step_name)
    
    else:
        check_graphite.result_add_check(EXIT_STATUS.CRITICAL, '%s - Fail to request metric to graphite server.' % (TAG_CRITICAL), step_name=_step_name)
    
    return check_graphite.result, [HTMLList.simple_list(check_graphite.summary)]


def metacheck_graphite_server(raw_stats, read_status, opts):
    """Check every graphite backend relevant for this broker and report the worst state.
    
    A backend is relevant when its realm is listed in raw_stats['known_realms']
    or when its realm is '*'. One table row is produced per backend and the perf
    data of each backend is copied into the module-level result.
    
    :param raw_stats: dict of daemon stats ('known_realms' is read)
    :param read_status: list of backend status dicts ('realm' and 'host' are read here)
    :param opts: parsed command line options, forwarded to check_graphite_server_read
    """
    broker_realms = raw_stats.get('known_realms', [])
    row_titles = []
    rows = []
    worst_state = EXIT_STATUS.OK
    nb_critical_backends = 0
    
    # Select the backends first, so the whole status list is validated before any check runs
    backends = []
    for entry in read_status:
        if entry['realm'] in broker_realms or entry['realm'] == '*':
            backends.append(entry)
    
    for backend in backends:
        backend_result, backend_summary = check_graphite_server_read(backend, opts)
        backend_state = backend_result.status
        if backend_state == EXIT_STATUS.CRITICAL:
            nb_critical_backends += 1
        
        row_titles.append('%s is %s' % (backend['host'], TAG_FOR_STATE[backend_state]))
        rows.append(backend_summary)
        worst_state = max(backend_state, worst_state)
        
        for perf_name, perf_value in backend_result.perf_data.iteritems():
            result.add_perf_data(perf_name, perf_value)
    
    backends_table = HTMLTable.table([], rows, left_headers=row_titles, title='Graphite backend(s)', all_col_same_width=False)
    
    # Short output: depends on the worst state and, when critical, on how many backends are critical
    if worst_state == EXIT_STATUS.CRITICAL:
        if nb_critical_backends == len(backends):
            short_output = 'All graphite backends are critical.'
        else:
            short_output = 'Some graphite backends are critical.'
    elif worst_state == EXIT_STATUS.WARNING:
        short_output = 'Some graphite backends are warning.'
    else:
        short_output = 'All graphite backends are available.'
    
    result.add_check(status=worst_state, output=short_output, long_output=backends_table)


def metacheck_module(raw_stats, read_status, webui_name):
    """Check, module by module, that every realm has usable graphite backends.
    
    The entries of read_status are grouped by module name and realm. For each
    module (or only the one named webui_name when given) every realm known by the
    broker, plus every configured realm the broker does not know, is evaluated:
      * realm configured on the module but unknown to the broker -> WARNING;
      * realm without any backend -> CRITICAL;
      * unreachable backend -> CRITICAL;
      * reachable backend without version or without hosts count -> WARNING.
    The worst state and an HTML table are added to the module-level result.
    
    :param raw_stats: dict of daemon stats ('known_realms' is read)
    :param read_status: list of backend status dicts ('module_name', 'realm',
                        'host', 'reachable', 'version', 'nb_hosts_clusters' are read)
    :param webui_name: name of the only module to check; empty to check all of them
    """
    modules_state = EXIT_STATUS.OK
    known_realms = raw_stats.get('known_realms', [])
    headers = []
    lines = []
    modules_list = {}
    
    # Group the entries: modules_list[module name][realm] -> list of backend entries
    for rs in read_status:
        m_name = rs['module_name']
        realm = rs['realm']
        modules_list.setdefault(m_name, {}).setdefault(realm, []).append(rs)
    
    for module_name, realms_infos in modules_list.iteritems():
        
        if webui_name and webui_name != module_name:
            continue
        
        module_state = EXIT_STATUS.OK
        module_summary = []
        manage_all = False
        
        if len(realms_infos) == 1 and realms_infos.get('*', None):
            # The module only declares the '*' realm: its backends serve every known realm
            manage_all = True
            useless_realm = []
            _all_hosts = realms_infos['*'][0].get('nb_hosts_clusters', None)
            if _all_hosts is None:
                _all_realms = ['%s : Can not get hosts/clusters infos. Check graphite backends part.' % TAG_WARNING]
            else:
                _all_realms = ['Contains %s hosts/clusters with metrics' % _all_hosts]
            module_summary.append(HTMLList.header_list(HTMLTag.color_text('All realms'), _all_realms))
        else:
            # Realms configured on the module that the broker does not know
            useless_realm = [r for r in realms_infos if r not in known_realms]
        
        all_realms = set(useless_realm).union(set(known_realms))
        
        for realm_name in all_realms:
            realm_summary = []
            realm_state = EXIT_STATUS.OK
            
            # Compute exit for realm who are in backend but broker can't manage
            if realm_name in useless_realm:
                realm_state = EXIT_STATUS.WARNING
                realm_summary.append('%s : The module %s doesn\'t have any data for this realm.' % (TAG_FOR_STATE[realm_state], HTMLTag.color_text(module_name)))
            
            # Take all realm info if backend is set on '*'.
            # The lookup uses the raw realm key: translating '*' to its display name
            # before the lookup made the '*' backends impossible to find.
            info = realms_infos['*'] if manage_all else realms_infos.get(realm_name, None)
            displayed_realm_name = 'All realms' if realm_name == '*' else realm_name
            
            if info:
                for i in info:
                    if i['reachable']:
                        # A reachable backend that reports no version is only a warning
                        state = EXIT_STATUS.OK if i.get('version', None) else EXIT_STATUS.WARNING
                        _nb_hosts = i.get('nb_hosts_clusters', None)
                        if _nb_hosts is None:
                            state = EXIT_STATUS.WARNING
                            hosts_output = 'Can not get the hosts number. Check the graphite backend.'
                        else:
                            hosts_output = 'Contains %s hosts/clusters with metrics' % _nb_hosts
                        
                        _graphite_summary = '%s : Graphite backend %s ' % (TAG_FOR_STATE[state], HTMLTag.color_text(i['host']))
                        if state != EXIT_STATUS.OK:
                            _graphite_summary += 'has some issues. Check graphite backends part.'
                        
                        if manage_all:
                            # The hosts count is already shown once in the 'All realms' entry
                            _realm_summary = _graphite_summary
                        else:
                            _realm_summary = HTMLList.header_list(_graphite_summary, [hosts_output])
                    else:
                        state = EXIT_STATUS.CRITICAL
                        _realm_summary = HTMLList.header_list('%s : Graphite backend %s. ' % (TAG_FOR_STATE[state], HTMLTag.color_text(i['host'])), ['Server is %s' % HTMLTag.color_text('unreachable', color=COLOR.RED)])
                    realm_summary.append(_realm_summary)
                    realm_state = max(realm_state, state)
            else:
                realm_state = EXIT_STATUS.CRITICAL
                realm_summary.append('%s : The module %s doesn\'t have a graphite backend for this realm.' % (TAG_FOR_STATE[realm_state], HTMLTag.color_text(module_name)))
            module_summary.append(HTMLList.header_list(HTMLTag.color_text(displayed_realm_name), realm_summary))
            module_state = max(module_state, realm_state)
        
        headers.append('%s is %s' % (module_name, TAG_FOR_STATE[module_state]))
        lines.append([HTMLList.simple_list(module_summary)])
        modules_state = max(modules_state, module_state)
    
    metacheck_graphit_server_long_output = HTMLTable.table([], lines, left_headers=headers, title='Module(s)', all_col_same_width=False)
    output = 'All modules can contact their graphite backends.'
    if modules_state == EXIT_STATUS.WARNING:
        output = 'Some modules have issue.'
    elif modules_state == EXIT_STATUS.CRITICAL:
        output = 'Some realms are missing for modules.'
    result.add_check(status=modules_state, output=output, long_output=metacheck_graphit_server_long_output)


def main():
    """Entry point of the check.
    
    Parses the options, fetches '/get_raw_stats' and '/check_graphite_read_status'
    from the daemon, then runs the graphite backend meta-check followed by the
    module meta-check and exits with the accumulated result. The check ends
    early with OK when the daemon reports no module of type 'webui'.
    """
    options = _parse_args()
    
    host = options.hostname
    port = options.port
    supervisor_version = options.shinken_supervisor_version
    timeout = options.timeout
    webui_name = options.webui
    
    if timeout <= 0:
        result.hard_exit(EXIT_STATUS.CRITICAL, 'The --timeout option (%s) must be greater than 0' % timeout)
    
    daemon_uri = '%s:%s' % (host, port)
    
    # General daemon stats, validated by the common minimal check
    raw_stats_body, _ = ShinkenUtils.request_get_daemon(result, DAEMON_TYPE, daemon_uri, '/get_raw_stats', timeout=timeout)
    raw_stats = json.loads(raw_stats_body)
    ShinkenUtils.minimal_check(result, raw_stats, DAEMON_TYPE, supervisor_version)
    
    # Read status of the graphite backends
    read_status_body, _ = ShinkenUtils.request_get_daemon(result, DAEMON_TYPE, daemon_uri, '/check_graphite_read_status', timeout=timeout)
    read_status = json.loads(read_status_body)
    
    # Nothing to check when no webui module is declared on this daemon
    webui_modules = []
    for module_info in raw_stats['modules_info']:
        if module_info['type'] == 'webui':
            webui_modules.append(module_info)
    if not webui_modules:
        result.hard_exit(EXIT_STATUS.OK, 'No module "visualisation ui" on this broker')
    
    metacheck_graphite_server(raw_stats, read_status, options)
    result.add_check(long_output=BREAK_LINE)
    metacheck_module(raw_stats, read_status, webui_name)
    
    result.exit()


# Run the check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
