#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2021
# This file is part of Shinken Enterprise, all rights reserved.

import json

from shinkensolutions.lib_checks.common import BREAK_LINE, COLOR, EXIT_STATUS, HTMLList, HTMLTable, HTMLTag, ParseOptionError, RaiseOnExitOptionParser, Result, ShinkenUtils, Utils, ShinkenOption
from shinkensolutions.lib_checks.graphite import CheckGraphiteForWriter
from shinkensolutions.lib_checks.graphite import TAG_CRITICAL, TAG_FOR_STATE

# Version of this check, appended to the program name in the parser's version string.
VERSION = u'0.1'
# Daemon type passed to ShinkenUtils.request_get_daemon() and ShinkenUtils.minimal_check().
DAEMON_TYPE = u'broker'
# Path handed to CheckGraphiteForWriter as its graphite_conf_file argument.
GRAPHITE_CONF_FILE = u'/opt/graphite/conf/carbon.conf'


class WRITE_STATUS(object):
    """Constants for the 'status' field of the '/check_graphite_write_status' payload.

    Only NO_MODULE is compared against that field in this file (see main()).
    NOTE(review): NO_CONF and OK are presumably the other values the daemon
    can return - confirm against the broker implementation.
    """
    NO_MODULE = u'NO_MODULE'
    NO_CONF = u'NO_CONF'
    OK = u'OK'


def _init_parser():
    """Build the command line parser of the check.

    The parser declares the daemon connection options (-H, -p, -t), the SSH
    options (-P, -i, -u, -r), the Graphite storage and I/O options
    (-d, -U, -w, -c, -D, -W, -C), the expected Shinken version and the names
    of the carbon cache / relay processes (-n, -R).

    No validation is done here: presence of hostname and port is checked by
    the caller after parsing.

    :return: the configured RaiseOnExitOptionParser instance
    """
    parser = RaiseOnExitOptionParser(u'%prog [options] [--help]', version=u'%prog ' + VERSION, option_class=ShinkenOption)

    # Connection to the shinken daemon
    parser.add_option(u'-H', u'--hostname', dest=u'hostname', help=u'The hostname of the shinken daemon')
    parser.add_option(u'-p', u'--port', dest=u'port', type=u'int', help=u'The port of the shinken daemon')
    parser.add_option(u'-t', u'--timeout', dest=u'timeout', type=u'int', default=3, help=u'timeout to connect to the shinken daemon. Default : 3')

    # SSH access to the graphite server(s)
    parser.add_option(u'-P', u'--ssh-port', dest=u'ssh_port', type=u'int', default=22, help=u'SSH port to connect to. Default : 22')
    parser.add_option(u'-i', u'--ssh-key', dest=u'ssh_key_file', default=u'~/.ssh/id_rsa', help=u'SSH key file to use. By default it will take ~/.ssh/id_rsa.')
    parser.add_option(u'-u', u'--ssh-user', dest=u'user', default=u'shinken', help=u'remote user to use. By default shinken.')
    parser.add_option(u'-r', u'--passphrase', dest=u'passphrase', default=u'', help=u'SSH key passphrase. By default void will be used.')

    # Graphite storage and disk I/O thresholds
    parser.add_option(u'-d', u'--graphite_location', dest=u'graphite_location', default=u'/opt/graphite/storage/whisper', help=u'Graphite Data Location. Default : /opt/graphite/storage/whisper')
    parser.add_option(u'-U', u'--graphite_user', dest=u'graphite_user', default=u'apache', help=u'Graphite user. Default : apache')
    parser.add_option(u'-w', u'--warning', dest=u'storage_usage_warning', type=u'int', default=85, help=u'Warning value for Graphite space usage. In percent. Default : 85%')
    parser.add_option(u'-c', u'--critical', dest=u'storage_usage_critical', type=u'int', default=95, help=u'Critical value for Graphite space usage. In percent. Default : 95%')
    parser.add_option(u'-D', u'--graphite_disks', dest=u'graphite_disks', default=u'', help=u'Filter for disk I/O : list of disks separated by commas. Default no filter')
    parser.add_option(u'-W', u'--warning-io', dest=u'storage_io_warning', type=u'int', default=85, help=u'Warning value for Graphite io usage. In percent. Default : 85%')
    parser.add_option(u'-C', u'--critical-io', dest=u'storage_io_critical', type=u'int', default=95, help=u'Critical value for Graphite io usage. In percent. Default : 95%')

    parser.add_option(u'--shinkenversion', dest=u'shinken_supervisor_version', default='', help=u'The shinken version number used to compare with the monitored shinken. Mandatory if in shinken mode.')

    # Carbon process names: each help text describes its own option and default
    # (they were previously swapped between --cache and --relay).
    parser.add_option(u'-n', u'--cache', dest=u'graphite_cache_name', default=u'carbon-cache', help=u'Name for the graphite cache process. Default : carbon-cache')
    parser.add_option(u'-R', u'--relay', dest=u'graphite_relay_name', default=u'carbon-relay', help=u'Name for the graphite relay process. Default : carbon-relay')
    return parser


# Module-level Result shared by every function of this script: the checks add
# their outputs and perf data to it and main() finally calls result.exit().
result = Result()


def _parse_args():
    """Parse and validate the command line of the check.

    Positional arguments are rejected and both the hostname (-H) and the
    port (-p) are required.

    When a ParseOptionError carrying a message is caught, the message is
    reported through ``result.hard_exit`` with the CRITICAL status, its line
    breaks being replaced by BREAK_LINE. When the error carries no message
    the process exits with code 0.

    :return: the parsed options object
    """
    # Local import: `exit()` is only injected by the `site` module for
    # interactive use, `sys.exit` is the supported way to stop a program.
    import sys

    parser = _init_parser()
    opts = None
    try:
        opts, args = parser.parse_args()
        if args:
            parser.error('Does not accept arguments.')

        if opts and not opts.hostname:
            parser.error('Missing parameter hostname (-H/--hostname)')
        if opts and not opts.port:
            parser.error('Missing parameter port (-p/--port)')

    except ParseOptionError as e:
        if e.msg:
            html_message = BREAK_LINE.join(e.msg.split('\n'))
            result.hard_exit(EXIT_STATUS.CRITICAL, 'Fail to parse command argument : %s %s' % (BREAK_LINE, html_message))
        # No message to report (presumably the parser stopped after printing
        # --help / --version): leave quietly.
        sys.exit(0)

    return opts


def meta_check_graphite_server(write_status, opts):
    """Check every graphite server listed in the daemon write status.

    For each entry of ``write_status['data']`` a CheckGraphiteForWriter is
    built from the command line options, run with ``do_all_check()`` and
    completed with a "can post data" check. All the checkers write into one
    shared Result; its criticals, warnings, outputs and perf data are then
    copied into the module-level ``result`` together with an HTML table
    holding one row per server.

    :param write_status: decoded payload of '/check_graphite_write_status';
        each item of its 'data' list provides 'host', 'port', 'module_name'
        and 'can_post_data'
    :param opts: parsed command line options
    """
    shared_result = Result()
    row_headers = []
    rows = []

    for server in write_status['data']:
        host = server['host']
        if host in ['127.0.0.1', 'localhost']:
            # A loopback address is relative to the daemon: use the daemon hostname instead
            host = opts.hostname
        port = server['port']
        module_name = server['module_name']
        can_post_data = server['can_post_data']
        if can_post_data:
            server_state = EXIT_STATUS.OK
        else:
            server_state = EXIT_STATUS.CRITICAL

        checker = CheckGraphiteForWriter(
            graphite_disks=opts.graphite_disks,
            storage_io_warning=opts.storage_io_warning,
            storage_io_critical=opts.storage_io_critical,
            graphite_hostname=host,
            graphite_port=port,
            ssh_port=opts.ssh_port,
            ssh_key_file=opts.ssh_key_file,
            passphrase=opts.passphrase,
            user=opts.user,
            graphite_location=opts.graphite_location,
            graphite_user=opts.graphite_user,
            storage_usage_warning=opts.storage_usage_warning,
            storage_usage_critical=opts.storage_usage_critical,
            graphite_cache_name=opts.graphite_cache_name,
            graphite_relay_name=opts.graphite_relay_name,
            graphite_conf_file=GRAPHITE_CONF_FILE,
        )
        # Every server reports into the same Result
        checker.result = shared_result
        checker.do_all_check()

        title = 'Server %s used by module : %s' % (host, HTMLTag.color_text(module_name))
        if can_post_data:
            post_message = u'Module can post data.'
        else:
            post_message = u'Module cannot post data on server [ %s ]' % (u'%s:%s' % (host, port))
        # At this point server_state is still the raw "can post data" state
        checker.result_add_check(server_state, post_message, check_index=0, step_name=None)

        check_list_str = HTMLList.header_list(title, checker.summary)
        if checker.is_relay:
            check_list_str += HTMLList.header_list('Node information', checker.nodes_summary)
        note = HTMLTag.color_text('Note: Refer to the <b>Shinken Graphite Status</b> check of each node for more details.', COLOR.GRAY, False, True)
        check_list_str += u'<div style="margin-left:13px;">%s</div>' % note

        # NOTE(review): checker.result is the Result shared by all servers, so this
        # status presumably also reflects the servers already checked - confirm.
        server_state = max(server_state, checker.result.status)

        row_headers.append('%s:%s is %s<br>%s' % (host, port, TAG_FOR_STATE[server_state], checker.get_mode()))
        rows.append([check_list_str])

    meta_check_graphite_server_long_output = HTMLTable.table([], rows, left_headers=row_headers, title='Graphite servers', compact_title=True, all_col_same_width=False)

    # First matching status wins, the default text is kept when none matches
    summary_by_status = (
        (EXIT_STATUS.OK, u'Graphite servers : All servers are available.'),
        (EXIT_STATUS.WARNING, u'Graphite servers : Some problems have been detected :'),
        (EXIT_STATUS.CRITICAL, u'Graphite servers : Critical errors have been detected :'),
        (EXIT_STATUS.UNKNOWN, u'Graphite servers : Some information are not available :'),
    )
    output = u'Graphite servers : EXIT_STATUS is not known'
    for known_status, summary in summary_by_status:
        if shared_result.status == known_status:
            output = summary
            break

    result.add_check(output=output, long_output=meta_check_graphite_server_long_output)

    # Copy the collected messages into the global result: criticals first, then warnings, then the other outputs
    message_groups = (
        ('criticals', COLOR.RED, EXIT_STATUS.CRITICAL),
        ('warnings', COLOR.ORANGE, EXIT_STATUS.WARNING),
        ('outputs', COLOR.BLACK, EXIT_STATUS.UNKNOWN),
    )
    for attribute_name, color, status in message_groups:
        for message in getattr(shared_result, attribute_name):
            bordered = HTMLTag.tag_border('<div class="skn-ln">%s%s</div>' % (HTMLTag.color_text(u'=> ', color), message), color)
            result.add_check(output=bordered, status=status)

    for metric_name, metric_value in shared_result.perf_data.iteritems():
        result.add_perf_data(metric_name, metric_value)


def check_modules(raw_stats):
    """Check the graphite_perfdata module statistics exposed by the daemon.

    Reads ``raw_stats['module_stats']['graphite_perfdata']`` and, for each
    module (sorted by name):

    * CRITICAL when no worker is started or when the number of started
      workers differs from the configured one ('nb_workers');
    * otherwise reports the last minute counters, the module load and one
      entry per worker. A module or worker load above 80% is a WARNING.

    The outcome is added to the module-level ``result`` (summary line, HTML
    table as long output, and two perf data per worker). When the daemon
    exposes no graphite_perfdata statistics a CRITICAL check is added and the
    function returns immediately.

    :param raw_stats: decoded payload of the daemon '/get_raw_stats' route
    """
    graphite_modules = raw_stats.get(u'module_stats', {}).get(u'graphite_perfdata', {})
    result.add_check(output=u'Module stats :')
    if not graphite_modules:
        _output = u'<div class="skn-ln">%s%s</div>' % (HTMLTag.color_text(u'=> ', COLOR.RED), u'%s - There is no information about graphite_perfdata module on the Broker daemon.' % TAG_CRITICAL)
        _output = HTMLTag.tag_border(_output, COLOR.RED)

        _long_output = HTMLTable.table(
            [],
            [[u'There is no information about graphite_perfdata module on this Broker daemon. Maybe, the broker cannot compute the stats in time. Please check the broker logs']],
            left_headers=[TAG_CRITICAL],
            title=u'Module stats',
            compact_title=True,
            all_col_same_width=False
        )

        result.add_check(EXIT_STATUS.CRITICAL, _output, long_output=_long_output)
        return

    headers = []
    modules_result = Result()
    for module_name, module_stats in sorted(graphite_modules.iteritems()):
        # One Result per module so that its status only reflects this module
        module_result = Result()
        workers = module_stats[u'workers']
        expected_nb_workers = module_stats[u'nb_workers']
        started_nb_workers = len(workers)
        if started_nb_workers == 0:
            module_result.add_check(EXIT_STATUS.CRITICAL, u'%s - The module does not have any workers.' % TAG_CRITICAL)
        elif started_nb_workers != expected_nb_workers:
            # The workers must be here and in good number (real versus expected)
            module_result.add_check(EXIT_STATUS.CRITICAL, u'%s - You only have %d workers but the module is configured to have %s.' % (TAG_CRITICAL, started_nb_workers, expected_nb_workers))
        else:
            last_minute_nb_metric_sent = module_stats[u'last_minute_nb_metric_sent']
            last_minute_sent_size = float(module_stats[u'last_minute_sent_size'])

            cumulative_parse_time = module_stats[u'cumulative_parse_time']
            cumulative_connection_time = module_stats[u'cumulative_connection_time']
            cumulative_other_time = module_stats[u'cumulative_other_time']

            last_minute_nb_broks_sent = module_stats[u'last_minute_broks_sent_nb']

            # Time sum is acceptable when the sum of time is lower than the number of workers (= number of seconds)
            time_sum = cumulative_other_time + cumulative_parse_time + cumulative_connection_time
            # Force a float division: with integer timings the Python 2 `/` would floor the ratio.
            # started_nb_workers cannot be 0 in this branch.
            module_load = (time_sum / float(started_nb_workers)) * 100

            module_result.add_check(EXIT_STATUS.OK, u'Number of metrics sent to graphite in the last minute: %s' % last_minute_nb_metric_sent)
            module_result.add_check(EXIT_STATUS.OK, u'Number of broks managed in the last minute : %s' % last_minute_nb_broks_sent)
            module_result.add_check(EXIT_STATUS.OK, u'Volume of metrics sent to graphite in the last minute : %s' % (Utils.print_human_readable_size(last_minute_sent_size)))
            module_result.add_check(EXIT_STATUS.WARNING if module_load > 80.0 else EXIT_STATUS.OK, u'The module load is %d%%' % module_load)
            module_result.add_check(EXIT_STATUS.OK, u'All workers (%s/%s) are running <br>' % (started_nb_workers, expected_nb_workers))
            for index, (worker_id, worker_data) in enumerate(sorted(workers.iteritems())):
                work_time, work_range = worker_data.get(u'work_time', (0, 0))
                # -1 samples are filtered out of the displayed sampling
                work_time_sampling = [i for i in worker_data.get(u'work_time_sampling', []) if i != -1]
                # Share of a minute spent working; 60.0 keeps the division a float one
                _worker_load = (work_time / 60.0) * 100
                worker_state = EXIT_STATUS.WARNING if _worker_load > 80.0 else EXIT_STATUS.OK
                broks = [
                    u'%d broks were processed during the last minute' % (worker_data.get(u'last_minute_broks_sent_nb', 0)),
                    HTMLList.simple_list([u'%s metrics were processed.' % (worker_data.get(u'last_minute_metrics_sent_nb', 0))])
                ]

                worker_lines = [
                    u'%s hosts are managed by this worker.' % (worker_data.get(u'number_of_managed_hosts', 0)),
                    u''.join(broks),
                    u'%s sent to graphite in the last minute' % (Utils.print_human_readable_size(worker_data.get(u'last_minute_sent_size', 0))),
                    u'Work time during the last %.3f seconds : %.3f seconds (%3d%%).' % (work_range, work_time, _worker_load),
                    u'Work time per 10 second sample : %s.' % u' - '.join([u'%.3fs' % i for i in work_time_sampling])
                ]

                worker_info = HTMLList.header_list(u'Worker with id %s is %s' % (worker_id, TAG_FOR_STATE[worker_state]), worker_lines)

                _is_last_worker = (index == (started_nb_workers - 1))
                module_result.add_check(worker_state, worker_info, no_new_line=_is_last_worker)
                # Perf data go straight to the global result
                result.add_perf_data(u'worker_%s_last_minute_metrics_sent_nb' % worker_id, worker_data.get(u'last_minute_metrics_sent_nb', 0))
                result.add_perf_data(u'worker_%s_last_minute_work_time' % worker_id, work_time)

        headers.append(u'%s : %s' % (module_name, TAG_FOR_STATE[module_result.status]))

        check_list_str = HTMLList.simple_list(module_result.outputs_no_sort)
        modules_result.add_check(module_result.status, [check_list_str], no_new_line=True)

    stats = HTMLTable.table([], modules_result.outputs_no_sort, left_headers=headers, title=u'Module stats', compact_title=True, all_col_same_width=False)

    output = u'Modules are running properly.'
    if modules_result.status != EXIT_STATUS.OK:
        output = u'The modules are not running properly.'

    result.add_check(status=modules_result.status, output=output, long_output=stats)


def main():
    """Entry point of the check.

    Parses the command line, fetches '/get_raw_stats' and
    '/check_graphite_write_status' from the daemon, runs the minimal daemon
    check, then the graphite server and module checks, and finally exits
    through the module-level ``result``.

    Exits early with CRITICAL when the timeout is not strictly positive, and
    with OK when the write status reports WRITE_STATUS.NO_MODULE.
    """
    HTMLTag.EXTRA_STYLE = u'.skn-met{margin:7px 7px;border:1px solid;width:calc(100% - 14px);}'
    HTMLTag.EXTRA_CLASS = u'skn-met'
    opts = _parse_args()

    timeout = opts.timeout
    if timeout <= 0:
        result.hard_exit(EXIT_STATUS.CRITICAL, 'The --timeout option (%s) must be greater than 0' % timeout)

    # Both requests target the same daemon
    daemon_uri = '%s:%s' % (opts.hostname, opts.port)

    raw_stats_body, _ = ShinkenUtils.request_get_daemon(result, DAEMON_TYPE, daemon_uri, '/get_raw_stats', timeout=timeout)
    raw_stats = json.loads(raw_stats_body)
    ShinkenUtils.minimal_check(result, raw_stats, DAEMON_TYPE, opts.shinken_supervisor_version)

    write_status_body, _ = ShinkenUtils.request_get_daemon(result, DAEMON_TYPE, daemon_uri, '/check_graphite_write_status', timeout=timeout)
    write_status = json.loads(write_status_body)

    # Nothing to check when the broker has no graphite_perfdata module
    if write_status['status'] == WRITE_STATUS.NO_MODULE:
        result.hard_exit(EXIT_STATUS.OK, 'No module "graphite_perfdata" on this broker')

    meta_check_graphite_server(write_status, opts)
    check_modules(raw_stats)

    # NOTE(review): argument-less add_check(), presumably a layout separator - confirm against Result
    result.add_check()

    result.exit(sorted_by_level=False)


# Run the check only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
