#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2019
# This file is part of Shinken Enterprise, all rights reserved.

import json
from datetime import datetime, timedelta

from shinkensolutions.lib_checks.common import Result, RaiseOnExitOptionParser, ParseOptionError, EXIT_STATUS, BREAK_LINE, ShinkenUtils, HTMLTag, HTMLList, HTMLTable, COLOR, Utils

# Version string appended to '%prog ' when the option parser is built.
VERSION = '0.1'
# Daemon type handed to the ShinkenUtils helpers called from main().
DAEMON_TYPE = 'broker'

# HTML tag displayed in each section header, keyed by exit status.
TAG_FOR_STATE = {
    EXIT_STATUS.OK      : HTMLTag.OK,
    EXIT_STATUS.WARNING : HTMLTag.WARNING,
    EXIT_STATUS.CRITICAL: HTMLTag.CRITICAL
}

# Factor applied in main() to the storage thresholds, which are given in MB on the command line.
# NOTE(review): presumably converts MB to bytes - confirm the unit of 'sla_current_storage_size'.
TO_MEGABYTE = 1000000
# strftime formats used for the dates shown in the check output.
DATE_FORMAT = '%Y/%m/%d'
DATE_WITH_TIME_FORMAT = '%Y/%m/%d - %H:%M:%S'

# Module-level state shared by every check_* function:
# - result collects the checks and perf data, and is used to exit the script
# - warnings / criticals collect the messages listed in the summary built by check_modules()
result = Result()
warnings = []
criticals = []

# Command line definition of the check.
# Hostname and port are validated as mandatory in _parse_args(); the thresholds are sanitized in main().
parser = RaiseOnExitOptionParser('%prog [options] [--help]', version='%prog ' + VERSION)
parser.add_option('-H', '--hostname', dest='hostname', help='The hostname of the shinken daemon')
parser.add_option('-p', '--port', dest='port', type='int', help='The port of the shinken daemon')
parser.add_option('-t', '--timeout', dest='timeout', type='int', default=3, help='timeout to connect to the shinken daemon. Default : 3')
parser.add_option('-m', '--minutes', dest='minutes_of_stats', type='int', default=1, help='Number of minutes worth of stats displayed. Default : 1 minute')
parser.add_option('--workerwarning', dest='worker_warning', type='float', default=40.00, help='Warning value for a worker\'s load. In percent. Default : 40.00%')
parser.add_option('--workercritical', dest='worker_critical', type='float', default=80.00, help='Critical value for a worker\'s load. In percent. Default : 80.00%')
# A storage threshold of 0 (the default) or less disables the corresponding check, see main().
parser.add_option('--storagewarning', dest='storage_warning', type='int', default=0, help='Warning value for storage size. In MB.')
parser.add_option('--storagecritical', dest='storage_critical', type='int', default=0, help='Critical value for storage size. In MB.')
parser.add_option('--shinkenversion', dest='shinken_supervisor_version', default='', help='The shinken version number used to compare with the monitored shinken. Mandatory if in shinken mode.')


def _parse_args():
    """Parse the command line and return the options object.

    Positional arguments are refused and both the hostname and the port
    must be provided. When parsing raises a ParseOptionError carrying a
    message, that message is reported through ``result.hard_exit`` with a
    CRITICAL status (presumably terminating the script - confirm); in
    every error case ``exit(0)`` is then called.
    """
    opts = None
    mandatory_options = (
        ('hostname', 'Missing parameter hostname (-H/--hostname)'),
        ('port', 'Missing parameter port (-p/--port)'),
    )
    try:
        opts, args = parser.parse_args()
        if args:
            parser.error('Does not accept arguments.')
        for option_name, error_message in mandatory_options:
            if not getattr(opts, option_name):
                parser.error(error_message)
    except ParseOptionError as parse_error:
        if parse_error.msg:
            # Multi-line parser messages are re-joined with the output line separator.
            message_lines = parse_error.msg.split('\n')
            result.hard_exit(EXIT_STATUS.CRITICAL, 'Fail to parse command argument: %s %s' % (BREAK_LINE, BREAK_LINE.join(message_lines)))
        exit(0)
    return opts


def get_human_readable_storage_size(size, exit_status):
    """Return ``size`` in human readable form, colored after ``exit_status``.

    The text is red for CRITICAL, orange for WARNING and green for any
    other status.
    """
    if exit_status == EXIT_STATUS.CRITICAL:
        text_color = COLOR.RED
    elif exit_status == EXIT_STATUS.WARNING:
        text_color = COLOR.ORANGE
    else:
        text_color = COLOR.GREEN
    readable_size = Utils.print_human_readable_size(size)
    return HTMLTag.color_text(readable_size, text_color)


def get_date_string_from_timestamp(timestamp, date_stirng_format):
    """Format a POSIX timestamp as a local-time string.

    ``date_stirng_format`` is a strftime format string.
    """
    local_datetime = datetime.fromtimestamp(timestamp)
    return local_datetime.strftime(date_stirng_format)


def check_writer(module_name, module_stats, minutes_of_stats, warning, critical):
    """Report the state of the SLA writer workers.

    Every worker found in ``module_stats['workers']`` gets its own section
    listing the elements it manages, its SLA write statistics and its load.
    A dead worker, or a load reaching ``critical``, makes the section
    CRITICAL; a load reaching ``warning`` makes it WARNING. The matching
    messages are appended to the module-level ``criticals`` / ``warnings``
    lists, and the section plus perf data are added to ``result``.

    :param module_name: name of the SLA module, upper-cased in the section title
    :param module_stats: raw statistics dict of the SLA module
    :param minutes_of_stats: number of minutes of SLA write statistics to display
    :param warning: worker load, in percent, from which a worker is WARNING
    :param critical: worker load, in percent, from which a worker is CRITICAL
    :return: tuple (status, number of hosts + clusters + checks managed by the living workers)
    """
    status = EXIT_STATUS.OK
    lines = []

    total_hosts_managed = 0
    total_clusters_managed = 0
    total_checks_managed = 0

    worker_stats = module_stats.get('workers', {})
    nb_workers = len(worker_stats)
    for worker_index, (worker_id, worker_data) in enumerate(sorted(worker_stats.iteritems()), 1):
        if not worker_data['alive']:
            dead_text = HTMLTag.color_text('dead', COLOR.RED)
            criticals.append('SLA - Writer - Worker %s is %s' % (worker_id, dead_text))
            lines.append('Worker %s is %s' % (worker_id, dead_text))
            status = max(status, EXIT_STATUS.CRITICAL)
            continue

        nb_hosts_managed = worker_data['number_of_managed_hosts']
        nb_clusters_managed = worker_data['number_of_managed_clusters']
        nb_checks_managed = worker_data['number_of_managed_checks']
        sla_stats = worker_data['sla_stats']

        work_time, work_range = worker_data['work_time']
        # Divide by a float: under Python 2 an integer work_time would otherwise be
        # floor-divided and the load would collapse to a multiple of 100.
        # NOTE(review): the load is computed over a fixed 60 seconds although the label
        # below prints work_range - confirm both always match.
        worker_load = (work_time / 60.0) * 100

        if worker_load >= critical:
            worker_load_exit_status = EXIT_STATUS.CRITICAL
            worker_load_color = COLOR.RED
        elif worker_load >= warning:
            worker_load_exit_status = EXIT_STATUS.WARNING
            worker_load_color = COLOR.ORANGE
        else:
            worker_load_exit_status = EXIT_STATUS.OK
            worker_load_color = COLOR.GREEN
        worker_load_str = str(round(worker_load, 2)) + '%'
        colored_worker_load = HTMLTag.color_text(worker_load_str, worker_load_color)

        total_hosts_managed += nb_hosts_managed
        total_clusters_managed += nb_clusters_managed
        total_checks_managed += nb_checks_managed

        worker_lines = [
            'Hosts: %d' % nb_hosts_managed,
            'Clusters: %d' % nb_clusters_managed,
            'Checks: %d' % nb_checks_managed,
        ]

        if len(sla_stats) >= 2:
            # Each sample is indexed [0], [1], [2]; the deltas below are displayed as
            # elapsed seconds, number of SLAs written and write execution time.
            # Presumably index 0 is the newest sample and there are 6 samples per
            # minute - TODO confirm against the broker module.
            max_index = min(minutes_of_stats * 6, len(sla_stats) - 1)
            newest, oldest = sla_stats[0], sla_stats[max_index]
            delta = (newest[0] - oldest[0], newest[1] - oldest[1], newest[2] - oldest[2])
            worker_lines.append('SLAs written in the last %g seconds: %d' % (round(delta[0], 2), delta[1]))
            worker_lines.append('SLAs write execution time: %g seconds' % round(delta[2], 3))

            # Perf data always cover the same 6-sample window, whatever minutes_of_stats is.
            if len(sla_stats) >= 7:
                result.add_perf_data('worker_%s_sla_last_minute_write_nb' % worker_id, sla_stats[0][1] - sla_stats[6][1])
                result.add_perf_data('worker_%s_sla_last_minute_write_time' % worker_id, sla_stats[0][2] - sla_stats[6][2])
        else:
            worker_lines.append('SLAs written in the last minute: 0')
        worker_lines.append('Worker load in the last %g seconds: %s' % (work_range, colored_worker_load))
        status = max(status, worker_load_exit_status)

        result.add_perf_data('worker_%s_worker_load' % worker_id, worker_load)

        msg = 'SLA - Writer - Worker %s\'s load is %s' % (worker_id, colored_worker_load)
        if worker_load_exit_status == EXIT_STATUS.WARNING:
            warnings.append(msg)
        elif worker_load_exit_status == EXIT_STATUS.CRITICAL:
            criticals.append(msg)

        worker_list = HTMLList.header_list("Worker %s" % worker_id, worker_lines)
        # Separate the worker sections, except after the last worker.
        if worker_index != nb_workers:
            worker_list += BREAK_LINE
        lines.append(worker_list)

    # The totals are displayed first, before the per-worker sections.
    lines.insert(0, HTMLList.header_list("Writing RAW DATA", ['Hosts: %d' % total_hosts_managed, 'Clusters: %d' % total_clusters_managed, 'Checks: %d' % total_checks_managed]) + BREAK_LINE)
    total_elements_managed_by_shinken = total_hosts_managed + total_clusters_managed + total_checks_managed

    long_output = HTMLTable.table([], [[HTMLList.simple_list(lines)]], left_headers=['%s - Writer:<br>%s' % (module_name.upper(), TAG_FOR_STATE[status])], all_col_same_width=False) + BREAK_LINE
    result.add_check(status=status, long_output=long_output)
    return status, total_elements_managed_by_shinken


def check_archive(module_name, module_stats):
    """Report the state of the SLA archive.

    The section describes either the archive currently in progress or the
    latest finished one, then the previous archive when it has an execution
    time, then the date of the oldest stored SLA when known. The section is
    added to ``result``.

    :param module_name: name of the SLA module, upper-cased in the section title
    :param module_stats: raw statistics dict of the SLA module
    :return: the status of the section, which is always EXIT_STATUS.OK
    """
    status = EXIT_STATUS.OK
    sections = []

    in_progress = module_stats.get('archive_in_progress', False)
    current_total = module_stats.get('total_sla_current_archive', 0)
    latest_start = get_date_string_from_timestamp(module_stats.get('latest_archive_start_time', 0), DATE_WITH_TIME_FORMAT)
    oldest_sla_date = module_stats.get('oldest_sla_date', 0)

    # The progression is only shown with a non-zero total, which also protects the division.
    if in_progress and current_total > 0:
        archived_so_far = module_stats.get('sla_archived_during_current_archive', 0)
        progression_date = get_date_string_from_timestamp(module_stats.get('archive_progression_date', 0), DATE_FORMAT)
        percent_archived = round(float(archived_so_far) / float(current_total) * 100, 2)

        current_details = [
            'Archive progression for %s: %g%% (%d / %d)' % (progression_date, percent_archived, archived_so_far, current_total),
            'Start time: %s' % latest_start,
            'Data for %d SLAs is being processed' % current_total,
        ]
        sections.append(HTMLList.header_list('Archive is currently in progress (Calculating and compressing SLA data)', current_details))
    else:
        latest_execution_time = module_stats.get('latest_archive_execution_time', 0)
        if not latest_execution_time:
            latest_details = ['An archive has never been launched']
        else:
            latest_execution_text = str(round(latest_execution_time, 3)) + ' seconds'
            total_archived = module_stats.get('total_sla_archived', 0)
            latest_details = [
                'Start time: %s' % latest_start,
                'Execution time: %s' % latest_execution_text,
                'Data for %d SLAs was processed' % total_archived,
            ]
        latest_section = HTMLList.header_list('Latest archive (Calculated and compressed SLA data)', latest_details)
        # NOTE(review): latest_start is a formatted date string, so this condition looks
        # always true - confirm whether the raw timestamp was meant to be tested.
        if latest_start or oldest_sla_date:
            latest_section += BREAK_LINE
        sections.append(latest_section)

    previous_start = get_date_string_from_timestamp(module_stats.get('previous_archive_start_time', 0), DATE_WITH_TIME_FORMAT)
    previous_execution_time = module_stats.get('previous_archive_execution_time', 0)
    previous_sla_archived = module_stats.get('previous_archive_sla_archived', 0)
    if previous_execution_time:
        previous_execution_text = str(round(previous_execution_time, 3)) + ' seconds'
        previous_details = [
            'Start time: %s' % previous_start,
            'Execution time: %s' % previous_execution_text,
            'Data for %d SLAs was processed' % previous_sla_archived,
        ]
        sections.append(HTMLList.header_list('Previous archive', previous_details) + BREAK_LINE)

    if oldest_sla_date:
        oldest_sla_text = get_date_string_from_timestamp(oldest_sla_date, DATE_FORMAT)
        sections.append('Date the oldest SLA\'s data was stored: %s' % oldest_sla_text)

    section_title = '%s - Archive:<br>%s' % (module_name.upper(), TAG_FOR_STATE[status])
    long_output = HTMLTable.table([], [[HTMLList.simple_list(sections)]], left_headers=[section_title], all_col_same_width=False) + BREAK_LINE
    result.add_check(status=status, long_output=long_output)
    return status


def check_migration(module_name, module_stats):
    """Report the state of the SLA database migration.

    The section tells whether the database is fully migrated, shows the
    progression of a running migration, and the duration of the last
    migration when none is running. The section is added to ``result``.

    :param module_name: name of the SLA module, upper-cased in the section title
    :param module_stats: raw statistics dict of the SLA module
    :return: the status of the section, which is always EXIT_STATUS.OK
    """
    status = EXIT_STATUS.OK
    sections = []

    all_database_migrated = module_stats.get('all_database_migrated', True)
    migration_in_progress = module_stats.get('migration_in_progress', False)
    execution_time_last_migration = module_stats.get('execution_time_last_migration', 0)
    total_sla_to_migrate = module_stats.get('total_sla_to_migrate', 0)

    migrate_data_lines = []
    if not all_database_migrated:
        migrate_data_lines.append('%d SLAs are using an old format' % total_sla_to_migrate)
    elif total_sla_to_migrate > 0:
        migrate_data_lines.append('%d SLAs have been updated to the latest format' % total_sla_to_migrate)

    migration_state_text = 'The database is%sfully migrated' % (' ' if all_database_migrated else ' not ')
    if migrate_data_lines and total_sla_to_migrate > 0:
        # Details are only worth a sub-list when at least one SLA is concerned.
        migrate_data_list = HTMLList.header_list(migration_state_text, migrate_data_lines)
        if execution_time_last_migration:
            migrate_data_list += BREAK_LINE
        sections.append(migrate_data_list)
    else:
        sections.append(migration_state_text)

    # A non-zero total is required before computing the progression percentage.
    if not all_database_migrated and total_sla_to_migrate and migration_in_progress:
        nb_sla_left_to_migrate = module_stats.get('nb_sla_left_to_migrate', 0)
        nb_sla_migrated = total_sla_to_migrate - nb_sla_left_to_migrate
        percent_sla_migrated = round(float(nb_sla_migrated) / float(total_sla_to_migrate) * 100, 2)
        progression_text = 'Migration progression: %g%% (%d / %d)' % (percent_sla_migrated, total_sla_to_migrate - nb_sla_left_to_migrate, total_sla_to_migrate)
        sections.append(HTMLList.header_list('Migration is currently in progress', [progression_text]))

    if not migration_in_progress and execution_time_last_migration:
        sections.append('Time taken for last migration: %s' % (str(round(execution_time_last_migration, 3)) + ' seconds'))

    section_title = '%s - Migration:<br>%s' % (module_name.upper(), TAG_FOR_STATE[status])
    long_output = HTMLTable.table([], [[HTMLList.simple_list(sections)]], left_headers=[section_title], all_col_same_width=False) + BREAK_LINE
    result.add_check(status=status, long_output=long_output)
    return status


def check_daily_clean(module_name, module_stats, total_elements_managed_by_shinken, warning, critical):
    """Report the state of the daily database cleanup and of the SLA storage size.

    The section shows the retention policy, the progression of a running
    cleanup or the duration of the last one, and the current storage size
    compared to the thresholds. The matching message is appended to the
    module-level ``warnings`` / ``criticals`` lists, and the section plus
    the 'storage_size' perf data are added to ``result``.

    :param module_name: name of the SLA module, upper-cased in the section title
    :param module_stats: raw statistics dict of the SLA module
    :param total_elements_managed_by_shinken: number of elements managed by the writer workers
    :param warning: storage size from which the section is WARNING, None to disable
    :param critical: storage size from which the section is CRITICAL, None to disable
    :return: the status of the section
    """
    status = EXIT_STATUS.OK
    lines = []

    days_to_keep_sla = module_stats.get('days_to_keep_sla', -1)
    # -1 is the value used when no retention period is provided.
    if days_to_keep_sla != -1:
        threshold_date = datetime.now() - timedelta(days=days_to_keep_sla)
        threshold_date_str = threshold_date.strftime(DATE_FORMAT)
        daily_clean_in_progress = module_stats.get('daily_clean_in_progress', False)
        execution_time_last_daily_clean = module_stats.get('execution_time_last_daily_clean', 0)
        total_sla_to_remove = module_stats.get('total_sla_to_remove', 0)
        nb_sla_left_to_remove = module_stats.get('nb_sla_left_to_remove', 0)

        retention_text = 'SLAs stored before %s will be removed (%d days)' % (threshold_date_str, days_to_keep_sla)
        if total_sla_to_remove > 0:
            if nb_sla_left_to_remove == 0:
                to_remove_lines = ['SLAs removed: %s' % total_sla_to_remove]
            else:
                to_remove_lines = ['SLAs to remove: %s' % total_sla_to_remove]
            lines.append(HTMLList.header_list(retention_text, to_remove_lines) + BREAK_LINE)
        else:
            lines.append(retention_text)

        if daily_clean_in_progress:
            if total_sla_to_remove > 0:
                nb_sla_removed = total_sla_to_remove - nb_sla_left_to_remove
                percent_sla_removed = round(float(nb_sla_removed) / float(total_sla_to_remove) * 100, 2)
                progression_text = 'Removal progression: %g%% (%d / %d)' % (percent_sla_removed, nb_sla_removed, total_sla_to_remove)
                lines.append(HTMLList.header_list('Database cleanup is currently in progress', [progression_text]) + BREAK_LINE)
            else:
                # A cleanup flagged as running with nothing counted yet used to raise a
                # ZeroDivisionError; no percentage can be computed, so only state it is running.
                lines.append('Database cleanup is currently in progress')
        elif execution_time_last_daily_clean:
            execution_time_last_daily_clean_str = str(round(execution_time_last_daily_clean, 3)) + ' seconds'
            lines.append('Time taken for last daily clean: %s' % execution_time_last_daily_clean_str)
    else:
        lines.append('SLAs are kept forever')

    sla_current_storage_size = module_stats.get('sla_current_storage_size', 0)
    sla_current_storage_size_exit_status = EXIT_STATUS.OK
    # The critical threshold is tested last so that it wins over the warning one.
    if warning is not None and sla_current_storage_size >= warning:
        sla_current_storage_size_exit_status = EXIT_STATUS.WARNING
    if critical is not None and sla_current_storage_size >= critical:
        sla_current_storage_size_exit_status = EXIT_STATUS.CRITICAL
    status = max(status, sla_current_storage_size_exit_status)

    msg = 'SLA - Daily clean - SLA storage size is bigger than threshold of %s'
    if sla_current_storage_size_exit_status == EXIT_STATUS.WARNING:
        warnings.append(msg % get_human_readable_storage_size(warning, sla_current_storage_size_exit_status))
    elif sla_current_storage_size_exit_status == EXIT_STATUS.CRITICAL:
        criticals.append(msg % get_human_readable_storage_size(critical, sla_current_storage_size_exit_status))

    result.add_perf_data('storage_size', sla_current_storage_size)

    total_unique_elements_stored = module_stats.get('total_unique_elements_stored', 0)
    # Never display a negative number of orphan elements.
    elements_stored_but_not_managed = max(total_unique_elements_stored - total_elements_managed_by_shinken, 0)
    storage_size_text = 'SLA storage size: %s' % get_human_readable_storage_size(sla_current_storage_size, sla_current_storage_size_exit_status)
    if elements_stored_but_not_managed > 0 or total_elements_managed_by_shinken > 0:
        sla_storage_lines = [
            'Elements monitored: %d' % total_elements_managed_by_shinken,
            'Elements stored but no longer monitored: %d' % elements_stored_but_not_managed,
        ]
        lines.append(HTMLList.header_list(storage_size_text, sla_storage_lines))
    else:
        lines.append(storage_size_text)

    long_output = HTMLTable.table([], [[HTMLList.simple_list(lines)]], left_headers=['%s - Database cleanup:<br>%s' % (module_name.upper(), TAG_FOR_STATE[status])], all_col_same_width=False)
    result.add_check(status=status, long_output=long_output)
    return status


def check_modules(raw_stats, minutes_of_stats, thresholds):
    """Check the SLA module of the broker and add the summary to ``result``.

    The check stops with a CRITICAL entry when there is not exactly one SLA
    module, when no module info is available for it, or when the module
    reports a FATAL or CRITICAL status. Otherwise the writer, archive,
    migration and daily clean sections are computed and a titled summary
    carrying the worst status is added.

    :param raw_stats: raw statistics dict returned by the daemon
    :param minutes_of_stats: number of minutes of SLA write statistics to display
    :param thresholds: sequence read by index as worker warning, worker critical, storage warning, storage critical
    """
    critical_tag = TAG_FOR_STATE[EXIT_STATUS.CRITICAL]

    sla_modules = raw_stats.get('module_stats', {}).get('sla', {})
    if not sla_modules:
        result.add_check(EXIT_STATUS.CRITICAL, '%s - There is no SLA module on the Broker daemon.' % critical_tag)
        return
    if len(sla_modules) > 1:
        result.add_check(EXIT_STATUS.CRITICAL, '%s - There is more than one SLA module on the Broker daemon.' % critical_tag)
        return

    modules_info = raw_stats.get('modules_info', [])
    if not modules_info:
        result.add_check(EXIT_STATUS.CRITICAL, '%s - There is no module info on the Broker daemon.' % critical_tag)
        return
    sla_modules_info = [module_info for module_info in modules_info if module_info['type'] == 'sla']
    if not sla_modules_info:
        result.add_check(EXIT_STATUS.CRITICAL, '%s - There is no SLA module info on the Broker daemon.' % critical_tag)
        return
    sla_module_info = sla_modules_info[0]

    # A module that reports itself FATAL or CRITICAL is relayed as is, without further checks.
    sla_module_status = sla_module_info.get('status', [])
    _output = sla_module_info.get('output', '')
    for blocking_state in ('FATAL', 'CRITICAL'):
        if sla_module_status == blocking_state:
            # The module output is either a single string or an iterable of strings.
            module_outputs = [_output] if isinstance(_output, basestring) else _output
            for module_output in module_outputs:
                result.add_check(EXIT_STATUS.CRITICAL, '%s %s' % (HTMLTag.state_tag(blocking_state), module_output))
            return

    module_name, module_stats = sorted(sla_modules.iteritems())[0]
    if not module_stats.get('database_status', ''):
        result.add_check(EXIT_STATUS.CRITICAL, 'Database status: %s' % critical_tag)

    writer_status, total_elements_managed_by_shinken = check_writer(module_name, module_stats, minutes_of_stats, thresholds[0], thresholds[1])
    archive_status = check_archive(module_name, module_stats)
    migration_status = check_migration(module_name, module_stats)
    daily_clean_status = check_daily_clean(module_name, module_stats, total_elements_managed_by_shinken, thresholds[2], thresholds[3])

    check_status = max(writer_status, archive_status, migration_status, daily_clean_status)

    # Messages collected by the check_* functions, criticals listed before warnings.
    critical_lines = ['%s %s' % (HTMLTag.CRITICAL, critical) for critical in criticals]
    warning_lines = ['%s %s' % (HTMLTag.WARNING, warning) for warning in warnings]

    summary = []
    if check_status == EXIT_STATUS.CRITICAL:
        summary.append('Module is in a critical state.')
        summary.extend(critical_lines)
        summary.extend(warning_lines)
    elif check_status == EXIT_STATUS.WARNING:
        summary.append('Module has some anomalies')
        summary.extend(warning_lines)
    elif check_status == EXIT_STATUS.OK:
        summary.append('Module is working as intended.')
    result.add_check(status=check_status, output='<br>'.join(summary), title=True)


def main():
    """Entry point: sanitize the options, query the daemon and run the checks.

    Out-of-range values fall back to their defaults (1 minute of stats,
    40% / 80% worker load). A storage threshold that is not strictly
    positive is disabled by passing None to the checks.
    """
    opts = _parse_args()

    minutes_of_stats = opts.minutes_of_stats if opts.minutes_of_stats > 0 else 1
    worker_warning = 40.00 if (opts.worker_warning < 0 or opts.worker_warning > 100) else opts.worker_warning
    worker_critical = 80.00 if (opts.worker_critical < 0 or opts.worker_critical > 100) else opts.worker_critical
    # Storage thresholds are given in MB on the command line.
    storage_warning = opts.storage_warning * TO_MEGABYTE if opts.storage_warning > 0 else None
    storage_critical = opts.storage_critical * TO_MEGABYTE if opts.storage_critical > 0 else None

    daemon_uri = '%s:%s' % (opts.hostname, opts.port)
    html, _connection_time = ShinkenUtils.request_get_daemon(result, DAEMON_TYPE, daemon_uri, '/get_raw_stats', timeout=opts.timeout)
    raw_stats = json.loads(html)
    ShinkenUtils.minimal_check(result, raw_stats, DAEMON_TYPE, opts.shinken_supervisor_version)

    thresholds = [worker_warning, worker_critical, storage_warning, storage_critical]
    check_modules(raw_stats, minutes_of_stats, thresholds)
    result.exit()


# Run the check only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
