#!/usr/bin/env python
# -*- coding: utf-8 -*-
import heapq
import time
from bisect import bisect_left
from threading import RLock

from .log import logger, get_chapter_string

# Sentinel range key: the bucket that receives every time above the highest configured range
_OVER_THE_TOP = -1

# Default bucket upper bounds, in ms (times are converted to ms before being bucketed).
# The list must stay sorted: the sentinel (-1) comes first, then the bounds in ascending order.
_DEFAULT_EXEC_STAT_RANGE = [_OVER_THE_TOP, 50, 100, 200, 300, 400, 1000, 5000, 15000]  # Upper value, need to be sorted!

# Number of entries returned by ExecutorStats.get_top_usage
TOP_SIZE = 5

# Prefix put at the start of every log line emitted by this module
CHAPTER_EXECUTOR_STATS = get_chapter_string('STATS')


# This class is used to manage ranges and check cpu times, it will mainly give
# data to get_raw_stats for shinken checks:
# * get_top_usage => (can be costly) get top TOP_SIZE check execution times
# * get_stats => give a repartition (nb, %) of check times across ranges
# * get_timeout_stats => most recent commands in timeout and their occurrences
#
# All the code below runs unchanged on Python 2 and Python 3: no iteritems(),
# xrange() or in-place sort of dict.keys().
class ExecutorStats(object):
    def __init__(self):
        """Start with the default ranges, empty buckets and no recorded timeout."""
        self._lock = RLock()  # get_raw_stats will be asked in a thread, so protect us!
        # Copy: never share (and so never risk mutating) the module-level default list
        self._exec_stat_ranges = list(_DEFAULT_EXEC_STAT_RANGE)
        self._generate_top_usage_structure()

        self._keep_timeout_time = 1200
        self._timeouts_by_date = {}  # epoch => {cmd: exec_time}
        self._timeouts_occurrences = {}  # cmd => [epoch1, epoch2, etc]


    def _generate_top_usage_structure(self):
        """(Re)create one empty bucket per configured range, dropping every recorded time."""
        with self._lock:
            self._cmd_cpu_usage = {}
            for _range in self._exec_stat_ranges:
                # dict, so if we need to add another structure for the top, we are ready
                self._cmd_cpu_usage[_range] = {'cpu_times': {}}


    def reset(self):
        """Drop every recorded cpu time and timeout; configured ranges are kept."""
        # Everything is swapped under the lock so a reader thread never sees
        # fresh buckets together with old timeouts (the RLock is re-entrant).
        with self._lock:
            self._generate_top_usage_structure()
            self._timeouts_by_date = {}  # epoch => {cmd: exec_time}
            self._timeouts_occurrences = {}  # cmd => [epoch1, epoch2, etc]


    def set_keep_timeout_time(self, keep_timeout_time):
        """Set how far back (same unit as timeout dates) clean_timeouts keeps entries."""
        self._keep_timeout_time = keep_timeout_time


    def get_keep_timeout_time(self):
        """Return the retention period used by clean_timeouts."""
        return self._keep_timeout_time


    def update_ranges(self, exec_stat_ranges):
        """Replace the range upper bounds (in ms).

        exec_stat_ranges is a list or tuple of upper bounds; it is not
        modified. The over-the-top sentinel is added if missing and the
        result is sorted. If the effective ranges changed, every recorded
        cpu time is dropped, as the old buckets no longer make sense.
        """
        with self._lock:
            configured_ranges = list(exec_stat_ranges)  # copy it (a tuple is fine too)
            # Put -1 to catch even very BIG entries
            if _OVER_THE_TOP not in configured_ranges:
                configured_ranges.insert(0, _OVER_THE_TOP)
            configured_ranges.sort()  # be sure it's sorted
            if configured_ranges == self._exec_stat_ranges:
                return
            logger.info('%s Stat ranges are now configured as: %s' % (CHAPTER_EXECUTOR_STATS, ','.join([str(e) for e in exec_stat_ranges])))

            self._exec_stat_ranges = configured_ranges

            # We are changing ranges, so we are cleaning the stats
            self._generate_top_usage_structure()


    def _find_range(self, cpu_time):
        """Return the key of the bucket a time (in ms) belongs to.

        A bucket holds the times up to and including its upper bound. Times
        above the highest bound go to the sentinel bucket, which is the first
        entry of the sorted ranges.
        """
        ranges = self._exec_stat_ranges
        idx = bisect_left(ranges, cpu_time)
        if idx == len(ranges):
            # Over the maximum in the .cfg (or 15s by default): sentinel bucket
            idx = 0
        elif idx == 0 and len(ranges) > 1:
            # A time at or below the sentinel value (a negative duration) is not
            # "over the top": file it in the lowest real bucket instead.
            idx = 1
        return ranges[idx]


    def update_action_time(self, command_name, cpu_time):
        """Record the last cpu time (in seconds) of a command.

        A command lives in exactly one bucket: if its previous time was in
        another range, that old entry is removed first.
        """
        with self._lock:
            cpu_time = int(cpu_time * 1000.0)  # set in ms so we don't work with floats
            current_cpu_times = self._cmd_cpu_usage[self._find_range(cpu_time)]['cpu_times']

            if command_name not in current_cpu_times:
                # First time, or the command changed range since previous run:
                # clean it from the range it was in (there is at most one)
                for old_range_entry in self._cmd_cpu_usage.values():
                    if old_range_entry['cpu_times'].pop(command_name, None) is not None:
                        break
            current_cpu_times[command_name] = cpu_time


    def get_top_usage(self):
        """Return up to TOP_SIZE (command_name, seconds) tuples, biggest time first.

        Buckets are visited from the highest times to the lowest, and the scan
        stops as soon as enough entries are found. We do not sort a whole
        bucket because it is too costly: a bounded heap keeps only the top.
        """
        top_limit = TOP_SIZE
        t0 = time.time()
        nb_range_look = 0
        nb_elements_look = 0
        top5 = []
        with self._lock:
            ranges = self._exec_stat_ranges
            # Sentinel (over the top) bucket first, then the others from higher to lower
            range_from_higher = ranges[:1] + ranges[:0:-1]
            for _range in range_from_higher:
                nb_range_look += 1
                cpu_times = self._cmd_cpu_usage[_range]['cpu_times']
                for command_name, cpu_time in cpu_times.items():
                    nb_elements_look += 1
                    entry = (cpu_time, command_name)  # IMPORTANT: heapq need time as first index!
                    if len(top5) < top_limit:
                        heapq.heappush(top5, entry)
                    elif cpu_time > top5[0][0]:
                        # Heap is full: replace its smallest element by the current one
                        heapq.heapreplace(top5, entry)

                # Maybe with this only range we are already full, and we start from higher, so we can stop now
                if len(top5) >= top_limit:
                    break
            # A heap is not sorted: sort it (bigger first), give back seconds, and inverse time,name -> name,time
            top5_sorted = [(name, ms / 1000.0) for (ms, name) in sorted(top5, reverse=True)]

            logger.debug('%s top%s execution time %.3fs (loop over %s ranges and %s elements)' % (CHAPTER_EXECUTOR_STATS, top_limit, time.time() - t0, nb_range_look, nb_elements_look))

            return top5_sorted


    def get_stats(self):
        """Return {range: [nb_commands, percent]} for every configured range.

        percent is an integer (rounded down) share of all recorded commands.
        """
        t0 = time.time()
        with self._lock:
            stats = {}
            total = 0
            for _range, range_entry in self._cmd_cpu_usage.items():
                nb_entry = len(range_entry['cpu_times'])
                stats[_range] = [nb_entry, 0]
                total += nb_entry
            # Separate divisor, so the log below still reports the real total when there is nothing
            divisor = total or 1
            for stat_entry in stats.values():
                stat_entry[1] = stat_entry[0] * 100 // divisor  # // => integer on python 2 and 3

            logger.debug('%s Compute  "Checks per CPU running time" : %.3fs (on a total of %s checks)' % (CHAPTER_EXECUTOR_STATS, time.time() - t0, total))
            return stats


    def clean_timeouts(self):
        """Forget the timeouts older than the keep timeout time.

        * _timeouts_by_date => just drop old dates
        * _timeouts_occurrences => keep only recent occurrences, and drop the
                                   command when none is left
        """
        start = time.time()
        clean_date = int(start) - self._keep_timeout_time
        with self._lock:
            nb_commands = len(self._timeouts_occurrences)
            need_to_del_dates = [date for date in self._timeouts_by_date if date < clean_date]
            for date in need_to_del_dates:
                del self._timeouts_by_date[date]
            # list() => real copy of the keys, as we del in place
            for command_name in list(self._timeouts_occurrences):
                # Filter every occurrence: we do not rely on dates being appended in order
                recent_occurrences = [date for date in self._timeouts_occurrences[command_name] if date >= clean_date]
                if recent_occurrences:
                    self._timeouts_occurrences[command_name] = recent_occurrences
                else:
                    del self._timeouts_occurrences[command_name]
            nb_commands_after = len(self._timeouts_occurrences)
        logger.info('%s Clean checks in timeouts structure in %.3fs (before clean: %s commands in timeouts, after clean: %s)' % (CHAPTER_EXECUTOR_STATS, time.time() - start, nb_commands, nb_commands_after))


    def add_timeout(self, command_name, execution_time, timeout_date):
        """Record that command_name went in timeout at timeout_date (epoch).

        Only the last execution_time of a command is kept for a given date,
        but every call adds one occurrence for the command.
        """
        with self._lock:
            self._timeouts_by_date.setdefault(timeout_date, {})[command_name] = execution_time
            self._timeouts_occurrences.setdefault(command_name, []).append(timeout_date)


    def get_timeout_stats(self):
        """Return up to TOP_SIZE of the most recent commands in timeout.

        Each element is (command_name, execution_time, timeout_date,
        nb_occurrences); a command is listed once, at its newest date.
        """
        top_limit = TOP_SIZE
        t0 = time.time()
        with self._lock:
            r = []
            already_added = set()
            # take newer first
            for timeout_date in sorted(self._timeouts_by_date, reverse=True):
                commands_in_timeout = self._timeouts_by_date[timeout_date]
                # not need for sorting here, it's on the same second, if the check
                # need it, it can do it itself
                for command_name, execution_time in commands_in_timeout.items():
                    # Maybe it's a command we did have just before, skip it
                    if command_name in already_added:
                        continue
                    nb_occurrences = len(self._timeouts_occurrences.get(command_name, ()))
                    if nb_occurrences == 0:  # What? the structure have incoherency?
                        continue
                    r.append((command_name, execution_time, timeout_date, nb_occurrences))
                    already_added.add(command_name)
                    # If we already found our elements, do not need to take more in this sec
                    if len(r) >= top_limit:
                        break
                # If we already have found our element, do not need to look at previous sec
                if len(r) >= top_limit:
                    break
            logger.debug('%s Compute  "Checks in timeouts" stats : %.3fs in a total of %s commands in timeouts' % (CHAPTER_EXECUTOR_STATS, time.time() - t0, len(self._timeouts_occurrences)))
        return r
