#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2019
# This file is part of Shinken Enterprise, all rights reserved.
import errno
import json
import os
import shutil
import signal
import sys
import threading
import time
import traceback
from string import digits

# Python 2/3 compatibility: the basestring builtin only exists on Python 2
PY3 = sys.version_info >= (3,)
if PY3:
    basestring = str

# Local log file registered on the logger, and pid file of the running gatherer
GATHERER_LOG_FILE = '/var/log/shinken/gatherer.log'
GATHERER_PID_FILE = '/var/run/shinken/gatherer.pid'

# Multiplier used to turn the sector counters of /proc/diskstats into bytes
BYTES_PER_SECTOR = 512
# JSON file where IoStats persists the per-device util% history
IOSTATS_FILE = '/tmp/__check_graphite_iostats.tmp'

# Pid file of the former standalone iostats collector script (see _check_old_pid_file)
IOSTATS_COLLECTOR_PIDFILE = "/opt/graphite/storage/iostats_collector.pid"

# Handle on the background thread (re)created by assert_graphite_count_metrics_thread()
graphite_count_metrics_thread = None

# NOTE: written with the 0o prefix: the legacy 0022 form is a syntax error on Python 3,
#       while 0o022 is accepted by both Python 2.6+ and Python 3
DEFAULT_UMASK = 0o022
# Set umask to avoid problems when creating files
os.umask(DEFAULT_UMASK)

from shinkensolutions.lib_checks.graphite import GraphiteMetricsCounter
from shinken.util import set_process_name
from shinken.log import logger, get_chapter_string, get_section_string
from shinken.vmware_stats import VMWare_Stats_Compute

# Logger configuration, executed at import time: log into GATHERER_LOG_FILE, at INFO level,
# under the 'gatherer' name.
# NOTE(review): the exact effect of each call depends on shinken.log, not visible here.
logger.register_local_log(GATHERER_LOG_FILE)
logger.setLevel('INFO')
logger.set_name('gatherer')
logger.set_human_format()

# Exporting GATHERER_DEBUG=1 in the environment raises the log level to DEBUG
if os.environ.get('GATHERER_DEBUG', '0') == '1':
    logger.setLevel('DEBUG')

# Process name given to set_process_name() by the graphite counting thread
BASE_PROCESS_NAME = 'shinken-gatherer     [ Main daemon ]'

# Prefixes placed at the start of the log messages of this file
CHAPTER_DAEMON = get_chapter_string('DAEMON')
CHAPTER_IO_STATS = get_chapter_string('IO-STATS')


class IoStats(object):
    """Sample the disk counters of /proc/diskstats and keep a short util% history per disk.

    Typical use is to call launch() then dump() in a loop: launch() turns two successive
    raw samples into rates, dump() appends the util% of each disk to a bounded history
    and persists that history as JSON into IOSTATS_FILE.
    """

    # Maximum number of util% values kept per device in the persisted history
    MAX_HISTORY_SIZE = 60

    def __init__(self):
        # Raw counters and timestamp of the previous sample (0 = no sample taken yet)
        self.previous_raw = {}
        self.previous_time = 0

        # Columns for disk entry in /proc/diskstats
        self.columns_disk = ['major', 'minor', 'device', 'reads', 'reads_merged', 'read_sectors', 'read_ms', 'writes', 'writes_merged', 'write_sectors', 'write_ms', 'cur_ios', 'total_io_ms', 'total_io_weighted_ms']
        # We don't care about these fields
        # NOTE: write_ms and read_ms are over the sleep time, not sure about what it means
        self.columns_to_del_in_raw = ('major', 'minor', 'cur_ios', 'total_io_weighted_ms', 'read_ms', 'write_ms')

        # device name -> list of the last util% values, reloaded from the previous run if any
        self.data = {}
        if os.path.exists(IOSTATS_FILE):
            loaded = None
            try:
                with open(IOSTATS_FILE, 'r') as f:
                    loaded = json.loads(f.read())
            except (ValueError, IOError):
                # Unreadable or corrupted history: start with an empty one
                pass
            # A valid JSON document that is not an object cannot be used as history
            if isinstance(loaded, dict):
                self.data = loaded

    def _get_disk_stats(self, file_path='/proc/diskstats'):
        """Read the raw counters of the whole disks.

        :param file_path: file to parse, in the /proc/diskstats format
        :return: dict device name -> dict of the kept columns (ints, plus the 'device' name)
        :raises IOError: if the file cannot be read
        :raises ValueError: if a kept counter is not an integer
        """
        # ref: https://www.kernel.org/doc/Documentation/admin-guide/iostats.rst
        nb_columns = len(self.columns_disk)
        result = {}

        with open(file_path, 'r') as f:
            lines = f.readlines()

        for line in lines:
            fields = line.split()
            # Recent kernels append extra columns (discards, flushes) after the historical
            # ones: only the leading columns are read. Shorter lines (empty lines, old style
            # partition entries) are not disk entries.
            if len(fields) < nb_columns:
                continue

            # zip() stops at the shortest input, so the extra columns are ignored
            data = dict(zip(self.columns_disk, fields))
            device_name = data['device']

            # If there is a number in the device, we drop (don't look at partition)
            # NOTE: char in digits is faster than regexp re.search('\d+', value)
            # NOTE: this also drops whole disks that have a digit in their name
            if any(char in digits for char in device_name):
                continue

            # We don't care about some raw fields, the others are integer counters
            stats = {'device': device_name}
            for key, raw_value in data.items():
                if key == 'device' or key in self.columns_to_del_in_raw:
                    continue
                stats[key] = int(raw_value)

            result[device_name] = stats

        return result

    def compute_linux_disk_stats(self, new_raw_stats, diff_time):
        """Turn the difference between the previous and the new raw counters into rates.

        :param new_raw_stats: raw counters as returned by _get_disk_stats()
        :param diff_time: seconds elapsed since the previous sample (self.previous_raw)
        :return: dict device -> dict with the 'reads/s', 'reads_merged/s', 'writes/s',
                 'writes_merged/s', 'read_bytes/s', 'write_bytes/s' and 'util%' values (ints).
                 Empty if no time did elapse, as no rate can be computed then.
        """
        # No elapsed time (or a clock going backward): cannot divide by it
        if diff_time <= 0:
            return {}
        elapsed = float(diff_time)

        r = {}
        for (device, new_stats) in new_raw_stats.items():
            old_stats = self.previous_raw.get(device, None)
            # A new disk did spawn? wait a loop to compute it
            if old_stats is None:
                continue
            device_rates = {}
            r[device] = device_rates
            for (k, new_v) in new_stats.items():
                old_v = old_stats.get(k, None)
                # Counter not known at the previous sample: nothing to compare with
                if old_v is None:
                    continue
                # String = device name, but we already have it in the key path
                if isinstance(old_v, basestring):
                    continue

                delta = new_v - old_v
                # Some columns are finally computed in /s (diff/time)
                if k in ('reads', 'reads_merged', 'writes', 'writes_merged'):
                    device_rates[k + '/s'] = int(delta / elapsed)
                # Sectors are transformed into bytes/s
                elif k == 'read_sectors':
                    device_rates['read_bytes/s'] = int(BYTES_PER_SECTOR * delta / elapsed)
                elif k == 'write_sectors':
                    device_rates['write_bytes/s'] = int(BYTES_PER_SECTOR * delta / elapsed)
                # Times are transformed into % of activity:
                # elapsed is in seconds (*1000 to get ms), and a percent is *100
                elif k == 'total_io_ms':
                    device_rates['util%'] = int(100 * delta / (elapsed * 1000))
        return r

    def launch(self):
        """Take a new sample of the disk counters and compute the rates since the previous one.

        The very first call takes two samples one second apart (and so blocks for 1s) in
        order to directly have results.

        :return: the dict of compute_linux_disk_stats(), or False if the platform is not linux
        """
        if not sys.platform.startswith('linux'):  # linux2 on python2, linux on python3
            return False

        logger.debug('%s Starting the get all disks stats' % CHAPTER_IO_STATS)

        new_stats = self._get_disk_stats()
        new_time = time.time()
        # First loop: do a 1s loop and compute it, to directly have results
        if self.previous_time == 0:
            self.previous_time = time.time()
            self.previous_raw = new_stats
            time.sleep(1)
            new_stats = self._get_disk_stats()
            new_time = time.time()

        # So compute the diff
        iostats = self.compute_linux_disk_stats(new_stats, new_time - self.previous_time)
        self.previous_raw = new_stats
        self.previous_time = new_time

        return iostats

    def dump(self, iostats):
        """Append the util% of each device to its history and save the history to IOSTATS_FILE.

        The file is first written as '<IOSTATS_FILE>.new' then moved over the final path.
        Saving errors are logged, not raised.

        :param iostats: result of launch(); a falsy value (launch() returns False when the
                        platform is not linux) adds nothing to the history
        """
        if not iostats:
            iostats = {}

        for device, value in iostats.items():
            if not self.data.get(device, None):
                self.data[device] = []
            history = self.data[device]
            history.append(value['util%'])

            # Only keep the last values (also shrinks an oversized history loaded from the file)
            if len(history) > self.MAX_HISTORY_SIZE:
                del history[:-self.MAX_HISTORY_SIZE]

        logger.debug('%s Saving iostats file %s' % (CHAPTER_IO_STATS, IOSTATS_FILE))
        try:
            with open('%s.new' % IOSTATS_FILE, 'w') as f:
                f.write(json.dumps(self.data))
            shutil.move('%s.new' % IOSTATS_FILE, IOSTATS_FILE)
            logger.debug('%s The iostats file was saved to %s' % (CHAPTER_IO_STATS, IOSTATS_FILE))
        except Exception as exp:
            logger.error('%s Cannot save io stats file %s: %s' % (CHAPTER_IO_STATS, IOSTATS_FILE, exp))


def _clean_pid_file():
    """Remove the gatherer pid file, best effort: any failure is silently ignored."""
    try:
        os.remove(GATHERER_PID_FILE)
    except Exception:
        # A missing or unremovable pid file must not prevent the exit
        return


def _check_pid_file(pid_file):
    """Exit if another gatherer is already running, else record our pid into the pid file.

    An unreadable or invalid pid file is ignored (and overwritten). A pid file that holds
    our own pid is considered stale: it was written by a former process whose pid was
    given to us.

    :param pid_file: path of the pid file to check then write
    :raises SystemExit: (code 0) if the pid stored in the file is a running process
    :raises IOError: if the pid file cannot be written
    """
    my_pid = os.getpid()
    running_pid = None
    if os.path.exists(pid_file):
        try:
            with open(pid_file, 'r') as f:
                pid_in_file = int(f.read())
            if pid_in_file and pid_in_file != my_pid and pid_is_running(pid_in_file):
                running_pid = pid_in_file
        except Exception as exp:
            # Best effort: a broken pid file must not prevent the start
            logger.debug('%s Ignoring the unusable pid file %s: %s' % (CHAPTER_DAEMON, pid_file, exp))

    if running_pid is not None:
        logger.info('%s The gatherer is already running as pid %s. Bailing out.' % (CHAPTER_DAEMON, running_pid))
        # NOTE: sys.exit and not the exit() helper, which is only set by the site module
        sys.exit(0)

    with open(pid_file, 'w') as f:
        logger.info('%s Starting the gatherer as the process pid %s.' % (CHAPTER_DAEMON, my_pid))
        f.write(str(my_pid))


def pid_is_running(pid):
    """Tell if a process with this pid currently exists.

    The check is done by sending the null signal (0) to the process, which delivers
    nothing but performs the existence and permission checks.

    :param pid: process id to look for
    :return: True if the process exists (even if owned by another user), False otherwise
    """
    # 0 and negative values do not address a single process for kill() but process groups
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except OSError as exp:
        # EPERM: the process does exist, we are just not allowed to signal it
        return exp.errno == errno.EPERM
    return True


# Note: we will compute each minute the number of files inside the graphite directory
def do_graphite_count_metrics_thread():
    """Thread body: refresh the graphite metrics counter, then sleep 60s, forever.

    The process name is changed while the counter is being updated, and set back to
    BASE_PROCESS_NAME once done.
    """
    counter = GraphiteMetricsCounter()
    while True:
        busy_name = '%s [currently updating metrology metric count...]' % BASE_PROCESS_NAME
        set_process_name(busy_name)
        logger.debug('%s Start computing graphite counters.' % CHAPTER_DAEMON)
        counter.update_count()
        idle_name = '%s' % BASE_PROCESS_NAME
        set_process_name(idle_name)
        time.sleep(60)


def assert_graphite_count_metrics_thread():
    """Start the graphite counting thread if it was never started or is no longer alive."""
    global graphite_count_metrics_thread
    current = graphite_count_metrics_thread
    if current is not None and current.is_alive():
        # Still running: nothing to do
        return
    worker = threading.Thread(target=do_graphite_count_metrics_thread, name="do_graphite_count_metrics_thread")
    graphite_count_metrics_thread = worker
    # Daemon thread: it does not block the exit of the process
    worker.daemon = True
    worker.start()


# Before this version there was the IOSTATS_COLLECTOR_PIDFILE file
# if so, kill the old process, and delete the pid file
def _check_old_pid_file():
    """Kill the process recorded in the pid file of the old collector script, and drop that file.

    Does nothing if the old pid file does not exist. Every failure is logged, never raised.
    """
    if not os.path.exists(IOSTATS_COLLECTOR_PIDFILE):
        return
    try:
        with open(IOSTATS_COLLECTOR_PIDFILE) as f:
            old_pid = int(f.read().strip())
        logger.info('%s We did detect the old script process as pid %s' % (CHAPTER_DAEMON, old_pid))
        os.unlink(IOSTATS_COLLECTOR_PIDFILE)
        # For kill(), 0 and negative values address whole process groups (our own one for 0,
        # every process for -1) and 1 is init: never send a SIGKILL to these
        if old_pid <= 1:
            raise ValueError('invalid pid %s in the file %s' % (old_pid, IOSTATS_COLLECTOR_PIDFILE))
        try:
            os.kill(old_pid, signal.SIGKILL)
        except OSError as exp:
            # ESRCH: no such process, the old script was just already stopped
            if exp.errno != errno.ESRCH:
                raise
            logger.info('%s The old script process was already stopped.' % CHAPTER_DAEMON)
        else:
            logger.info('%s The old script was stopped successful.' % CHAPTER_DAEMON)
    except Exception as exp:
        logger.error('%s Cannot stop the old script process: %s' % (CHAPTER_DAEMON, exp))


def sigint_handler(signal, frame):
    """Signal handler: log the received signal, remove the pid file and exit with code 0.

    :param signal: number of the received signal (shadows the signal module in this function)
    :param frame: interrupted stack frame, unused
    """
    stop_message = '%s Receiving a stop (signal %s). Exiting.' % (CHAPTER_DAEMON, signal)
    logger.info(stop_message)
    _clean_pid_file()
    sys.exit(0)


# At import time, route SIGTERM, SIGINT, SIGUSR1 and SIGUSR2 to sigint_handler, so each of
# them logs, removes the pid file and exits.
# NOTE: with SIGINT handled here, a Ctrl-C calls sigint_handler instead of raising KeyboardInterrupt
for sig in (signal.SIGTERM, signal.SIGINT, signal.SIGUSR1, signal.SIGUSR2):
    signal.signal(sig, sigint_handler)


def main():
    """Run the gatherer: check the pid files, then loop forever on the stats collection.

    Each loop (followed by a 1s sleep):
    * makes sure, at most once every 60s, that the graphite counting thread is alive
    * samples the disk io stats and saves them
    * updates the VMWare stats

    :raises SystemExit: if another gatherer is already running (see _check_pid_file)
    """
    _check_old_pid_file()

    _check_pid_file(GATHERER_PID_FILE)

    iostats = IoStats()
    last_threads_check = 0

    # IMPORTANT: we CANNOT do VMWare stuff in a thread, it will just segfault
    #            ==> IN THE MAIN THREAD
    logger.debug('%s Creating VMWare stats object.' % CHAPTER_DAEMON)
    vmware_stats_writer = VMWare_Stats_Compute()
    logger.debug('%s Starting requesting VMWare stats.' % CHAPTER_DAEMON)
    vmware_stats_writer.update_stats()

    while True:
        # if the graphite count did crash, do not hammer the start as it will count files in
        # graphite directory
        now = time.time()
        # NOTE: the second test is there to not stay stuck if the clock did go backward
        if now > last_threads_check + 60 or now < last_threads_check:
            # Remember the check time, or the 60s delay would never apply
            last_threads_check = now
            assert_graphite_count_metrics_thread()

        _iostats = iostats.launch()
        # launch() gives False (and not stats) when the platform is not linux: nothing to save
        if _iostats is not False:
            iostats.dump(_iostats)

        logger.debug('%s Starting requesting VMWare stats.' % CHAPTER_DAEMON)
        vmware_stats_writer.update_stats()

        logger.debug('%s Gatherer is running.' % CHAPTER_DAEMON)
        time.sleep(1)


# Script entry point: run the daemon, and whatever the way it stops, do not leave a pid file
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        logger.info('%s Exiting gatherer.' % CHAPTER_DAEMON)
        _clean_pid_file()
        sys.exit(0)
    except Exception:
        logger.error('%s Did have a unknown exception: %s. Exiting the gatherer.' % (CHAPTER_DAEMON, traceback.format_exc()))
        # Like the other exit paths, remove our pid file instead of leaving a stale one
        _clean_pid_file()
