import os, time, fnmatch, socket, errno
from os.path import isdir, isfile, join, exists, splitext, basename, realpath
import whisper
from graphite.remote_storage import RemoteStore
from django.conf import settings
from graphite.logger import log
import glob
import threading

try:
    import rrdtool
except ImportError:
    rrdtool = False

try:
    import gzip
except ImportError:
    gzip = False

try:
    import cPickle as pickle
except ImportError:
    import pickle

# Marker inserted between an RRD file path and a datasource pattern when both
# have to travel together in a single path string.
DATASOURCE_DELIMETER = '::RRD_DATASOURCE::'

# ELEMENTS_MAPPING:
#   host_name:
#      'uuid' => uuid of the host
#      'checks':
#          'service description':
#             'uuid' => uuid of the Check
ELEMENTS_MAPPING = {
    # EXAMPLE/
    # 'serveur_linux_shinken': {
    #     'uuid'  : 'd41ed7d463e011e88ecd080027f6d105',
    #     'checks': {
    #         'Kernel_Stats': {
    #             'uuid': 'c2971d2c5ad911e58cc5080027f08538',
    #         },
    #         'Load_Average': {
    #             'uuid': 'c297260a5ad911e58cc5080027f08538',
    #         },
    #     },
    # },
}

# Re-entrant lock taken around reads and updates of ELEMENTS_MAPPING.
ELEMENTS_MAPPING_LOCK = threading.RLock()

# MongoDB access. These are defaults: when MONGODB_CONFIGURATION_FILE exists,
# _get_mongodb_collection() overrides the URI, the database name and the SSH
# tunnel options with the values found in that file.
MONGODB_CONNECTION = None  # cached connection, created on first use
MONGODB_DATABASE_NAME = 'shinken'
MONGODB_URI = 'mongodb://localhost/?w=1&fsync=false'
MONGODB_USE_SSH = False
MONGODB_SSH_USER = 'shinken'
MONGODB_SSH_KEYFILE = '/opt/graphite/conf/id_rsa'
MONGODB_SSH_TUNNEL_TIMEOUT = 5
MONGODB_CONFIGURATION_FILE = '/opt/graphite/conf/mongodb.conf'

# File whose modification time is watched: a change of mtime triggers a reset of the mapping cache.
CACHE_INVALIDATION_FILE = '/opt/graphite/storage/whisper/.cacheinvalidation'
CACHE_INVALIDATION_DATE = 0  # mtime of CACHE_INVALIDATION_FILE seen at the last check

# Highest monitoring_start_time seen so far: each mongo query only asks for
# the entries that are more recent than this value.
MOST_RECENT_MONITORING_START_TIME = 0
LAST_MONGO_QUERY_TIME = 0  # do not query mongo too much
MONGO_QUERY_MIN_INTERVAL = 10  # query max every 10s

# Filtered debug logging: when LOG_FLAG_FILE exists, its content is the
# substring a name must contain for _log_about() to emit the line.
LOG_FLAG_FILE = '/opt/graphite/storage/whisper/.apache_graphite_host_filter_log'
LOG_FLAG_FILE_LAST_CHECK = 0  # last time (epoch seconds) the flag file was read
LOG_FLAG_FILE_FILTER = '--no-filter--'

# Prefix of the log lines written by this module; carries the pid of the current process.
graphite_banner = '[ APACHE(pid=%6d) / GRAPHITE ]' % os.getpid()


def _get_mongodb_collection():
    """Return the `sla_info` collection of the configured MongoDB database.

    The connection is created on the first call and cached in the module
    global MONGODB_CONNECTION; later calls reuse it. On that first call, if
    MONGODB_CONFIGURATION_FILE exists, the MONGODB_* module globals are
    overridden with the values of its [mongodb] section before connecting.
    """
    global MONGODB_CONNECTION, MONGODB_DATABASE_NAME, MONGODB_URI, MONGODB_USE_SSH, MONGODB_SSH_USER, MONGODB_SSH_KEYFILE, MONGODB_SSH_TUNNEL_TIMEOUT
    # Fast path: connection already established
    if MONGODB_CONNECTION is not None:
        return getattr(MONGODB_CONNECTION, MONGODB_DATABASE_NAME).sla_info
    # Imported here, so the dependency is only loaded when a connection is really needed
    from shinkensolutions.ssh_mongodb.sshtunnelmongomgr import mongo_by_ssh_mgr
    if os.path.isfile(MONGODB_CONFIGURATION_FILE):
        from ConfigParser import ConfigParser, NoOptionError
        parser = ConfigParser()
        parser.read(MONGODB_CONFIGURATION_FILE)
        
        # URI and DATABASE are mandatory: a missing option raises here
        MONGODB_URI = parser.get('mongodb', 'URI')
        MONGODB_DATABASE_NAME = parser.get('mongodb', 'DATABASE')
        # NOTE: these properties only exist after 2.06.01, so they can be missing from the file;
        # when missing, the module-level default is kept
        try:
            MONGODB_USE_SSH = (parser.get('mongodb', 'USE_SSH_TUNNEL') == '1')
        except NoOptionError:
            pass
        try:
            MONGODB_SSH_USER = parser.get('mongodb', 'SSH_USER')
        except NoOptionError:
            pass
        try:
            MONGODB_SSH_KEYFILE = parser.get('mongodb', 'SSH_KEYFILE')
        except NoOptionError:
            pass
        try:
            # NOTE(review): a non-numeric value raises ValueError, which is not caught here
            MONGODB_SSH_TUNNEL_TIMEOUT = int(parser.get('mongodb', 'SSH_TUNNEL_TIMEOUT'))
        except NoOptionError:
            pass
        
        # NOTE(review): locals().keys() lists the names of the local variables of this function,
        # not the parameter values that were read - confirm this is the intended log content
        log.info('%s Reading mongodb parameters %s from %s' % (graphite_banner, str(locals().keys()), MONGODB_CONFIGURATION_FILE))
    
    con_result = mongo_by_ssh_mgr.get_connection(
        MONGODB_URI,
        use_ssh=MONGODB_USE_SSH,
        ssh_keyfile=MONGODB_SSH_KEYFILE,
        ssh_user=MONGODB_SSH_USER,
        ssh_tunnel_timeout=MONGODB_SSH_TUNNEL_TIMEOUT,
        requestor='graphite',
    )
    # Cache the connection for the next calls
    MONGODB_CONNECTION = con_result.get_connection()
    return getattr(MONGODB_CONNECTION, MONGODB_DATABASE_NAME).sla_info


def _reset_cache():
    """Forget the whole host/check mapping and the mongo query bookkeeping.

    Both counters go back to 0, so the next update is neither throttled nor
    restricted to recent entries.
    """
    global ELEMENTS_MAPPING, MOST_RECENT_MONITORING_START_TIME, LAST_MONGO_QUERY_TIME
    ELEMENTS_MAPPING_LOCK.acquire()
    try:
        MOST_RECENT_MONITORING_START_TIME = 0
        LAST_MONGO_QUERY_TIME = 0
        # Emptied in place: the module-level dict object stays the same
        ELEMENTS_MAPPING.clear()
    finally:
        ELEMENTS_MAPPING_LOCK.release()


def _look_at_cache_invalidation():
    """Reset the mapping cache when the invalidation file is missing or was touched.

    The modification time of CACHE_INVALIDATION_FILE is compared with the one
    remembered in CACHE_INVALIDATION_DATE; any difference resets the cache.
    """
    global CACHE_INVALIDATION_DATE, CACHE_INVALIDATION_FILE
    
    # Single stat() call instead of exists() + stat(): the file can vanish
    # between the two calls, and stat() would then raise.
    try:
        last_modification_time = os.stat(CACHE_INVALIDATION_FILE).st_mtime
    except OSError:
        # No (readable) invalidation file: the cache is dropped on every call
        # NOTE(review): this also rewinds LAST_MONGO_QUERY_TIME, so the mongo
        # throttling is bypassed as long as the file is missing - confirm
        _reset_cache()
        return
    
    if last_modification_time == CACHE_INVALIDATION_DATE:
        return
    
    CACHE_INVALIDATION_DATE = last_modification_time
    log.info('%s Resetting the cache because the invalidation file %s did changed meaning a new configuration was pushed.' % (graphite_banner, CACHE_INVALIDATION_FILE))
    _reset_cache()


# Check entry stored under the '__HOST__' key of every host of ELEMENTS_MAPPING.
# The same dict object is shared by all the hosts.
FAKE_HOST_CHECK = {'uuid': '__HOST__'}


def _read_host_name(element):
    """Return the normalized host name of a sla_info entry.

    Dots and spaces are replaced by underscores and a byte string is decoded
    as utf8 (undecodable bytes are dropped). As a side effect, the module
    global MOST_RECENT_MONITORING_START_TIME is raised to the
    'monitoring_start_time' of the entry when this one is more recent.
    """
    global MOST_RECENT_MONITORING_START_TIME
    start_time = element['monitoring_start_time']
    if start_time > MOST_RECENT_MONITORING_START_TIME:
        MOST_RECENT_MONITORING_START_TIME = start_time
    
    name = element['host_name']
    for forbidden_char in ('.', ' '):
        name = name.replace(forbidden_char, '_')
    
    if not isinstance(name, str):
        return name
    return name.decode('utf8', 'ignore')


def _log_about(host_name, line):
    """Log `line` only when the debug filter matches `host_name`.

    Nothing is logged when LOG_FLAG_FILE does not exist. Otherwise the file
    content (stripped) is the filter; it is re-read at most every 10 seconds,
    and an empty content keeps the previous filter. The line is emitted when
    the filter is a substring of `host_name`.

    :param host_name: text matched against the filter (callers give host names or search patterns)
    :param line: fully formatted message to log
    """
    global LOG_FLAG_FILE_LAST_CHECK, LOG_FLAG_FILE_FILTER
    if not os.path.exists(LOG_FLAG_FILE):
        return
    now = int(time.time())
    if now > LOG_FLAG_FILE_LAST_CHECK + 10:  # re-read the filter log file every 10s
        with open(LOG_FLAG_FILE, 'r') as f:
            old_filter = LOG_FLAG_FILE_FILTER
            new_filter = f.read().strip()
            if new_filter != '' and old_filter != new_filter:
                # Log the NEW value: the global still holds the old one at this point
                log.info('%s Updating log filter from %s to %s' % (graphite_banner, old_filter, new_filter))
                LOG_FLAG_FILE_FILTER = new_filter
            LOG_FLAG_FILE_LAST_CHECK = now
    if LOG_FLAG_FILE_FILTER in host_name:
        log.info(line)


def _update_hosts_checks_mapping():
    """Refresh the host/check mapping while holding ELEMENTS_MAPPING_LOCK."""
    ELEMENTS_MAPPING_LOCK.acquire()
    try:
        _do_update_hosts_checks_mapping()
    finally:
        ELEMENTS_MAPPING_LOCK.release()


# Real update job: takes no lock itself, _update_hosts_checks_mapping() calls it with the lock held
def _do_update_hosts_checks_mapping():
    """Load the new sla_info entries from mongo into ELEMENTS_MAPPING.

    Steps:
      * reset the cache if the invalidation file asks for it
      * skip the update if the last query is less than MONGO_QUERY_MIN_INTERVAL seconds old
      * fetch the entries whose monitoring_start_time is greater than MOST_RECENT_MONITORING_START_TIME
      * register the hosts first (entries with an empty service_description), then the checks
    """
    global LAST_MONGO_QUERY_TIME, MOST_RECENT_MONITORING_START_TIME
    now = int(time.time())
    
    # Check if the cache should be invalidated (new conf)
    _look_at_cache_invalidation()
    
    # The clock went back in time? forget the last query time
    if now < LAST_MONGO_QUERY_TIME:
        LAST_MONGO_QUERY_TIME = 0
    
    # Maybe it's just too short since last query, skip it
    if now < LAST_MONGO_QUERY_TIME + MONGO_QUERY_MIN_INTERVAL:
        return
    
    # Set before the query, so a failing query is also throttled
    LAST_MONGO_QUERY_TIME = now
    
    t0 = time.time()
    # Only ask for the entries more recent than MOST_RECENT_MONITORING_START_TIME,
    # so we do not dump the whole collection every time
    col = _get_mongodb_collection()
    new_elements = list(col.find({'monitoring_start_time': {'$gt': MOST_RECENT_MONITORING_START_TIME}}))
    # The duration is printed with 3 decimals, like the other timing logs
    log.info('%s Update host/check mapping:: did get %d new elements from date %s (and in %.3fs)' % (graphite_banner, len(new_elements), MOST_RECENT_MONITORING_START_TIME, time.time() - t0))
    
    new_hosts = [element for element in new_elements if element['service_description'] == '']
    new_checks = [element for element in new_elements if element['service_description'] != '']
    
    # SLA INFO entries looks like:
    # {u'monitoring_start_time': 1527631200,
    # u'service_description': u'Disks Stats',
    # u'_id': u'd41ed7d463e011e88ecd080027f6d105-c29735965ad911e58cc5080027f08538',
    # u'host_name': u'serveur linux shinken',
    # u'check_interval': 5}
    t0 = time.time()
    # Beware: the hosts are all migrated, but some old checks can stay, so first we need to be sure about the hosts
    
    for element in new_hosts:
        host_name = _read_host_name(element)
        host_uuid = element['_id']
        host_entry = ELEMENTS_MAPPING.get(host_name, None)
        if host_entry is None:  # New element?
            # NOTE: we always have the __HOST__ check entry, that does not exist in the SLA database, but in the graphite data
            host_entry = {'uuid'  : host_uuid,
                          'checks': {
                              '__HOST__': FAKE_HOST_CHECK,
                          },
                          }
            ELEMENTS_MAPPING[host_name] = host_entry
            _log_about(host_name, '%s [FILTER=%s] Update host/check mapping:: new host detected: %s, uuid=%s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_name, host_uuid))
        
        # Be sure to always update the uuid
        host_entry['uuid'] = host_uuid
    
    for element in new_checks:
        host_name = _read_host_name(element)
        
        # Same normalization as the host names: no dot, no space
        service_description = element.get('service_description', '').replace('.', '_').replace(' ', '_')
        if isinstance(service_description, str):
            service_description = service_description.decode('utf8', 'ignore')
        
        # The _id of a check is '<host uuid>-<check uuid>'
        host_uuid, service_uuid = element['_id'].split('-', 1)  # DO NOT TAKE host_uuid as maybe this is an old check
        host_entry = ELEMENTS_MAPPING.get(host_name, None)
        if host_entry is None:  # New element?
            # NOTE: we always have the __HOST__ check entry, that does not exist in the SLA database, but in the graphite data
            host_entry = {'uuid'  : host_uuid,
                          'checks': {
                              '__HOST__': FAKE_HOST_CHECK,
                          },
                          }
            ELEMENTS_MAPPING[host_name] = host_entry
            _log_about(host_name, '%s [FILTER=%s] Update host/check mapping:: new host detected: %s, uuid=%s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_name, host_uuid))
        if host_uuid != host_entry['uuid']:  # seems like an old check entry, do not use it
            _log_about(host_name, '%s [FILTER=%s] Update host/check mapping:: Old check name is detected, skipping it: %s/%s %s %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_name, service_description, host_uuid, service_uuid))
            continue
        
        # An already known check keeps its entry (and so its uuid)
        service_entry = host_entry['checks'].get(service_description, None)
        if service_entry is None:  # new service
            service_entry = {'uuid': service_uuid}
        host_entry['checks'][service_description] = service_entry
        _log_about(host_name, '%s [FILTER=%s] Update host/check mapping:: New check add: %s/%s, uuid=%s-%s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_name, service_description, host_uuid, service_uuid))
    
    nb_hosts = len(ELEMENTS_MAPPING)
    nb_checks = 0
    for host_entry in ELEMENTS_MAPPING.itervalues():
        nb_checks += (len(host_entry['checks']) - 1)  # remove the __HOST__ entry
    log.info('%s [FILTER=%s] Update host/check mapping:: Total number of hosts/checks in mapping cache from SLA database: %s/%s' % (graphite_banner, LOG_FLAG_FILE_FILTER, nb_hosts, nb_checks))
    log.info('%s Update host/check mapping:: Cache update took: %.3fs' % (graphite_banner, time.time() - t0))


class Store:
    """Metric lookup over local data directories and remote graphite servers."""
    
    
    def __init__(self, directories=None, remote_hosts=None):
        """
        :param directories: local data directories to search in
        :param remote_hosts: cluster servers; the ones that are a local interface get no RemoteStore
        :raises ValueError: when both directories and remote_hosts are empty
        """
        # None instead of [] as default: a default list would be one single
        # object shared by every Store built without the argument
        if directories is None:
            directories = []
        if remote_hosts is None:
            remote_hosts = []
        
        if not (directories or remote_hosts):
            raise ValueError("directories and remote_hosts cannot both be empty")
        
        self.directories = directories
        self.remote_hosts = remote_hosts
        self.remote_stores = [RemoteStore(host) for host in remote_hosts if not is_local_interface(host)]
    
    
    def is_relay(self):
        """Return True when at least one remote store is configured."""
        log.info('[ NEW CONFIGURATION / MIGRATION ] [ RELAY ] [DEBUG] Looking for relay nodes %s %s' % (self.remote_stores, self.remote_hosts))
        return len(self.remote_stores) != 0
    
    
    def do_migration_relay(self, migration_table):
        """Forward migration_table to every remote store and sum up their answers.

        :return: dict with the keys error_count, ok_count, nb_migrated, last_error and server_ok,
                 each one being the `+=` accumulation of the value given by every remote store
        """
        retval = {
            'error_count': 0,
            'ok_count'   : 0,
            'nb_migrated': 0,
            'last_error' : '',
            'server_ok'  : [],
        }
        for store in self.remote_stores:
            s_retval = store.relay_migration_table(migration_table)
            for k in ('error_count', 'ok_count', 'nb_migrated', 'last_error', 'server_ok'):
                retval[k] += s_retval[k]
        log.info('[ NEW CONFIGURATION / MIGRATION ] [ RELAY ] return of our relay nodes : %s' % (retval))
        return retval
    
    
    def get(self, metric_path):  # Deprecated
        """Return the WhisperFile of metric_path from the first directory that has it, None if not found."""
        for directory in self.directories:
            relative_fs_path = metric_path.replace('.', '/') + '.wsp'
            absolute_fs_path = join(directory, relative_fs_path)
            
            if exists(absolute_fs_path):
                return WhisperFile(absolute_fs_path, metric_path)
    
    
    def _is_alive(self):
        """Return True when there is no remote store, or when at least one of them is available."""
        if len(self.remote_stores) == 0:
            return True
        
        return True in [r.available for r in self.remote_stores]
    
    
    def find(self, query, search_by_name=True, metric_listing=False):
        """Generate the nodes matching query.

        A query with wildcards gives every match (find_all), a plain one gives
        at most the first match (find_first).

        :raises Exception: when remote stores are configured but none is available
        """
        # IMPORTANT: if we have no more remote server available, we return an error, so the interface can warn
        # the user that something is wrong.
        if not self._is_alive():
            raise Exception('No remote servers are available')
        
        if is_pattern(query):
            for match in self.find_all(query, search_by_name=search_by_name, metric_listing=metric_listing):
                yield match
        
        else:
            match = self.find_first(query, search_by_name=search_by_name, metric_listing=metric_listing)
            
            if match is not None:
                yield match
    
    
    def find_first(self, query, search_by_name=True, metric_listing=False):
        """Return the first node matching query, local directories first, or None."""
        # Search locally first
        for directory in self.directories:
            for match in find(directory, query, search_by_name=search_by_name, metric_listing=metric_listing):
                return match
        
        # If nothing found, search remotely
        remote_requests = [r.find(query, search_by_name=search_by_name, metric_listing=metric_listing) for r in self.remote_stores if r.available]
        
        for request in remote_requests:
            for match in request.get_results():
                return match
    
    
    def find_all(self, query, search_by_name=True, metric_listing=False):
        """Generate every node matching query, local then remote, one per metric_path."""
        # Start remote searches
        found = set()
        remote_requests = [r.find(query, search_by_name=search_by_name, metric_listing=metric_listing) for r in self.remote_stores if r.available]
        
        # Search locally
        for directory in self.directories:
            for match in find(directory, query, search_by_name=search_by_name, metric_listing=metric_listing):
                if match.metric_path not in found:
                    yield match
                    found.add(match.metric_path)
        
        # Gather remote search results
        for request in remote_requests:
            for match in request.get_results():
                if match.metric_path not in found:
                    yield match
                    found.add(match.metric_path)


def is_local_interface(host):
    """Tell if `host` is an address of this machine, by trying to bind a socket on it.

    :param host: 'address' or 'address:port' (the port part is ignored)
    :return: True when a bind succeeded, False when the address is not available
             locally or when the name cannot be resolved
    :raises Exception: when the bind failed on every port for another reason
    """
    if ':' in host:
        host = host.split(':', 1)[0]
    
    last_error = None
    for port in xrange(1025, 65535):
        sock = None
        try:
            sock = socket.socket()
            sock.bind((host, port))
        
        except socket.error as e:
            last_error = e
            if e.args[0] == errno.EADDRNOTAVAIL:
                return False
            elif e.errno == -2:  # Name or service not known
                return False
            # Other errors (port already used...): try the next port
        
        else:
            return True
        
        finally:
            # Always give the descriptor back, also when the bind failed
            if sock is not None:
                sock.close()
    
    raise Exception("Failed all attempts at binding to interface %s, last exception was %s" % (host, last_error))


def is_pattern(s):
    """Return True when s holds at least one wildcard character: * ? [ or {"""
    for wildcard in ('*', '?', '[', '{'):
        if wildcard in s:
            return True
    return False


def is_escaped_pattern(s):
    """Return True when the first occurrence of a wildcard (* ? [ {) in s follows a backslash.

    Only the first occurrence of each wildcard character is looked at.
    """
    return any(
        position > 0 and s[position - 1] == '\\'
        for position in (s.find(wildcard) for wildcard in ('*', '?', '[', '{'))
    )


def find_escaped_pattern_fields(pattern_string):
    """Generate the index of every dot-separated field of pattern_string that is an escaped pattern."""
    index = 0
    for field in pattern_string.split('.'):
        if is_escaped_pattern(field):
            yield index
        index += 1


def _find_all_hosts_matching(server_expr, absolute_root):
    """Return the set of (host name, host uuid) of the cached hosts matching server_expr.

    The mapping cache is refreshed first. A host is only kept when the
    directory <absolute_root>/<host uuid> exists.
    """
    started_at = time.time()
    _update_hosts_checks_mapping()
    
    found = set()
    with ELEMENTS_MAPPING_LOCK:
        candidates = fnmatch.filter(ELEMENTS_MAPPING.keys(), server_expr)
        _log_about(server_expr, '%s [CACHE READ] Looking for host that match %s => %s' % (graphite_banner, server_expr, ', '.join(candidates)))
        for candidate in candidates:
            candidate_uuid = ELEMENTS_MAPPING[candidate]['uuid']
            # No directory for this uuid means that the host got no metrics
            if os.path.exists(os.path.join(absolute_root, candidate_uuid)):
                found.add((candidate, candidate_uuid))
    log.info('%s [PERF] Look at all host matching a pattern: %.3fs' % (graphite_banner, time.time() - started_at))
    return found


def _find_matching_host_check_paths(matching_hosts, check_patern, absolute_root):
    """Return the list of (host name, check name, '<host uuid>/<check uuid>') of the checks matching check_patern.

    The checks are read from the mapping cache of every (host name, host uuid)
    of matching_hosts. A check is only kept when its directory exists under
    absolute_root.
    """
    started_at = time.time()
    found = []
    with ELEMENTS_MAPPING_LOCK:
        for (host_name, host_uuid) in matching_hosts:
            host_checks = ELEMENTS_MAPPING[host_name]['checks']
            _log_about(host_name, '%s [CACHE READ] In cache checks for host:: %s => %s' % (graphite_banner, host_name, ', '.join(host_checks.keys())))
            selected_names = fnmatch.filter(host_checks.keys(), check_patern)
            _log_about(host_name, '%s [CACHE READ] In cache check that match pattern %s for host %s => %s' % (graphite_banner, check_patern, host_name, ', '.join(selected_names)))
            for selected_name in selected_names:
                relative_dir = os.path.join(host_uuid, host_checks[selected_name]['uuid'])
                # A check without directory got no metrics: not interesting
                if os.path.exists(os.path.join(absolute_root, relative_dir)):
                    found.append((host_name, selected_name, relative_dir))
    log.info('_find_matching_host_check_paths:: time: %.3f' % (time.time() - started_at))
    return found


def __protect_search_patern(patern):
    """Rewrite the brackets of patern as the one-character classes '[[]' and '[]]'.

    An already protected '[[]' is kept as is.
    NOTE(review): an incoming '[]]' is not kept as is: its '[' is consumed by
    the earlier '[' step, so it comes out as '[[][]][]]'.
    """
    # Applied in this exact order: hide the protected forms, mark the raw
    # brackets, then expand every marker
    ordered_substitutions = (
        ('[[]', '__PROTECTED1__'),
        ('[', '__OPEN_1__'),
        ('[]]', '__PROTECTED2__'),
        (']', '__CLOSE_1__'),
        ('__PROTECTED1__', '[[]'),
        ('__OPEN_1__', '[[]'),
        ('__PROTECTED2__', '[]]'),
        ('__CLOSE_1__', '[]]'),
    )
    for searched, replacement in ordered_substitutions:
        patern = patern.replace(searched, replacement)
    return patern


# Generates the nodes beneath root_dir that match the given pattern
# - search_by_name:
#     * True: (default mode) the mongo mapping is used to know which uuids are interesting
#     * False: the old standard way, the pattern parts are matched directly against the entries on disk
# - metric_listing:
#     * True: only the end metric name is given, so an external tool can use it in a listing
#     * False: (default) the full name of the metric is given, with host+check+metric, for graph legends
def find(root_dir, pattern, search_by_name=True, metric_listing=False):
    """Generate the Branch / WhisperFile nodes (or, in the old mode, whatever
    _get_nodes_from_absolute_path builds) matching pattern under root_dir.

    In name mode the number of dot-separated parts of the pattern decides the job:
    one part lists hosts, two parts list the checks of the matching hosts,
    three parts or more give the whisper files of the matching checks.
    """
    # The pattern is split after dropping the backslashes
    clean_pattern = pattern.replace('\\', '')
    pattern_parts = clean_pattern.split('.')
    absolute_root = os.path.abspath(root_dir)
    
    # Do not query mongo just for a void rrd directory
    if root_dir.startswith('/opt/graphite/storage/rrd/'):
        return
    
    _log_about(pattern, '%s [ METRICS FIND ] [FILTER=%s] FIND:: root_dir=%s find by name=%s   metric listing=%s and patern=%s' % (graphite_banner, LOG_FLAG_FILE_FILTER, root_dir, search_by_name, metric_listing, pattern))
    
    # Len pattern_parts:
    # 1: listing of hosts
    # 2: listing of checks
    # 3+: get real metrics
    
    # old school
    if not search_by_name:
        finded_paths = list(_find(root_dir, pattern_parts))
        # log.info(' find: %s %s' % (root_dir, pattern))
        
        for absolute_path in finded_paths:
            results = _get_nodes_from_absolute_path(root_dir, pattern, pattern_parts, absolute_path)
            for result in results:
                yield result
        return
    
    # Now we want to manage all by name
    # Listing hosts
    if len(pattern_parts) == 1:
        host_patern = pattern_parts[0]
        host_patern = __protect_search_patern(host_patern)
        matching_hosts = _find_all_hosts_matching(host_patern, absolute_root)
        _log_about(host_patern, '%s [ METRICS FIND ] [FILTER=%s] LIST HOST:: find:: matching_hosts: %s => %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_patern, matching_hosts))
        # Looks like: 'serveur linux shinken', 'd41ed7d463e011e88ecd080027f6d105'
        for (host_name, host_uuid) in matching_hosts:
            absolute_path = os.path.join(absolute_root, host_uuid)
            _log_about(host_name, '%s [ METRICS FIND ] [FILTER=%s] Listing host: %s (%s)' % (graphite_banner, LOG_FLAG_FILE_FILTER, absolute_path, host_name))
            yield Branch(absolute_path, host_name)
        return
    
    # host.check only (no metric part): list the matching checks as branches
    if len(pattern_parts) == 2:
        host_patern = __protect_search_patern(pattern_parts[0])
        
        matching_hosts = _find_all_hosts_matching(host_patern, absolute_root)
        _log_about(host_patern, '%s [ METRICS FIND ] [FILTER=%s] Listing host/checks metrics:: find:: matching_hosts: %s => %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_patern, matching_hosts))
        # Looks like: 'serveur linux shinken', 'd41ed7d463e011e88ecd080027f6d105'
        
        check_patern = __protect_search_patern(pattern_parts[1])
        
        matching_host_check_paths = _find_matching_host_check_paths(matching_hosts, check_patern, absolute_root)
        _log_about(host_patern,
                   '%s [ METRICS FIND ] [FILTER=%s] [DEBUG-SUPPORT ONLY] find:: matching_host_check_paths: host pattern=%s check pattern=%s => %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_patern, check_patern, matching_host_check_paths))
        # looks like ('server 1', 'Kernel Stats', 'd41ed7d463e011e88ecd080027f6d105/c2971d2c5ad911e58cc5080027f08538')
        for (host_name, check_name, relative_path) in matching_host_check_paths:
            absolute_path = os.path.join(absolute_root, relative_path)
            yield Branch(absolute_path, check_name)
        
        return
    
    # Ok now grok real metrics ^^
    host_patern = __protect_search_patern(pattern_parts[0])  # always existing
    matching_hosts = _find_all_hosts_matching(host_patern, absolute_root)
    # log.info('find:: matching_hosts: %s => %s' % (host_patern, matching_hosts))
    
    check_patern = __protect_search_patern(pattern_parts[1])
    matching_host_check_paths = _find_matching_host_check_paths(matching_hosts, check_patern, absolute_root)
    _log_about(host_patern,
               '%s [ METRICS FIND ] [FILTER=%s] [DEBUG-SUPPORT ONLY] find:: matching_host_check_paths: host pattern=%s check pattern=%s => %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, host_patern, check_patern, matching_host_check_paths))
    
    # Everything after host and check is the metric pattern. It is used as a
    # glob file name pattern, without the bracket protection applied to the host and check parts
    metric_patern = '.'.join(pattern_parts[2:])
    
    # matching_host_check_paths looks like list of ('server 1', 'Kernel Stats', 'd41ed7d463e011e88ecd080027f6d105/c2971d2c5ad911e58cc5080027f08538')
    for (host_name, check_name, relative_path) in matching_host_check_paths:
        whisper_dir = os.path.join(absolute_root, relative_path)
        whisper_file_patern = whisper_dir + os.sep + metric_patern + '.wsp'
        # log.info('GLOB:: %s' % whisper_file_patern)
        whisper_files = glob.glob(whisper_file_patern)
        # log.info('GLOB RESULT: %s' % whisper_files)
        # log.info('Look for whisper files patern: %s => %s => founded: %s' % (metric_patern, whisper_file_patern, whisper_files))
        for whisper_file_path in whisper_files:
            short_metric_name = os.path.splitext(os.path.basename(whisper_file_path))[0]  # pgfault_by_s for example
            # If we are in a listing mode, we give the short version
            if metric_listing:
                name = short_metric_name
            else:  # full read access, so here for legend
                name = host_name + '.' + check_name + '.' + short_metric_name
            _log_about(host_patern, '%s [ METRICS FIND ] [FILTER=%s] Giving back a whisper data: %s => %s => %s' % (graphite_banner, LOG_FLAG_FILE_FILTER, metric_patern, name, whisper_file_path))
            yield WhisperFile(whisper_file_path, name)
    return


def _get_nodes_from_absolute_path(root_dir, pattern, pattern_parts, absolute_path):
    """Generate the node(s) standing for absolute_path, a path given by _find().

    A directory gives a Branch, a .wsp file a WhisperFile, a .wsp.gz file a
    GzippedWhisperFile. When rrdtool is available, a .rrd file gives either the
    RRDFile itself or, when the path carries a datasource pattern, its matching
    datasources. Anything else gives nothing.
    """
    # The path may carry a datasource pattern behind the delimiter
    datasource_pattern = None
    if DATASOURCE_DELIMETER in basename(absolute_path):
        absolute_path, datasource_pattern = absolute_path.rsplit(DATASOURCE_DELIMETER, 1)
    
    path_under_root = absolute_path[len(root_dir):].lstrip('/')
    metric_path_parts = path_under_root.replace('/', '.').split('.')
    
    # Preserve pattern in resulting path for escaped query pattern elements
    for escaped_index in find_escaped_pattern_fields(pattern):
        metric_path_parts[escaped_index] = pattern_parts[escaped_index].replace('\\', '')
    metric_path = '.'.join(metric_path_parts)
    
    if isdir(absolute_path):
        yield Branch(absolute_path, metric_path)
        return
    
    if not isfile(absolute_path):
        return
    
    metric_path, extension = splitext(metric_path)
    
    if extension == '.wsp':
        yield WhisperFile(absolute_path, metric_path)
        return
    
    if extension == '.gz' and metric_path.endswith('.wsp'):
        yield GzippedWhisperFile(absolute_path, splitext(metric_path)[0])
        return
    
    if not (rrdtool and extension == '.rrd'):
        return
    
    rrd = RRDFile(absolute_path, metric_path)
    if datasource_pattern is None:
        yield rrd
        return
    
    for source in rrd.getDataSources():
        if fnmatch.fnmatch(source.name, datasource_pattern):
            yield source


def _find(current_dir, patterns):
    """Walk down current_dir, one directory level per pattern, and generate the
    absolute paths whose successive components match the successive patterns.

    When rrdtool is available and exactly one pattern is left after the current
    one, the .rrd files matching the current pattern are also given, with the
    delimiter and the last pattern (taken as a datasource pattern) appended.
    """
    current_pattern = patterns[0]
    next_patterns = patterns[1:]
    entries = os.listdir(current_dir)
    
    directories = [entry for entry in entries if isdir(join(current_dir, entry))]
    matching_subdirs = match_entries(directories, current_pattern)
    
    if rrdtool and len(next_patterns) == 1:  # the last pattern may apply to RRD data sources
        plain_files = [entry for entry in entries if isfile(join(current_dir, entry))]
        for rrd_file in match_entries(plain_files, current_pattern + ".rrd"):
            yield join(current_dir, rrd_file) + DATASOURCE_DELIMETER + next_patterns[0]
    
    if not next_patterns:  # we've got the last pattern
        plain_files = [entry for entry in entries if isfile(join(current_dir, entry))]
        matching_files = match_entries(plain_files, current_pattern + '.*')
        
        for entry_name in matching_subdirs + matching_files:
            yield join(current_dir, entry_name)
        return
    
    # we've still got more directories to traverse
    for subdir in matching_subdirs:
        for deeper_match in _find(join(current_dir, subdir), next_patterns):
            yield deeper_match


def _deduplicate(entries):
    """Generate the items of entries in their order, skipping the ones already given."""
    seen = set()
    for item in entries:
        if item in seen:
            continue
        seen.add(item)
        yield item


def match_entries(entries, pattern):
    """Return the entries matching pattern (fnmatch syntax, plus one {a,b} alternative group).

    Without a usable {..} group the result is sorted. With one, the result
    follows the order of the alternatives, without duplicates.
    """
    brace_open = pattern.find('{')
    brace_close = pattern.find('}')
    
    # No '{', or no '}' after it: plain fnmatch pattern
    if brace_open == -1 or brace_close <= brace_open:
        return sorted(fnmatch.filter(entries, pattern))
    
    # Pattern variants (ie. {foo,bar}baz = foobaz or barbaz)
    before = pattern[:brace_open]
    after = pattern[brace_close + 1:]
    matching = []
    for alternative in pattern[brace_open + 1:brace_close].split(','):
        matching.extend(fnmatch.filter(entries, before + alternative + after))
    
    return list(_deduplicate(matching))  # remove dupes without changing order


def _protect_string(s):
    """Return s decoded as utf8 (undecodable bytes dropped) when it is a str, else s untouched."""
    return s.decode('utf8', 'ignore') if isinstance(s, str) else s


# Node classes
class Node:
    """Base of every element of the metric tree: a file system path plus a metric path."""
    # Class-level dict, shared by the nodes that do not override it
    context = {}
    
    
    def __init__(self, fs_path, metric_path):
        """Keep both paths (byte strings are decoded) and take the last dotted part as name."""
        protected_metric_path = _protect_string(metric_path)
        self.fs_path = _protect_string(fs_path)
        self.metric_path = protected_metric_path
        self.real_metric = protected_metric_path
        self.name = protected_metric_path.split('.')[-1]
    
    
    def getIntervals(self):
        """No interval by default."""
        return []
    
    
    def updateContext(self, newContext):
        """Must be given by the subclasses that support a context."""
        raise NotImplementedError()


class Branch(Node):
    """Node with children"""
    
    
    def isLeaf(self):
        """A branch is never a leaf."""
        return False
    
    
    def fetch(self, startTime, endTime, search_by_name=True):
        """No-op to make all Node's fetch-able"""
        return []


class Leaf(Node):
    """(Abstract) Node that stores data"""
    
    
    def isLeaf(self):
        """A leaf is always a leaf."""
        return True


# Database File classes
class WhisperFile(Leaf):
    """Leaf backed by a whisper (.wsp) file, with an optional pickled context file next to it."""
    cached_context_data = None  # context once loaded from disk (set per instance on first access)
    extension = '.wsp'
    
    
    def __init__(self, *args, **kwargs):
        """Same arguments as Node. When fs_path does not resolve to itself
        (realpath differs), real_metric is rebuilt from the resolved path."""
        Leaf.__init__(self, *args, **kwargs)
        real_fs_path = realpath(self.fs_path)
        
        if real_fs_path != self.fs_path:
            relative_fs_path = self.metric_path.replace('.', '/') + self.extension
            # Base directory = fs_path without its metric part, resolved too
            base_fs_path = realpath(self.fs_path[:-len(relative_fs_path)])
            relative_real_fs_path = real_fs_path[len(base_fs_path) + 1:]
            self.real_metric = relative_real_fs_path[:-len(self.extension)].replace('/', '.')
    
    
    def getIntervals(self):
        """Return [(start, end)]: start is now minus the max retention of the file,
        end is the modification time of the file (never before start)."""
        start = time.time() - whisper.info(self.fs_path)['maxRetention']
        end = max(os.stat(self.fs_path).st_mtime, start)
        return [(start, end)]
    
    
    def fetch(self, startTime, endTime, search_by_name=True):
        """Return the (timeInfo, values) given by whisper.fetch. search_by_name is not used."""
        (timeInfo, values) = whisper.fetch(self.fs_path, startTime, endTime)
        return (timeInfo, values)
    
    
    @property
    def context(self):
        """Context dict of the metric, loaded once from '<path without extension>.context.pickle'
        (empty dict when that file does not exist)."""
        if self.cached_context_data is not None:
            return self.cached_context_data
        
        context_path = self.fs_path[:-len(self.extension)] + '.context.pickle'
        
        if exists(context_path):
            # SECURITY NOTE: unpickling runs arbitrary code, the context files must be trusted
            # `with` closes the file even when the load fails
            with open(context_path, 'rb') as fh:
                context_data = pickle.load(fh)
        else:
            context_data = {}
        
        self.cached_context_data = context_data
        return context_data
    
    
    def updateContext(self, newContext):
        """Merge newContext into the context and write it back to the context file."""
        self.context.update(newContext)
        context_path = self.fs_path[:-len(self.extension)] + '.context.pickle'
        
        # `with` closes the file even when the dump fails
        with open(context_path, 'wb') as fh:
            pickle.dump(self.context, fh)


class GzippedWhisperFile(WhisperFile):
    """WhisperFile stored gzipped (.wsp.gz), read through the gzip module."""
    extension = '.wsp.gz'
    
    
    def fetch(self, startTime, endTime, search_by_name=True):
        """Return the result of whisper.file_fetch on the uncompressed stream.

        search_by_name is not used.

        :raises Exception: when the gzip module is not available
        """
        if not gzip:
            raise Exception("gzip module not available, GzippedWhisperFile not supported")
        
        fh = gzip.GzipFile(self.fs_path, 'rb')
        try:
            return whisper.file_fetch(fh, startTime, endTime)
        finally:
            fh.close()
    
    
    def getIntervals(self):
        """Return [(start, end)] like WhisperFile.getIntervals, or [] when gzip is not available."""
        if not gzip:
            return []
        
        # getattr is mandatory here: written as whisper.__readHeader inside a
        # class body, the name is mangled to whisper._GzippedWhisperFile__readHeader,
        # which does not exist
        read_header = getattr(whisper, '__readHeader')
        
        fh = gzip.GzipFile(self.fs_path, 'rb')
        try:
            start = time.time() - read_header(fh)['maxRetention']
            end = max(os.stat(self.fs_path).st_mtime, start)
        finally:
            fh.close()
        return [(start, end)]


class RRDFile(Branch):
    """Branch standing for a .rrd file: its children are the datasources of the file."""
    
    
    def getDataSources(self):
        """Return one RRDDataSource per datasource found in rrdtool.info.

        Both layouts of the info dict are read: a 'ds' entry holding the
        datasource names, or flat 'ds[<name>]...' keys.
        """
        info = rrdtool.info(self.fs_path)
        if 'ds' in info:
            return [RRDDataSource(self, datasource_name) for datasource_name in info['ds']]
        else:
            ds_keys = [key for key in info if key.startswith('ds[')]
            datasources = set(key[3:].split(']')[0] for key in ds_keys)
            return [RRDDataSource(self, ds) for ds in datasources]
    
    
    def getRetention(self):
        """Return the biggest pdp_per_row * rows of the archives, multiplied by the step of the file."""
        info = rrdtool.info(self.fs_path)
        if 'rra' in info:
            rras = info['rra']
        else:
            # Flat layout: keys look like 'rra[<index>].<field>'
            # The whole index is parsed, so an index with 2 digits or more is read correctly
            rra_indexes = [int(key[4:].split(']')[0]) for key in info if key.startswith('rra[')]
            rra_count = max(rra_indexes) + 1
            # One distinct dict per archive ([{}] * n would repeat the same dict object n times)
            rras = [
                {
                    'pdp_per_row': info['rra[%d].pdp_per_row' % i],
                    'rows'       : info['rra[%d].rows' % i],
                }
                for i in range(rra_count)
            ]
        
        retention_points = 0
        for rra in rras:
            points = rra['pdp_per_row'] * rra['rows']
            if points > retention_points:
                retention_points = points
        
        return retention_points * info['step']


class RRDDataSource(Leaf):
    """Leaf standing for one datasource of a RRDFile."""
    
    
    def __init__(self, rrd_file, name):
        """
        :param rrd_file: the RRDFile holding the datasource
        :param name: name of the datasource, appended to the metric path of the file
        """
        Leaf.__init__(self, rrd_file.fs_path, rrd_file.metric_path + '.' + name)
        self.rrd_file = rrd_file
    
    
    def getIntervals(self):
        """Return [(start, end)]: start is now minus the retention of the file,
        end is the modification time of the file (never before start)."""
        start = time.time() - self.rrd_file.getRetention()
        end = max(os.stat(self.rrd_file.fs_path).st_mtime, start)
        return [(start, end)]
    
    
    def fetch(self, startTime, endTime, search_by_name=True):
        """Return (timeInfo, values) of this datasource, AVERAGE consolidation.

        search_by_name is not used: it is accepted so this fetch can be called
        like the fetch of the other node classes.
        """
        startString = time.strftime("%H:%M_%Y%m%d+%Ss", time.localtime(startTime))
        endString = time.strftime("%H:%M_%Y%m%d+%Ss", time.localtime(endTime))
        
        # Ask the cache daemon to flush the file first, when one is configured
        if settings.FLUSHRRDCACHED:
            rrdtool.flushcached(self.fs_path, '--daemon', settings.FLUSHRRDCACHED)
        (timeInfo, columns, rows) = rrdtool.fetch(self.fs_path, 'AVERAGE', '-s' + startString, '-e' + endString)
        colIndex = list(columns).index(self.name)
        rows.pop()  # chop off the latest value because RRD returns crazy last values sometimes
        # NOTE: values is a generator, it can be read only once
        values = (row[colIndex] for row in rows)
        
        return (timeInfo, values)


# Exposed Storage API
# Both stores are built when the module is imported.
# LOCAL_STORE only searches the local data directories
LOCAL_STORE = Store(settings.DATA_DIRS)
# STORE also gets the cluster servers as remote hosts
STORE = Store(settings.DATA_DIRS, remote_hosts=settings.CLUSTER_SERVERS)
