# CSVHelper.py - Data handling routines developed by Optibrium for integration
# with StarDrop software
# Use of this code is subject to the terms of the StarDrop License Agreement and
# use, copying or redistribution of this file for use by other persons or
# organisations is prohibited
#
# Copyright(C) Optibrium Ltd 2017
# stardrop-support@optibrium.com

"""
Module provides some functions to assist in parsing CSV type data into a
StarDrop data set.
"""
import datetime
from time import strptime
import app
import string
import io
import csv
from collections import defaultdict


class ColumnInfo:
    """
    Describes one column that is to be loaded into StarDrop.

    Attributes:
        name: The column title shown in StarDrop.
        columnIndex: Position of the column's values in the CSV data.
        errorIndex: Position of a companion column holding error values, or
            -1 (the initial value) when the column has no error column.
    """

    def __init__(self, name, index):
        """
        Build a ColumnInfo from a title and the index of its CSV column.

        @param: name: The name of the column to appear in StarDrop.
        @param: index: The index of the column's data in the CSV data.
        """
        self.name, self.columnIndex = name, index
        # No error column is associated with a freshly created column.
        self.errorIndex = -1

    def __repr__(self):
        """Return a short human-readable summary of the column."""
        fields = (self.name, self.columnIndex, self.errorIndex)
        return "Name: %s, index %i, error %i" % fields


def checkType(values, name, config):
    """
    Work out which StarDrop data type a column of strings should be given.

    A column whose name contains "STRUCT" or "SMILES" (case-insensitive) is
    always a molecule column. Otherwise up to config.TYPE_CHECK_DEPTH non-empty
    entries are sampled; if at least config.TYPE_CHECK_FRACTION of them look
    numeric the column is continuous, otherwise it is a date/time column when
    entryIsDateTime() says so and a string column if not. A column with no
    non-empty entries is treated as continuous.

    @param: values: A list of strings representing the data in the column.
    @param: name: The name of the column.
    @param: config: A configuration module that can be used to control the process.
    @returns: A StarDrop data type (cont, mol, datetime or string)
    """
    # Structure columns are recognised purely by their name.
    column_name = name.upper()
    if "STRUCT" in column_name or "SMILES" in column_name:
        return app.mol

    inspected = 0.0  # non-empty entries sampled so far
    numeric = 0.0    # sampled entries that look like numbers
    for text in values:
        if inspected >= config.TYPE_CHECK_DEPTH:
            break
        # Empty cells are skipped entirely: they do not use up the sample.
        if text is None or text == '':
            continue
        inspected += 1.0
        try:
            float(text)
        except ValueError:
            if text.lower() == 'invalid':
                # An 'invalid' marker is accepted inside a numeric column.
                numeric += 1.0
            elif text.startswith(('<', '>', '=')):
                # A qualified value such as "<5" still counts as a number.
                try:
                    float(text[1:])
                except ValueError:
                    pass
                else:
                    numeric += 1.0
        else:
            numeric += 1.0

    # With nothing to inspect, fall back to the continuous default.
    if inspected == 0:
        return app.cont
    if numeric / inspected >= config.TYPE_CHECK_FRACTION:
        return app.cont
    if entryIsDateTime(values, config):
        return app.datetime
    return app.string


def entryIsDateTime(values, config):
    """
    Decide whether a column of strings should be treated as date/time data.

    The first config.TYPE_CHECK_DEPTH entries are examined. An entry counts as
    a date when it parses with config.DATE_FORMAT or is exactly the string
    'invalid'. Every examined entry, including empty or missing ones, counts
    towards the total.

    @param: values: A list of strings (or None for missing cells) from the column.
    @param: config: A configuration module providing TYPE_CHECK_DEPTH,
            TYPE_CHECK_FRACTION and, optionally, DATE_FORMAT.
    @returns: True if at least TYPE_CHECK_FRACTION of the examined entries are
              dates, otherwise False (also False when there are no entries).
    """
    has_format = hasattr(config, 'DATE_FORMAT')
    checked = 0.0
    date_entries = 0.0
    for value in values:
        if checked >= config.TYPE_CHECK_DEPTH:
            break
        checked += 1.0
        # A missing cell (None, as produced by csv.DictReader for short rows)
        # can never be a date. Passing it to strptime would raise TypeError
        # rather than ValueError, so it is filtered out here.
        if not has_format or value is None:
            continue
        try:
            datetime.datetime.strptime(value, config.DATE_FORMAT)
        except ValueError:
            # An 'invalid' marker is accepted inside a date column.
            if value == 'invalid':
                date_entries += 1.0
        else:
            date_entries += 1.0

    if checked == 0:
        return False
    return date_entries / checked >= config.TYPE_CHECK_FRACTION


def extractResult(result):
    """
    Split an input string into a qualifier code and a numeric value.

    Trailing ASCII letters (for example a unit suffix) are removed first. A
    leading '<' gives qualifier '\\x01', a leading '>' gives '\\x02', and a
    leading '=' or no operator gives '\\x00'.

    @param: result: The input string.
    @returns: A tuple of qualifier and value (as a float)
    """
    # No attempt is made to cope with empty or malformed input here: the
    # resulting IndexError / ValueError is left for the caller to handle.
    qualifierCodes = {'<': '\x01', '>': '\x02', '=': '\x00'}

    number = result.rstrip(string.ascii_letters)
    operator = number[0]
    qualifier = '\x00'
    if operator in qualifierCodes:
        qualifier = qualifierCodes[operator]
        # Drop the operator character(s) and any spaces that follow them.
        number = number.lstrip(operator).lstrip()
    return qualifier, float(number)


def extractValueFromList(results):
    """
    Get a data entry from a list of values by calculating a mean.

    Items that cannot be converted to a float are ignored; the mean is taken
    over the remaining items only.

    @param: results: The values as strings.
    @returns: A numeric data entry containing the average value (with an error
              of 0.0), or an invalid entry when no item could be converted.
    """
    count = 0
    total = 0.0
    for item in results:
        try:
            total += float(item.strip())
        except (AttributeError, TypeError, ValueError):
            # Not a usable number (bad text, or not a string at all): skip it.
            # Only conversion failures are ignored, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
        count += 1
    if count > 0:
        return app.Dataset.cont(total / count, 0.0)
    return app.Dataset.createInvalidEntry(app.cont)


def createContEntry(entry, errorEntry='0.0'):
    """
    Create a numeric entry from strings for the value and the error.

    If both strings convert directly to floats they are used as they are.
    Otherwise the entry is re-examined: a comma-separated list is averaged by
    extractValueFromList(), and a single value is parsed by extractResult(),
    which understands a leading '<', '>' or '=' qualifier. In the fallback
    case the error is 0.0 if errorEntry itself failed to convert.

    @param: entry: The value as a string, which may include a qualifier.
    @param: errorEntry: The error value as a string.
    @returns: A numeric data entry for a StarDrop data set.
    @raises: Whatever extractResult() raises for text it cannot parse, and
             AttributeError if entry is not a string and not float-convertible.
    """
    value = 0.0
    error = 0.0
    qualifier = '\x00'
    try:
        error = float(errorEntry)
        value = float(entry)
    except (TypeError, ValueError):
        # Only conversion failures trigger the fallback parsing, so
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        results = entry.split(',')
        if len(results) > 1:
            return extractValueFromList(results)
        qualifier, value = extractResult(results[0].strip())
    # The leading tag evaluates to 29 — presumably StarDrop's internal code
    # for this kind of entry; TODO confirm against the app module.
    return (12 + 2 * 8 + 1, (True, [], []), (qualifier, value, error))


def _cleanColumnName(name, decode=False):
    """
    Strip surrounding double quotes from a column name.

    When decode is True and the name is a byte string (the Python 2 str
    case), it is also decoded from latin-1. Text strings are returned
    undecoded, because Python 3 str has no decode() method.
    """
    cleaned = name.strip('"')
    if decode and isinstance(cleaned, bytes):
        cleaned = cleaned.decode('latin-1')
    return cleaned


def createDataSet(output, config):
    """
    Take the content of a csv file or csv-like output from another source,
    and parse into a StarDrop data set.

    The first line supplies the column names. If a ColumnFormatter module can
    be imported, its formatColumns() is given the chance to adjust the list of
    columns; any failure there is ignored. Each column's type is deduced with
    checkType() unless it has an error column, in which case it is numeric. A
    column whose entries cannot be built with the deduced type is loaded as a
    string column instead.

    @param: output: The csv data as a string (utf-8 encoded bytes are accepted).
    @param: config: A configuration module that can be used to control the process.
    @returns: A python representation of a StarDrop data set.
    """
    # io.StringIO needs text. Decode byte input (Python 2 str or Python 3
    # bytes) and leave text that is already decoded untouched.
    if isinstance(output, bytes):
        output = output.decode('utf-8')

    with io.StringIO(output) as csv_buffer:
        dict_reader = csv.DictReader(csv_buffer, delimiter=',', quotechar='"')

        headers = [ColumnInfo(header, index)
                   for index, header in enumerate(dict_reader.fieldnames)]

        # Re-arrange the rows into one list of values per column name.
        columnValues = defaultdict(list)
        for row in dict_reader:
            for key, val in row.items():
                columnValues[key].append(val)
        table = [columnValues[header] for header in dict_reader.fieldnames]

        # Optional, best-effort customisation hook.
        try:
            import ColumnFormatter
            headers = ColumnFormatter.formatColumns(headers)
        except Exception:
            pass

        # The original test was len(table[:-1]) > 0, which is true only when
        # there is more than one column and so discarded all data of a
        # single-column input. Inputs with several columns behave as before;
        # a single column is now loaded whenever it contains rows.
        hasData = len(table) > 1 or any(table)

        # now build the columns from the data in the table
        ds = app.Dataset()
        ds.cols = []
        for header in headers:
            col = []
            if hasData:
                colType = app.cont
                if header.errorIndex == -1:
                    colType = checkType(table[header.columnIndex], header.name, config)
                details = ''
                try:
                    if colType == app.cont:
                        details = ((0.0, 0.0), (0, 0))
                    ds.columns.append(
                        app.Dataset.meta(_cleanColumnName(header.name), colType, details))
                    if header.errorIndex == -1:
                        for entry in table[header.columnIndex]:
                            if entry is None:
                                entry = ''
                            if colType == app.mol:
                                col.append(app.Dataset.molecule(entry))
                            elif colType == app.string:
                                col.append(app.Dataset.string(entry))
                            else:
                                # NOTE(review): columns typed app.datetime by
                                # checkType also land here and are parsed as
                                # numbers — confirm that this is intended.
                                try:
                                    col.append(createContEntry(entry))
                                except Exception as e:
                                    col.append(app.Dataset.createInvalidEntry(app.cont))
                                    if str(e).find("Entry contains a list") != -1:
                                        raise
                    else:
                        # Value column paired with its error column.
                        for value, error in zip(table[header.columnIndex], table[header.errorIndex]):
                            try:
                                col.append(createContEntry(value, error))
                            except Exception:
                                col.append(app.Dataset.createInvalidEntry(app.cont))
                except Exception:
                    # add as a string column instead
                    ds.columns.pop()
                    col = []
                    ds.columns.append(
                        app.Dataset.meta(_cleanColumnName(header.name, decode=True), app.string, ''))
                    for entry in table[header.columnIndex]:
                        if entry is None:
                            entry = ''
                        col.append(app.Dataset.string(entry))
            else:
                # Nothing to load: register an empty string column.
                ds.columns.append(
                    app.Dataset.meta(_cleanColumnName(header.name, decode=True), app.string, ''))
            ds.cols.append(col)

        return ds
