# dataset_import_export.py - helper functions to import/export datasets between StarDrop and Python
# Use of this code is subject to the terms of the StarDrop License Agreement and use, copying or redistribution of this
# file for use by other persons or organisations is prohibited
# Copyright(C) Optibrium Ltd 2022
# stardrop-support@optibrium.com
#

import re
from datetime import datetime
from datetime import date
from abc import ABC, abstractmethod
from typing import Any, Union, Optional

import app
import stardropunits

import stardrop_helper_functions.logfile_handling as logfile
from stardrop_helper_functions.utils import get_column_type_str, get_qualifier_str
from stardrop_helper_functions.qt_widgets import PromptGUIs
from stardrop_helper_functions.qt_widgets import DisplayGUIs

"""

The functions and classes within this script are supposed to compose an API which makes it easier to import/export
datasets from the StarDrop GUI. This custom script is not meant to be called directly from the StarDrop Custom Scripts
menu. It does not yet support all available data types.

Available Data types:

    app.string
    app.cont
    app.datetime
    app.image
    app.mol
    app.cat

"""


class Parser(ABC):
    """
    Abstract template for the data-type-specific Parser classes of this script.

    Every data type which this script can read into Python is expected to have its own
    Parser child class. Each child class provides five behaviors:

        parse_stardrop_header: Read the header of a column from a StarDrop-compatible dataset,
                               extract the metadata/formatting relevant for that column and
                               store it in a python dictionary. Reading the column name is not
                               part of this method (the app.Dataset class already offers
                               convenient methods for that).

        create_stardrop_header: The reverse of parse_stardrop_header(): turn the python
                                dictionary back into a column header which can be placed in a
                                StarDrop-compatible dataset (before that dataset is parsed
                                into binary).

        parse_stardrop_entry: Read a single entry (i.e., a tuple with metadata) from a column
                              of the StarDrop dataset and convert it into a value which Python
                              code can store and modify more easily than the tuple.

        create_stardrop_entry: The reverse of parse_stardrop_entry(): turn a value back into a
                               column entry (i.e., a tuple with metadata) for the
                               StarDrop-compatible dataset.

        create_invalid_stardrop_entry: Turn an empty value into a column entry for the
                                       StarDrop-compatible dataset.

    All five are abstract static methods which each child class has to implement. The type
    hints given here are a recommendation rather than a strict rule; however, any deviation
    from them should be explicitly hardcoded into the _parse_stardrop_dataset_to_python_dataset()
    and _parse_python_dataset_to_stardrop_dataset() methods.
    """

    @staticmethod
    @abstractmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> Optional[dict[str, Union[float, int, str]]]:
        """
        Convert the metadata of a StarDrop column header into a mapping which is easier to work with.
        Data types without metadata properties may yield an empty mapping.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The index of the column whose header has to be parsed
        @returns: A mapping of readable property names to the column's metadata properties
        """

    @staticmethod
    @abstractmethod
    def create_stardrop_header(format_dict: Optional[dict[str, Union[float, int, str]]],
                               column_name: str) -> tuple[Any, ...]:
        """
        Convert a column name and the mapping of its metadata properties into a StarDrop-compatible tuple.
        Data types without metadata properties may be given an empty mapping.
        @param: format_dict: A mapping of readable property names to the column's metadata properties
        @param: column_name: The name of the column whose metadata properties need to be converted
        @returns: A StarDrop-compatible tuple representing the column header
        """

    @staticmethod
    @abstractmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> Any:
        """
        Convert one StarDrop dataset entry into a value which should be easier to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The parsed entry from the StarDrop dataset
        """

    @staticmethod
    @abstractmethod
    def create_stardrop_entry(value: Any) -> tuple[Any, ...]:
        """
        Convert a value into a StarDrop dataset entry.
        @param: value: The value to be entered into the dataset
        @returns: A StarDrop dataset entry corresponding to the given value
        """

    @staticmethod
    @abstractmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Build an invalid entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid entry
        """


class StringParser(Parser):
    """
    Parser implementation for string-type (app.string) columns.
    """

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> None:
        """
        Header parsing for string-type columns, which have no metadata properties to extract.
        @param: stardrop_dataset: The unparsed StarDrop dataset (not inspected)
        @param: column_idx: The index of the column whose header has to be parsed (not inspected)
        @returns: Always None, since there is no metadata to map
        """

        # Neither argument is read: the result is None for every string column.
        header_metadata = None

        return header_metadata

    @staticmethod
    def create_stardrop_header(format_dict: None,
                               column_name: str) -> tuple[Any, ...]:
        """
        Build the StarDrop-compatible header for a string-type column.
        @param: format_dict: Ignored; string-type columns have no metadata properties
        @param: column_name: The name of the column for which the header is created
        @returns: A StarDrop-compatible tuple representing the column header
        """

        header = app.Dataset.meta(column_name, app.string)

        return header

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> str:
        """
        Read one entry of a string-type column via the dataset's stringval() accessor.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The string stored at the given position of the StarDrop dataset
        """

        return stardrop_dataset.stringval(column_idx, row_idx)

    @staticmethod
    def create_stardrop_entry(value: str) -> tuple[Any, ...]:
        """
        Wrap a string into a StarDrop dataset entry.
        @param: value: The string to be entered into the dataset
        @returns: A StarDrop dataset entry corresponding to the given string
        """

        stardrop_entry = app.Dataset.string(value)

        return stardrop_entry

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Build an invalid string-type entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid string entry
        """

        invalid_entry = app.Dataset.createInvalidEntry(app.string, invalid_entry_msg)

        return invalid_entry


class NumberParser(Parser):
    """
    A concrete implementation of the abstract Parser Class for numerical-type columns.

    Entries are exchanged with Python as (value, error, qualifier) tuples, in that order.
    """

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> dict[str, Union[float, int, str]]:
        """
        Parse the StarDrop column header's metadata tuple into a mapping which should be easier to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The index of the column whose header has to be parsed
        @returns: A mapping with the keys 'min', 'max', 'units', 'display mode' and 'precision'
        """

        # The second element of the units pair and the factor are part of the column details
        # but are not carried over into the Python-side mapping.
        ((minimum, maximum), (units, _), _factor, display_mode, precision) = \
            stardrop_dataset.columnDetails(column_idx)

        return {
            'min': minimum,
            'max': maximum,
            'units': units,
            'display mode': display_mode,
            'precision': precision,
        }

    @staticmethod
    def create_stardrop_header(format_dict: Optional[dict[str, Union[float, int, str]]],
                               column_name: str) -> tuple[Any, ...]:
        """
        Parse a given column's name and mapping of its metadata properties into a StarDrop-compatible tuple.
        When the mapping is None or empty, default formatting values are used instead.
        @param: format_dict: A mapping of the column's metadata properties to readable property names
                             (keys 'min', 'max', 'units', 'display mode' and 'precision')
        @param: column_name: The name of the column whose metadata properties need to be converted
        @returns: A StarDrop-compatible tuple representing the column header
        """

        if format_dict:
            minimum = format_dict['min']
            maximum = format_dict['max']
            units = format_dict['units']
            display_mode = format_dict['display mode']
            precision = format_dict['precision']
        else:
            # No formatting available: fall back to the defaults used by this script.
            minimum = 0.0
            maximum = 0.0
            units = stardropunits.OTHER
            display_mode = 'Default'
            precision = -1

        return app.Dataset.createNumberHeader(column_name,
                                              units=units,
                                              min=minimum,
                                              max=maximum,
                                              display_mode=display_mode,
                                              precision=precision)

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> tuple[float, float, str]:
        """
        Parse the StarDrop dataset entry into a (value, error, qualifier) tuple which should be easier
        to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The parsed (float value, float error, str qualifier) tuple from the StarDrop dataset
        """

        value = float(stardrop_dataset.stringval(column_idx, row_idx))
        error = float(stardrop_dataset.stringerror(column_idx, row_idx))

        # The first element of the raw entry is translated into its qualifier string.
        qualifier = get_qualifier_str(stardrop_dataset.get(column_idx, row_idx)[0])

        return (value, error, qualifier)

    @staticmethod
    def create_stardrop_entry(value: tuple[float, float, str]) -> tuple[Any, ...]:
        """
        Parse a (value, error, qualifier) tuple into a StarDrop dataset entry.
        @param: value: The (float value, float error, str qualifier) tuple to be entered into the dataset,
                       i.e. the same layout which parse_stardrop_entry() of this class produces
        @returns: A StarDrop dataset entry corresponding to the given tuple
        """

        return app.Dataset.cont(value[0], value[1], value[2])

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Generate an invalid number-type entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid numerical entry
        """

        return app.Dataset.createInvalidEntry(app.cont, invalid_entry_msg)


class DateParser(Parser):
    """
    A concrete implementation of the abstract Parser Class for date-type columns.
    """

    # Format in which this parser reads and writes the date string of a dataset entry.
    # Shared by parse_stardrop_entry() and create_stardrop_entry() so both always agree.
    _ENTRY_DATE_FORMAT = '%Y-%b-%d'

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> dict[str, str]:
        """
        Parse the StarDrop column header's metadata tuple into a mapping which should be easier to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The index of the column whose header has to be parsed
        @returns: A mapping with the single key 'date format', holding the first element of the column details
        """

        date_format_str = stardrop_dataset.columnDetails(column_idx)[0]

        return {'date format': date_format_str}

    @staticmethod
    def create_stardrop_header(format_dict: Optional[dict[str, str]],
                               column_name: str) -> tuple[Any, ...]:
        """
        Parse a given column's name and mapping of its metadata properties into a StarDrop-compatible tuple.
        When the mapping is None or empty, the date format '%d-%b-%Y' is used.
        @param: format_dict: A mapping of the column's metadata properties to readable property names
                             (key 'date format')
        @param: column_name: The name of the column whose metadata properties need to be converted
        @returns: A StarDrop-compatible tuple representing the column header
        """

        if format_dict:
            date_format_str = format_dict['date format']
        else:
            date_format_str = '%d-%b-%Y'

        return app.Dataset.meta(column_name, app.datetime, date_format_str)

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> date:
        """
        Parse the StarDrop dataset entry into a date which should be easier to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The parsed date from the StarDrop dataset
        """

        # The first element of the raw entry is the date string.
        date_str = stardrop_dataset.get(column_idx, row_idx)[0]

        return datetime.strptime(date_str, DateParser._ENTRY_DATE_FORMAT).date()

    @staticmethod
    def create_stardrop_entry(value: date) -> tuple[Any, ...]:
        """
        Parse a date into a StarDrop dataset entry.
        @param: value: The date to be entered into the dataset
        @returns: A StarDrop dataset entry corresponding to the given date
        """

        return app.Dataset.date(value.strftime(DateParser._ENTRY_DATE_FORMAT))

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Generate an invalid date-type entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid date entry
        """

        return app.Dataset.createInvalidEntry(app.datetime, invalid_entry_msg)


class ImageParser(Parser):
    """
    Parser implementation for image-type (app.image) columns.
    """

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> None:
        """
        Header parsing for image-type columns, which have no metadata properties to extract.
        @param: stardrop_dataset: The unparsed StarDrop dataset (not inspected)
        @param: column_idx: The index of the column whose header has to be parsed (not inspected)
        @returns: Always None, since there is no metadata to map
        """

        # Neither argument is read: the result is None for every image column.
        header_metadata = None

        return header_metadata

    @staticmethod
    def create_stardrop_header(format_dict: None,
                               column_name: str) -> tuple[Any, ...]:
        """
        Build the StarDrop-compatible header for an image-type column.
        @param: format_dict: Ignored; image-type columns have no metadata properties
        @param: column_name: The name of the column for which the header is created
        @returns: A StarDrop-compatible tuple representing the column header
        """

        header = app.Dataset.meta(column_name, app.image)

        return header

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> str:
        """
        Read the rolling url image provider string of one image-type entry.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The rolling url image provider string from the StarDrop dataset
        """

        raw_entry = stardrop_dataset.get(column_idx, row_idx)

        # The string of interest is the second element of the raw entry.
        image_provider_str = raw_entry[1]

        return image_provider_str

    @staticmethod
    def create_stardrop_entry(value: str) -> tuple[Any, ...]:
        """
        Wrap a rolling url image provider string into a StarDrop dataset entry.
        @param: value: The rolling url image provider string to be entered into the dataset
        @returns: A StarDrop dataset entry corresponding to the given rolling url image provider string
        """

        stardrop_entry = app.Dataset.image(value)

        return stardrop_entry

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Build an invalid image-type entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid image entry
        """

        invalid_entry = app.Dataset.createInvalidEntry(app.image, invalid_entry_msg)

        return invalid_entry


class MoleculeParser(Parser):
    """
    Parser implementation for molecular-type (app.mol) columns.
    """

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> None:
        """
        Header parsing for molecular-type columns, which have no metadata properties to extract.
        @param: stardrop_dataset: The unparsed StarDrop dataset (not inspected)
        @param: column_idx: The index of the column whose header has to be parsed (not inspected)
        @returns: Always None, since there is no metadata to map
        """

        # Neither argument is read: the result is None for every molecular column.
        header_metadata = None

        return header_metadata

    @staticmethod
    def create_stardrop_header(format_dict: None,
                               column_name: str) -> tuple[Any, ...]:
        """
        Build the StarDrop-compatible header for a molecular-type column.
        @param: format_dict: Ignored; molecular-type columns have no metadata properties
        @param: column_name: The name of the column for which the header is created
        @returns: A StarDrop-compatible tuple representing the column header
        """

        header = app.Dataset.meta(column_name, app.mol)

        return header

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int,
                             preferred_mol_str_type: str,
                             appui) -> str:
        """
        Read one molecular entry and return it as either a SMILES or a Molfile string.
        The signature of this method explicitly deviates from the abstract Parser Class architecture.

        Without a preference (None), the Molfile is returned when it has content and the SMILES
        otherwise. With 'SMILES', the SMILES is always returned. With 'Molfile', a Molfile is always
        returned; if the stored one has no content it is generated from the SMILES instead.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @param: preferred_mol_str_type: Either 'SMILES', 'Molfile' or None (no preference)
        @param: appui: The StarDrop GUI object
        @returns: The SMILES/Molfile string for the entry of the StarDrop dataset
        """

        wants_smiles = preferred_mol_str_type == 'SMILES'
        wants_molfile = preferred_mol_str_type == 'Molfile'

        # Any value other than None, 'SMILES' or 'Molfile' is reported through the log file helper.
        if preferred_mol_str_type is not None and not wants_smiles and not wants_molfile:
            logfile.record_and_raise_exception("The preferred molecular string type input to '_translate_molecular_column_entry_into_string()' must either be 'SMILES' or 'Molfile'.")  # noqa: E501

        molecule_details = stardrop_dataset.getColumn(column_idx)[row_idx][2]
        smiles_str = molecule_details[0]

        # Only the text in front of the first '$$$$' separator is kept.
        molfile_str = appui.getMolFileFromStarDropMolecule(molecule_details[4][1]).split('$$$$')[0]

        # The stored Molfile is used only if the text between 'V2000\n' and 'M  END' contains at
        # least one line break (presumably meaning that there are atom lines - TODO confirm).
        molfile_body = molfile_str.split('V2000\n')[1].split('M  END')[0]
        molfile_has_content = '\n' in molfile_body

        if molfile_has_content:
            return smiles_str if wants_smiles else molfile_str

        if wants_molfile:
            # No usable Molfile was stored, so one is generated from the SMILES.
            return appui.getMolFileFromSmiles(smiles_str).split('$$$$')[0]

        return smiles_str

    @staticmethod
    def create_stardrop_entry(value: str, appui) -> tuple[Any, ...]:
        """
        Wrap a SMILES/Molfile string into a StarDrop dataset entry.
        A string without any line break is handled as SMILES, everything else as a Molfile.
        @param: value: The SMILES/Molfile string to be entered into the dataset
        @param: appui: The StarDrop GUI object
        @returns: A StarDrop dataset entry corresponding to the given SMILES/Molfile string
        """

        if '\n' not in value:
            return app.Dataset.molecule(value)

        # Molfiles are handed to the GUI object as a list of lines.
        molfile_lines = value.split('\n')
        stardrop_molecule = appui.getStarDropMoleculeFromMolFile(molfile_lines)

        return app.Dataset.molecule('', stardrop_molecule.data())

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Build an invalid molecular-type entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid molecular entry
        """

        invalid_entry = app.Dataset.createInvalidEntry(app.mol, invalid_entry_msg)

        return invalid_entry


class CategoryParser(Parser):
    """
    A concrete implementation of the abstract Parser Class for categorical-type columns.
    """

    @staticmethod
    def parse_stardrop_header(stardrop_dataset: app.Dataset,
                              column_idx: int) -> None:
        """
        Parse the StarDrop column header's metadata tuple into a mapping which should be easier to work with.
        For categorical-type columns, there are no metadata properties to extract.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The index of the column whose header has to be parsed
        @returns: Always None, since there is no metadata to map
        """

        return None

    @staticmethod
    def create_stardrop_header(format_dict: None,
                               column_name: str,
                               column: list[Optional[str]]) -> list[Any]:
        """
        Parse a given column's name and its entries into a StarDrop-compatible categorical column header.
        This function explicitly deviates from the abstract Parser Class architecture.
        @param: format_dict: Ignored; categorical-type columns have no metadata properties
        @param: column_name: The name of the column for which the header is created
        @param: column: The actual column for which the header is created; its distinct
                        non-None entries become the known categories
        @returns: A two-element list [column header, known categories mapping], as obtained from
                  app.Dataset.createCategoryHeader()
        """

        # dict.fromkeys() removes duplicates while keeping the order of first appearance. A set
        # would iterate in an order which can change between interpreter runs (string hash
        # randomization), making the order of the categories irreproducible.
        unique_categories = [category for category in dict.fromkeys(column) if category is not None]

        (column_details, known_categories_dict) = \
            app.Dataset.createCategoryHeader(column_name, unique_categories)

        return [column_details, known_categories_dict]

    @staticmethod
    def parse_stardrop_entry(stardrop_dataset: app.Dataset,
                             column_idx: int,
                             row_idx: int) -> str:
        """
        Parse the StarDrop dataset entry into a categorical string which should be easier to work with.
        @param: stardrop_dataset: The unparsed StarDrop dataset
        @param: column_idx: The column index of the dataset entry
        @param: row_idx: The row index of the dataset entry
        @returns: The parsed categorical string from the StarDrop dataset
        """

        return stardrop_dataset.stringval(column_idx, row_idx)

    @staticmethod
    def create_stardrop_entry(value: str,
                              known_categories_dict: dict[str, int]) -> tuple[Any, ...]:
        """
        Parse a categorical string into a StarDrop dataset entry.
        This function explicitly deviates from the abstract Parser Class architecture.
        @param: value: The categorical string to be entered into the dataset
        @param: known_categories_dict: A mapping of all of the known categories (for the column which this
                                       entry is being inserted into) to their corresponding integers
        @returns: A StarDrop dataset entry corresponding to the given categorical string
        """

        return app.Dataset.createCategoryEntry(value, known_categories_dict)

    @staticmethod
    def create_invalid_stardrop_entry(invalid_entry_msg: str) -> tuple[Any, ...]:
        """
        Generate an invalid categorical entry for a StarDrop dataset.
        @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
        @returns: A StarDrop-compatible tuple corresponding to an invalid categorical entry
        """

        return app.Dataset.createInvalidEntry(app.cat, invalid_entry_msg)


def _identify_ID_column(stardrop_dataset: app.Dataset,
                        column_types: dict[str, int],
                        ID_column_name_regular_expression: str) -> Optional[str]:
    """
    Automatically identify the ID column from the given StarDrop dataset. May return "None" if no
    pre-designated ID column exists and no other text column can easily be identified as an ID column.

    The pre-designated ID column of the dataset is used whenever it is amongst the imported columns.
    Otherwise the string-type columns are the candidates: a single candidate is selected directly,
    several candidates are first narrowed down to those whose name matches the regular expression.
    @param: stardrop_dataset: The unparsed StarDrop dataset
    @param: column_types: a dictionary mapping (imported) column names to data types
    @param: ID_column_name_regular_expression: A regular expression string used to identify which column
                                               name might correspond to the ID column if it isn't obvious
    @returns: The name of the identified ID column, or None if no column could be selected
    """

    designated_name = stardrop_dataset.idColName

    if designated_name and designated_name in column_types:
        return designated_name

    # Explain in the log why the pre-designated ID column can't be used.
    if designated_name:
        dataset_column_names = \
            [stardrop_dataset.columnName(col_idx) for col_idx in range(stardrop_dataset.columnSz())]

        if designated_name in dataset_column_names:
            # The column exists in the dataset but is not one of the imported columns.
            reason = f'The pre-designated ID column "{designated_name}" isn\'t amongst the imported columns'
        else:
            reason = f'The pre-designated ID column "{designated_name}" doesn\'t exist in this dataset'
    else:
        reason = 'No pre-designated ID column exists'

    logfile.record_warning(f'{reason}. Automatically searching for an ID column...')

    # Only string-type columns are considered as possible ID columns.
    candidate_names = \
        [column_name for column_name, column_type in column_types.items() if column_type == app.string]

    if len(candidate_names) > 1:

        logfile.record_info(f'Multiple possible ID columns were detected. Filtering out columns without the following regular expression: "{ID_column_name_regular_expression}"')  # noqa: E501

        pattern = re.compile(ID_column_name_regular_expression)

        candidate_names = \
            [column_name for column_name in candidate_names if pattern.search(column_name) is not None]

    if len(candidate_names) == 1:
        id_column_name = candidate_names[0]
        logfile.record_info(f'Automatically selected \'{id_column_name}\' as the ID column')

        return id_column_name

    logfile.record_warning(f'{"Multiple" if len(candidate_names) > 1 else "No"} possible ID columns were detected. Could not automatically select any ID column.')  # noqa: E501

    return None


def _parse_stardrop_dataset_to_python_dataset(appui,
                                              ID_column_name_regular_expression: str,
                                              preferred_mol_str_type: str,
                                              dataset_name: str) -> dict[str, Any]:
    """
    Import a dataset with the specified name from the StarDrop GUI and then parse it into interpretable
    Python dictionaries and strings. Columns whose data type has no Parser class are skipped and an
    error is logged for each of them.
    @param: appui: The StarDrop GUI
    @param: ID_column_name_regular_expression: A regular expression used to identify which column name might
                                               correspond to the ID column if it isn't obvious
    @param: preferred_mol_str_type: Specification on whether molecular string should be SMILES or Molfiles
    @param: dataset_name: The name of the dataset which the user selected
    @returns: A parsed version of the StarDrop dataset which is convenient for Python. It is a dictionary with
              the keys 'name', 'column', 'column type', 'column index', 'column format', 'id column name' and
              'refresh keys'; the four 'column...' entries are themselves dictionaries keyed by column name
    """

    stardrop_dataset = app.Dataset()
    stardrop_dataset.parse(appui.importDS(dataset_name))

    dataset_specific_msg = f'the "{dataset_name}"' if dataset_name else 'a'
    logfile.record_info(f'Importing {dataset_specific_msg} Data Set from the StarDrop GUI into Python')

    # Only the 3rd and 4th element of each refresh key are carried over into Python
    refresh_keys = [{'params': refresh_key[2], 'columns': refresh_key[3]}
                    for refresh_key in stardrop_dataset.refresh_keys]

    parser_classes = \
        {app.string   :   StringParser,  # noqa: E203
         app.cont     :   NumberParser,  # noqa: E203
         app.datetime :     DateParser,  # noqa: E203
         app.mol      : MoleculeParser,  # noqa: E203
         app.cat      : CategoryParser,  # noqa: E203
         app.image    :    ImageParser}  # noqa: E203

    columns = {}
    column_types = {}
    column_idxs = {}
    column_formats = {}

    # The number of rows is the same for every column, so it only needs to be read once
    row_count = stardrop_dataset.rowSz()

    for column_idx in range(stardrop_dataset.columnSz()):

        column_type = stardrop_dataset.columnType(column_idx)
        column_name = stardrop_dataset.columnName(column_idx)

        parser_class = parser_classes.get(column_type)

        if parser_class is None:
            logfile.record_error(f'Could not import the following column type for the "{column_name}" column: "{get_column_type_str(column_type)}"')  # noqa: E501
            continue

        # Hardcoded deviation for molecular-type columns (they need two extra arguments)
        if column_type == app.mol:
            def entry_parsing_func(dataset, col_idx, row_idx, _parser_class=parser_class):
                return _parser_class.parse_stardrop_entry(dataset, col_idx, row_idx, preferred_mol_str_type, appui)
        else:
            entry_parsing_func = parser_class.parse_stardrop_entry

        column_types[column_name] = column_type
        column_idxs[column_name] = column_idx
        column_formats[column_name] = parser_class.parse_stardrop_header(stardrop_dataset, column_idx)

        # Fetch the raw column once per column instead of once per row
        raw_entries = stardrop_dataset.getColumn(column_idx)

        # An entry is only parsed when raw_entry[1][0] is truthy (presumably the validity flag of the
        # entry - TODO confirm against the app.Dataset documentation); other entries become None
        columns[column_name] = \
            [entry_parsing_func(stardrop_dataset, column_idx, row_idx) if raw_entries[row_idx][1][0] else None
             for row_idx in range(row_count)]

    id_column_name = _identify_ID_column(stardrop_dataset, column_types, ID_column_name_regular_expression)

    py_dataset = \
        {'name'          : dataset_name,    # noqa: E203
         'column'        : columns,         # noqa: E203
         'column type'   : column_types,    # noqa: E203
         'column index'  : column_idxs,     # noqa: E203
         'column format' : column_formats,  # noqa: E203
         'id column name': id_column_name,  # noqa: E203
         'refresh keys'  : refresh_keys}    # noqa: E203

    return py_dataset


def _parse_python_dataset_to_stardrop_dataset(appui,
                                              py_dataset: dict[str, Any],
                                              invalid_entry_msg: str) -> app.Dataset:
    """
    Convert a python-parsed version of the StarDrop dataset back into a data structure which is
    more compatible with the StarDrop client. The dataset must be packed before it is actually
    exported. Columns whose data type has no Parser class are left out and a warning is logged
    for each of them.
    @param: appui: The StarDrop GUI
    @param: py_dataset: A parsed version of the StarDrop dataset which is convenient for Python
    @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
                               (used for every entry whose python value is None)
    @returns: A StarDrop dataset which can be more easily exported to the StarDrop client
    """

    stardrop_dataset = app.Dataset()
    stardrop_dataset.idColName = py_dataset['id column name']
    stardrop_dataset.addRefreshKey({})

    parsers_by_type = \
        {app.string   :   StringParser,  # noqa: E203
         app.cont     :   NumberParser,  # noqa: E203
         app.datetime :     DateParser,  # noqa: E203
         app.mol      : MoleculeParser,  # noqa: E203
         app.cat      : CategoryParser,  # noqa: E203
         app.image    :    ImageParser}  # noqa: E203

    for column_name, column_idx in py_dataset['column index'].items():

        python_values = py_dataset['column'][column_name]
        column_type = py_dataset['column type'][column_name]
        column_format = py_dataset['column format'][column_name]

        if column_type not in parsers_by_type:
            logfile.record_warning(f"{column_name} could not be parsed since it has an unknown data type: '{get_column_type_str(column_type)}'")  # noqa: E501
            continue

        parser = parsers_by_type[column_type]

        if column_type == app.cat:

            # Categorical columns: the header is built from the column values as well, and the
            # resulting category lookup is needed again for every single entry
            header, known_categories = parser.create_stardrop_header(column_format, column_name, python_values)

            def to_stardrop_entry(value):
                return parser.create_stardrop_entry(value, known_categories)

        elif column_type == app.mol:

            # Molecular columns: every entry needs access to the StarDrop GUI
            header = parser.create_stardrop_header(column_format, column_name)

            def to_stardrop_entry(value):
                return parser.create_stardrop_entry(value, appui)

        else:

            header = parser.create_stardrop_header(column_format, column_name)
            to_stardrop_entry = parser.create_stardrop_entry

        # Every Parser class builds its invalid entries the same way
        to_invalid_entry = parser.create_invalid_stardrop_entry

        stardrop_entries = []
        for value in python_values:
            if value is None:
                stardrop_entries.append(to_invalid_entry(invalid_entry_msg))
            else:
                stardrop_entries.append(to_stardrop_entry(value))

        stardrop_dataset.insertColumn(header, stardrop_entries, column_idx)

    return stardrop_dataset


def import_stardrop_dataset(appui,
                            ID_column_name_regular_expression: str = '(I|i)(D|d)',
                            preferred_mol_str_type: Optional[str] = None,
                            dataset_name: str = '') -> Optional[dict[str, Any]]:
    """
    Import a dataset from the StarDrop GUI and then parse it into interpretable Python dictionaries and strings.
    Will return None (after logging an error and showing a warning window) if the StarDrop GUI does not
    report any data sets.
    @param: appui: The StarDrop GUI
    @param: ID_column_name_regular_expression: A regular expression used to identify which column name might
                                               correspond to the ID column if it isn't obvious
    @param: preferred_mol_str_type: Specification on whether molecular string should be SMILES or Molfiles
    @param: dataset_name: The name of the dataset which the user selected
    @returns: A parsed version of the StarDrop dataset which is convenient for Python, or None
    """

    # Without any loaded data set there is nothing to parse
    if len(appui.getDataSetNames()) > 0:
        py_dataset = _parse_stardrop_dataset_to_python_dataset(appui,
                                                               ID_column_name_regular_expression,
                                                               preferred_mol_str_type,
                                                               dataset_name)
    else:
        py_dataset = None

    if py_dataset:
        return py_dataset

    no_dataset_msg = 'No data sets were detected in the StarDrop Client. Cannot run script.'
    logfile.record_error(no_dataset_msg)
    DisplayGUIs.warning_window(no_dataset_msg, 'No Data Sets Detected')

    return py_dataset


def export_stardrop_dataset(appui,
                            py_dataset: dict[str, Any],
                            invalid_entry_msg: str = ''):
    """
    Convert a python-parsed dataset back into a StarDrop Dataset, pack it, and export it to the StarDrop GUI
    under the name stored in the python-parsed dataset.
    @param: appui: The StarDrop GUI
    @param: py_dataset: A parsed version of the StarDrop dataset which is convenient for Python
    @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
    """

    dataset_name = py_dataset['name']

    logfile.record_info(f'Exporting the "{dataset_name}" Data Set from Python into the StarDrop GUI')

    # Rebuild the StarDrop representation first; it has to be packed before it can be exported
    rebuilt_stardrop_dataset = _parse_python_dataset_to_stardrop_dataset(appui, py_dataset, invalid_entry_msg)
    packed_dataset = rebuilt_stardrop_dataset.pack()

    appui.exportDS(dataset_name, packed_dataset)


def export_stardrop_column(appui,
                           py_dataset: dict[str, Any],
                           new_column_names: Union[str, list[str]],
                           columns_to_remove: Optional[Union[str, list[str]]] = None,
                           rows_to_remove: Optional[Union[int, list[int]]] = None,
                           invalid_entry_msg: str = ''):
    """
    Parse interpretable python dictionaries and strings into a StarDrop Dataset and then export only user-selected
    columns to the StarDrop GUI. Will replace columns if the dataset already has columns with those names, but
    will otherwise simply add those columns if the dataset doesn't have them yet.

    The changes are applied in this order: remove columns, remove rows, export the new columns. They are all made
    between one appui.startMacro()/appui.stopMacro() pair, and stopMacro() is called even when a change fails.

    Nothing is changed (a warning is logged and the function returns) when the named dataset is not loaded in
    StarDrop or when any of the requested columns is missing from the python-parsed dataset.
    logfile.record_and_raise_exception() is called when new_column_names is an empty list, when a row to be
    removed is beyond the number of rows, when a column to be removed does not exist, or when StarDrop
    reports that a column could not be replaced/added.
    @param: appui: The StarDrop GUI
    @param: py_dataset: A parsed version of the StarDrop dataset which is convenient for Python
    @param: new_column_names: A subset of column names to be exported into a dataset which is already loaded within
                              StarDrop instead of just exporting the entire dataset. The name of the python-parsed
                              dataset must match the name of the already existent StarDrop dataset.
    @param: columns_to_remove: The set of columns to remove from the StarDrop dataset (None or empty: no column)
    @param: rows_to_remove: The set of rows to remove from the StarDrop dataset (None or empty: no row)
    @param: invalid_entry_msg: The message to display in the invalid entry within the StarDrop dataset
    """

    planned_stardrop_dataset = _parse_python_dataset_to_stardrop_dataset(appui, py_dataset, invalid_entry_msg)

    active_stardrop_dataset = app.Dataset()
    active_stardrop_dataset.parse(appui.importDS(py_dataset['name']))

    # Nested inner function
    def get_column_idxs_func(dataset: app.Dataset) -> dict[str, int]:
        return {dataset.columnName(col_idx): col_idx for col_idx in range(dataset.columnSz())}

    planned_stardrop_dataset_col_idx_dict = get_column_idxs_func(planned_stardrop_dataset)
    active_dataset_col_idx_dict = get_column_idxs_func(active_stardrop_dataset)

    if isinstance(new_column_names, list):
        if len(new_column_names) == 0:
            logfile.record_and_raise_exception('Must specify the names of the columns to be exported when only exporting columns to a data set.')  # noqa: E501
    elif isinstance(new_column_names, str):
        new_column_names = [new_column_names]

    # A single row index has to be wrapped *before* the emptiness check below, because the index 0 is
    # falsy and would otherwise be silently ignored. (False is left alone so that it keeps meaning "no rows".)
    if isinstance(rows_to_remove, int) and rows_to_remove is not False:
        rows_to_remove = [rows_to_remove]

    if rows_to_remove:

        # Sorted and free of duplicates, so that removing from the highest index downwards never
        # invalidates an index which still has to be removed
        rows_to_remove = sorted(set(rows_to_remove))

        max_row_to_be_removed = rows_to_remove[-1]
        max_row = active_stardrop_dataset.rowSz()
        # NOTE(review): if removeRowFromDataSet() takes 0-based indices, an index equal to rowSz() is already
        # out of range and this should be ">=" - confirm against the StarDrop scripting documentation
        if max_row_to_be_removed > max_row:
            logfile.record_and_raise_exception(f'The max row to be removed is greater than then number of rows in the dataset:\n\nmax row: {max_row:1d}\nmax row to be removed: {max_row_to_be_removed:1d}')  # noqa: E501

    if columns_to_remove:

        if isinstance(columns_to_remove, str):
            columns_to_remove = [columns_to_remove]

        # Drop duplicates (keeping the first occurrence) so that no column is removed twice
        columns_to_remove = list(dict.fromkeys(columns_to_remove))

        missing_columns_to_remove = \
            [column_name for column_name in columns_to_remove if column_name not in active_dataset_col_idx_dict]

        if missing_columns_to_remove:
            logfile.record_and_raise_exception('Some columns to be removed do not exist in this dataset: ' + ', '.join(missing_columns_to_remove))  # noqa: E501

        # Highest index first, so that removing one column never shifts a column which is still to be removed
        columns_to_remove = sorted(columns_to_remove,
                                   key=lambda column_name: active_dataset_col_idx_dict[column_name],
                                   reverse=True)

    if py_dataset['name']:
        # PROBABLY TO BE UNNESTED WHEN py_dataset['name'] IS NO LONGER AUTOMATICALLY EMPTY
        if py_dataset['name'] not in list(appui.getDataSetNames()):
            logfile.record_warning((f"Did not export individual column(s) due to the following python-parsed dataset not already being loaded in StarDrop: {py_dataset['name']}"))  # noqa: E501
            return

    column_names_not_in_python_dataset = \
        [column_name for column_name in new_column_names if column_name not in py_dataset['column']]

    if column_names_not_in_python_dataset:
        logfile.record_warning('Did not export individual column(s) due to the following python-parsed dataset not having the following columns: ' + ', '.join(column_names_not_in_python_dataset))  # noqa: E501
        return

    # Export in the column order of the planned dataset. This is resolved before the macro starts so that
    # a failing lookup cannot leave the StarDrop dataset half-modified.
    new_column_names = sorted(new_column_names,
                              key=lambda column_name: planned_stardrop_dataset_col_idx_dict[column_name])

    appui.startMacro(py_dataset['name'])

    try:

        if columns_to_remove:

            logfile.record_info(f"Removing the following column{'s' if len(columns_to_remove) > 1 else ''}: " + ', '.join([f"'{column_name}'" for column_name in columns_to_remove[::-1]]))  # noqa: E501

            for column_name in columns_to_remove:

                removed_col_idx = active_dataset_col_idx_dict[column_name]

                if not appui.removeColumnFromDataSet(py_dataset['name'], removed_col_idx):
                    logfile.record_warning(f'Could not remove the "{column_name}" column')
                    continue

                # Keep the index bookkeeping in sync with StarDrop: the column is gone and every column
                # to its right has moved one position to the left
                del active_dataset_col_idx_dict[column_name]
                for tmp_column_name, tmp_column_idx in active_dataset_col_idx_dict.items():
                    if tmp_column_idx > removed_col_idx:
                        active_dataset_col_idx_dict[tmp_column_name] = tmp_column_idx - 1

        if rows_to_remove:

            logfile.record_info(f"Removing the following row{'s' if len(rows_to_remove) > 1 else ''}: " + ', '.join([f'{row:1d}' for row in rows_to_remove]))  # noqa: E501

            for row in reversed(rows_to_remove):
                appui.removeRowFromDataSet(py_dataset['name'], row)

        logfile.record_info(f"Exporting the following column name{'s' if len(new_column_names) > 1 else ''}: " + ', '.join([f"'{name}'" for name in new_column_names]))  # noqa: E501

        for column_name in new_column_names:

            column_already_exists = column_name in active_dataset_col_idx_dict

            # An existing column is replaced in place; a new column goes where the python dataset has it
            active_col_idx = active_dataset_col_idx_dict[column_name] if column_already_exists else py_dataset['column index'][column_name]  # noqa: E501
            planned_col_idx = planned_stardrop_dataset_col_idx_dict[column_name]

            if column_already_exists:

                column_was_removed = appui.removeColumnFromDataSet(py_dataset['name'], active_col_idx)

                if not column_was_removed:
                    logfile.record_and_raise_exception(f'Could not remove the "{column_name}" column for some reason')  # noqa: E501

            column_was_added = \
                appui.addColumnToDataSet(py_dataset['name'],
                                         active_col_idx,
                                         planned_stardrop_dataset.packColumn(planned_col_idx))

            if not column_was_added:
                logfile.record_and_raise_exception(f'Could not add the "{column_name}" column for some reason')  # noqa: E501

            if not column_already_exists:
                # A genuinely new column pushes every column at or after its position one to the right
                for tmp_column_name, tmp_column_idx in active_dataset_col_idx_dict.items():
                    if tmp_column_idx >= active_col_idx:
                        active_dataset_col_idx_dict[tmp_column_name] = tmp_column_idx + 1

            active_dataset_col_idx_dict[column_name] = active_col_idx

    finally:
        # Always close the macro, even when one of the changes above failed
        appui.stopMacro(py_dataset['name'])


def select_dataset_by_name(appui,
                           explanation: str,
                           title: str = 'Choose a Data Set') -> Optional[str]:
    """
    Determine which dataset of the StarDrop GUI should be used. If exactly one dataset is loaded, it is
    returned without asking. If several are loaded, the user is prompted to pick one. If none is loaded,
    an error is logged, a warning window is shown and None is returned.
    @param: appui: The StarDrop GUI
    @param: explanation: An explanation of what it is that the user is selecting a dataset for
    @param: title: The title of the window used to select a dataset
    @returns: The name of the dataset which the user selected (a falsy value if the user was prompted
              and did not select anything)
    """

    available_names = list(appui.getDataSetNames())
    available_count = len(available_names)

    # Nothing loaded: report it and give up
    if available_count < 1:
        err_msg = 'No data sets were detected within StarDrop.'
        logfile.record_error(err_msg)
        DisplayGUIs.warning_window(err_msg, 'Data set selection error')
        return None

    # Exactly one candidate: no need to bother the user
    if available_count == 1:
        return available_names[0]

    # Several candidates: let the user decide
    dataset_name = PromptGUIs.select_items_from_list(title, explanation, available_names, False)

    if not dataset_name:
        logfile.record_error('Multiple data sets were detected, but the user did not select any of them.')

    return dataset_name
