import pandas as pd
import os


def file_search(path=".", doctype="csv", like=[""], strict=False):
    """
    This function creates a list of all files of a certain type, satisfying the criteria outlined
    in like = [...] parameter. The function only searches for files in the specified folder
    of the current working directory that is set by the user.

    Parameters
    -----------
    path : string
        Path to a folder in the current working directory
        default = '.', i.e. current working directory folder
    doctype : string
        Document format to search for
        e.g. 'csv' or 'xlsx'
        default = 'csv'
    like : list
        A list of words to filter the file search on
        default = [''], i.e. no filter
    strict : bool
        Set True to search for filenames containing all words from 'like' list (
        default = False

    Returns
    -------
    list

    Examples
    -------
    >>> file_search(doctype = 'md')
    ['README.md', 'CONTRIBUTING.md']

    >>> file_search(doctype = 'md', like = ['READ'])
    ['README.md']

    """

    if not isinstance(path, str):
        raise ValueError("Please input path as a string")
    elif not isinstance(doctype, str):
        raise ValueError("Please input doctype as a string")
    elif not isinstance(like, list):
        raise ValueError("Please input like as a list")
    elif not isinstance(strict, bool):
        raise ValueError("Please input strict as a bool")
    else:
        pass

    list_of_files = []

    if strict is False:
        for file in os.listdir(path):
            if (file.split(".")[-1] == doctype) & (any(x in file for x in like)):
                list_of_files.append(file)
    else:
        for file in os.listdir(path):
            if (file.split(".")[-1] == doctype) & (all(x in file for x in like)):
                list_of_files.append(file)

    return list_of_files


def import_files(
    path=".", doctype="csv", sheet="Sheet1", subdir=False, like=[""], strict=False
):
    """
    This function imports all documents of a given format to a dictionary
    and returns this dictionary, keeping original file names.

    Parameters
    ----------
    path : string
        Path to a folder in the current working directory
        default = '.', i.e. current working directory folder
    doctype : string
        Document format to search for
        e.g. 'csv' or 'xlsx'
        default = 'csv'
    sheet : string
        Sheet name of the xlsx file
        default = 'Sheet1'
    subdir : bool
        True to allow download all files, including the subdirectories
        default = False
    like : list
        A list of words to filter the file search on
        default = [''], i.e. no filter
    strict : bool
        Set True to search for filenames containing all words from 'like' list
        default = False

    Returns
    -------
    out : dict

    Examples
    --------

    '>>> import_files()'

    File Data_AprF_2019 is successfully imported

    File Data_AugF_2019 is successfully imported

    File Data_JulF_2019 is successfully imported

    File Data_JunF_2019_v1 is successfully imported

    File Data_MayF_2019 is successfully imported

    File Data_SepP_2019 is successfully imported

    '>>> import_files(like = ['Aug','Sep'])'

    File Data_AugF_2019 is successfully imported

    File Data_SepP_2019 is successfully imported


    """

    if not isinstance(path, str):
        raise ValueError("Please input path as a string")
    elif not isinstance(doctype, str):
        raise ValueError("Please input doctype as a string")
    elif not isinstance(sheet, str):
        raise ValueError("Please input sheet as a string")
    elif not isinstance(subdir, bool):
        raise ValueError("Please input subdir as a bool")
    elif not isinstance(like, list):
        raise ValueError("Please input like as a list")
    elif not isinstance(strict, bool):
        raise ValueError("Please input strict as a bool")
    else:
        pass

    dict_files = {}
    if subdir is True:

        for r, d, f in os.walk(path):
            for file in f:
                b = any(x in file for x in like)
                if strict is True:
                    b = all(x in file for x in like)
                if (file.split(".")[-1] == doctype) & (b is True):
                    k = file.strip("." + doctype)
                    try:
                        name = os.path.join(r, file)
                        print("\nImporting " + k + "...", end="", flush=True)
                        if doctype == "csv":
                            dict_files[name.strip(".\\").strip(".csv")] = pd.read_csv(
                                name
                            )
                            print("\rFile " + k + " is successfully imported")
                        else:
                            dict_files[
                                name.strip(".\\").strip(".xlsx")
                            ] = pd.read_excel(name, sheet_name=sheet)
                            print("\rFile " + k + " is successfully imported")
                    except Exception as ex:
                        raise (ex)
    else:
        for file in os.listdir(path):
            b = any(x in file for x in like)
            if strict is True:
                b = all(x in file for x in like)

            if (file.split(".")[-1] == doctype) & (b is True):
                k = file.strip("." + doctype)
                try:
                    name = os.path.join(path, file)
                    print("\nImporting " + k + "...", end="", flush=True)
                    if doctype == "csv":
                        dict_files[k] = pd.read_csv(name)
                        print("\rFile " + k + " is successfully imported")
                    else:
                        dict_files[k] = pd.read_excel(name, sheet_name=sheet)
                        print("\rFile " + k + " is successfully imported")
                except Exception as ex:
                    raise (ex)

    return dict_files


def compare(x, y, names=["x", "y"], dups=False, same=False, comment=False):
    """
    This function returns a dictionary with:

        1. Same values between data frames x and y
        2. Values in x, not in y
        3. Values in y, not in x

        (optional):
        (4) Duplicates of x
        (5) Duplicates of y
        (6) Boolean of whether x and y are the same

    Parameters
    ----------
    x : pandas.DataFrame
        DataFrame #1
    y : pandas.DataFrame
        DataFrame #2
    names : list
        a list of user preferred file names
        e.g. ['File1', 'File2']
        default = ['x','y']
    dups : bool
        True to include duplicates check for each file
        default = False
    same : bool
        True to activate. Outputs True if DataFrames are the same
        default = False
    comment : bool
        True to activate. Prints out statistics of the compariosn results
        e.g. number of same valeus, number of duplicates, number of outliers and whether the DataFrames are the same
        default = False

    Returns
    -------
    out : dict

    Examples
    --------

    '>>> c = compare(df1, df2, names = ['df1','df2'], dups = True, same = True, comment =True)'

    There are 133891 same values
    There are 16531 outliers in df1
    There are 20937 outliers in df2
    There are 48704 duplicates in df1
    There are 0 duplicates in df2
    The DataFrames are not the same

    '>>> c = compare(df2, df2, names = ['df2','df2'], dups = True, same = True, comment =True)'

    There are 154444 same values
    There are 0 outliers in df2
    There are 0 outliers in df2
    There are 0 duplicates in df2
    There are 0 duplicates in df2
    The DataFrames are the same
    """

    if not isinstance(x, pd.DataFrame):
        raise ValueError("Please input x as a pandas.DataFrame")
    elif not isinstance(y, pd.DataFrame):
        raise ValueError("Please input y as a pandas.DataFrame")
    elif not isinstance(names, list):
        raise ValueError("Please input names as a list")
    elif not isinstance(dups, bool):
        raise ValueError("Please input dups as a bool")
    elif not isinstance(same, bool):
        raise ValueError("Please input same as a bool")
    elif not isinstance(comment, bool):
        raise ValueError("Please input comment as a bool")

    dict_temp = {}

    try:
        dict_temp["same_values"] = pd.merge(
            x.drop_duplicates(), y.drop_duplicates(), how="inner"
        )
    except Exception as ex:
        raise (ex)
    try:
        dict_temp[names[0] + "_not_" + names[1]] = pd.concat(
            [x, dict_temp["same_values"]], ignore_index=True
        ).drop_duplicates(keep=False)
        dict_temp[names[1] + "_not_" + names[0]] = pd.concat(
            [y, dict_temp["same_values"]], ignore_index=True
        ).drop_duplicates(keep=False)
    except Exception as ex:
        raise (ex)

    if dups is True:
        try:
            dict_temp[names[0] + "_dups"] = x[x.duplicated()]
            dict_temp[names[1] + "_dups"] = y[y.duplicated()]
        except Exception as ex:
            raise (ex)
    if same is True:
        try:
            if (x.shape == y.shape) & (x.shape == dict_temp["same_values"].shape):
                dict_temp["Same"] = True
            else:
                dict_temp["Same"] = False
        except Exception as ex:
            raise (ex)
    try:
        if comment is True:
            print(
                "\nThere are " + str(dict_temp["same_values"].shape[0]) + " same values"
            )
            print(
                "There are "
                + str(dict_temp[names[0] + "_not_" + names[1]].shape[0])
                + " outliers in "
                + str(names[0])
            )
            print(
                "There are "
                + str(dict_temp[names[1] + "_not_" + names[0]].shape[0])
                + " outliers in "
                + str(names[1])
            )
            if dups is True:
                print(
                    "There are "
                    + str(dict_temp[names[0] + "_dups"].shape[0])
                    + " duplicates in "
                    + names[0]
                )
                print(
                    "There are "
                    + str(dict_temp[names[1] + "_dups"].shape[0])
                    + " duplicates in "
                    + names[1]
                )
            if same is True:
                if dict_temp["Same"] is True:
                    s = "the same"
                else:
                    s = "not the same"
                print("DataFrames are " + s)
    except Exception as ex:
        raise (ex)

    return dict_temp