import glob
import os
from collections import defaultdict

import numpy as np
import pandas as pd


# Directory holding the labels CSV; read_license_data also retries with "../" prepended.
data_directory = "data/"
# Directory globbed by read_license_data for the "<name>.txt" / "<name>.summary" files.
gold_licenses_data = "data/gold_licenses/"
# Column of the labels CSV that read_license_data uses as the dataframe index.
index_col = "license_name"


# Names of the label columns looked up on each license row by augment_summary.
COMM_USE = "commercial-use"
DIST = "distribution"
MODS = "modifications"
PAT_USE = "patent-use"
PVT_USE = "private-use"
DISC_SRC = "disclose-source"
INCL_CPYRT = "include-copyright"
INCL_CPYRT_SRC = "include-copyright--source"
NW_USE_DISC = "network-use-disclose"
SAME_LIC = "same-license"
SAME_LIC_FILE = "same-license--file"
SAME_LIC_LIB = "same-license--library"
DOC_CHNG = "document-changes"
LIABILITY = "liability"
TRDMRK_USE = "trademark-use"
WRNTY = "warranty"

# Values a label column is compared against to decide whether the label applies.
PERMISSIONS = "permissions"
CONDITIONS = "conditions"
LIMITATIONS = "limitations"

# Name of the column holding the license summary.
SUMMARY = "summary"

# Sentence appended to a summary by augment_summary for each label that applies.
# Every label maps to a single sentence except PAT_USE, which maps to a nested
# dict with one sentence for the PERMISSIONS value and one for LIMITATIONS.
summary_terms_dict = {
    COMM_USE: "The licensed material and derivatives may be used for commercial purposes.",
    DIST: "The licensed material may be distributed.",
    MODS: "The licensed material may be modified.",
    PAT_USE: {
        PERMISSIONS: "This license provides an express grant of patent rights from contributors.",
        LIMITATIONS: "This license explicitly states that it does NOT grant any rights in the patents of contributors."
    },
    PVT_USE: "The licensed material may be used and modified in private.",
    DISC_SRC: "Source code must be made available when the licensed material is distributed.",
    INCL_CPYRT: "A copy of the license and copyright notice must be included with the licensed material.",
    INCL_CPYRT_SRC: "A copy of the license and copyright notice must be included with the licensed material in source form, but is not required for binaries.",
    NW_USE_DISC: "Users who interact with the licensed material via network are given the right to receive a copy of the source code.",
    SAME_LIC: "Modifications must be released under the same license when distributing the licensed material. In some cases a similar or related license may be used.",
    SAME_LIC_FILE: "Modifications of existing files must be released under the same license when distributing the licensed material. In some cases a similar or related license may be used.",
    SAME_LIC_LIB: "Modifications must be released under the same license when distributing the licensed material. In some cases a similar or related license may be used, or this condition may not apply to works that use the licensed material as a library.",
    DOC_CHNG: "Changes made to the licensed material must be documented.",
    LIABILITY: "This license includes a limitation of liability.",
    TRDMRK_USE: "This license explicitly states that it does NOT grant trademark rights, even though licenses without such a statement probably do not grant any implicit trademark rights.",
    WRNTY: "This license explicitly states that it does NOT provide any warranty."
}


def read_file(file_path):
    """
    Returns the full text stored at file_path, decoded as UTF-8.

    Parameters
    ----------
    file_path : str
        Location of the file to load.

    Returns
    -------
    str
        Everything the file contains, as one string.

    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return handle.read()


def augment_summary(license_data):
    """
    Augments summary with description of labels from choosealicense as stored
    in summary_terms_dict object.

    For every row the summary is stripped of surrounding whitespace and then
    extended, separated by single spaces, with one sentence for each label
    whose column holds the value that marks the label as applying.

    Parameters
    ----------
    license_data : pandas.DataFrame
        The license data with original license texts and summaries. It must
        contain the summary column and every label column listed in the rule
        table below.

    Returns
    -------
    license_data : pandas.DataFrame
        The same dataframe object, with its summary column overwritten by the
        augmented summaries.

    Notes
    -----
    A summary that is not a string (e.g. a missing value) raises
    AttributeError, because it is stripped before anything is appended.

    """
    # (label column, value that triggers the sentence, sentence to append),
    # in the order the sentences are appended. network-use-disclose is matched
    # as a condition and trademark-use / warranty as limitations, the same
    # categories fix_labels assigns to those columns; matching them against
    # "permissions" would never fire.
    augmentation_rules = (
        (COMM_USE, PERMISSIONS, summary_terms_dict[COMM_USE]),
        (DIST, PERMISSIONS, summary_terms_dict[DIST]),
        (MODS, PERMISSIONS, summary_terms_dict[MODS]),
        (PAT_USE, PERMISSIONS, summary_terms_dict[PAT_USE][PERMISSIONS]),
        (PAT_USE, LIMITATIONS, summary_terms_dict[PAT_USE][LIMITATIONS]),
        (PVT_USE, PERMISSIONS, summary_terms_dict[PVT_USE]),
        (DISC_SRC, CONDITIONS, summary_terms_dict[DISC_SRC]),
        (INCL_CPYRT, CONDITIONS, summary_terms_dict[INCL_CPYRT]),
        (INCL_CPYRT_SRC, CONDITIONS, summary_terms_dict[INCL_CPYRT_SRC]),
        (NW_USE_DISC, CONDITIONS, summary_terms_dict[NW_USE_DISC]),
        (SAME_LIC, CONDITIONS, summary_terms_dict[SAME_LIC]),
        (SAME_LIC_FILE, CONDITIONS, summary_terms_dict[SAME_LIC_FILE]),
        (SAME_LIC_LIB, CONDITIONS, summary_terms_dict[SAME_LIC_LIB]),
        (DOC_CHNG, CONDITIONS, summary_terms_dict[DOC_CHNG]),
        (LIABILITY, LIMITATIONS, summary_terms_dict[LIABILITY]),
        (TRDMRK_USE, LIMITATIONS, summary_terms_dict[TRDMRK_USE]),
        (WRNTY, LIMITATIONS, summary_terms_dict[WRNTY]),
    )

    augmented_summaries = []
    for _, row in license_data.iterrows():
        pieces = [row[SUMMARY].strip()]
        pieces.extend(
            sentence
            for column, trigger, sentence in augmentation_rules
            if row[column] == trigger
        )
        augmented_summaries.append(" ".join(pieces))

    # Rows yielded by iterrows may be copies, so writing to them is not
    # guaranteed to reach the dataframe; assign the column back explicitly.
    license_data[SUMMARY] = augmented_summaries

    return license_data


def read_license_data(labels_file="choosealicense_appendix_labels.csv", drop_summary=False):
    """
    Reads the gold license texts and summaries from disk and joins them onto
    the labels read from the labels CSV.

    Files in the gold licenses directory ending in ".txt" fill the "text"
    column and files ending in ".summary" fill the "summary" column; the file
    name without that suffix is the key the labels are joined on. Both the
    gold directory and the labels file are looked up relative to the current
    working directory first and relative to its parent ("../") second.

    Parameters
    ----------
    labels_file : str, optional
        The filename containing labels for choosealicense licenses.
        The default is "choosealicense_appendix_labels.csv".
    drop_summary : bool, optional
        A flag to decide whether or not to drop summary before returning the
        data. The default is False.

    Returns
    -------
    merged_data : pandas.DataFrame or None
        Labels (indexed by the index_col column) joined with the license
        texts and summaries. None is returned, after printing a message, when
        the gold license files or the labels file cannot be found.

    Raises
    ------
    Exception
        Any error other than a missing labels file raised while reading the
        CSV (for example a malformed file or a missing index column) is
        propagated instead of being reported as "file not found".

    """
    files = glob.glob(gold_licenses_data + "*") or glob.glob(f"../{gold_licenses_data}" + "*")
    if not files:
        print("Gold licenses not found, please check the path again!")
        return None

    # Recognised file suffixes and the column each one populates.
    suffix_to_column = ((".summary", "summary"), (".txt", "text"))

    data_dict = defaultdict(dict)
    for file_path in files:
        # basename copes with whichever separator glob produced on this platform.
        base_name = os.path.basename(file_path)
        for suffix, column in suffix_to_column:
            if base_name.endswith(suffix):
                data_dict[base_name[:-len(suffix)]][column] = read_file(file_path)
                break

    summary_df = pd.DataFrame(data_dict).T

    labels_df = None
    for base_dir in (data_directory, f"../{data_directory}"):
        try:
            labels_df = pd.read_csv(base_dir + labels_file, index_col=index_col)
        except FileNotFoundError:
            # Only a missing file triggers the fallback location.
            continue
        break
    if labels_df is None:
        print("Labels file not found, please check the path again!")
        return None

    # The "spdx_id" column is deliberately kept: read_license_text_data
    # filters on it.
    merged_data = labels_df.join(summary_df)

    if drop_summary:
        merged_data = merged_data.drop(columns=["summary"])

    return merged_data


def read_license_summary_data(aug_summary=False):
    """
    Loads the license data and returns only its "text" and "summary" columns,
    optionally passing the data through augment_summary first.

    Parameters
    ----------
    aug_summary : bool, optional
        When True, the loaded data is passed through augment_summary before
        the two columns are selected. The default is False.

    Returns
    -------
    pandas.DataFrame
        The "text" and "summary" columns of the loaded license data.

    """
    frame = read_license_data()
    frame = augment_summary(frame) if aug_summary else frame
    return frame[["text", "summary"]]


def read_license_labels_data():
    """
    Loads the license data and removes the "summary" column from it.

    Returns
    -------
    pandas.DataFrame
        The loaded license data without its "summary" column.

    """
    full_data = read_license_data()
    without_summary = full_data.drop(columns=["summary"])
    return without_summary


def read_license_text_data(license_name):
    """
    Returns the license text of the first row whose "spdx_id" equals the
    given license_name.

    Parameters
    ----------
    license_name : str
        Identifier compared against the "spdx_id" column of the license data.

    Returns
    -------
    str
        The "text" value of the first matching row. An IndexError is raised
        when no row matches.

    """
    labelled = read_license_data(drop_summary=True)
    is_requested = labelled["spdx_id"] == license_name
    matching_texts = labelled[is_requested]["text"]
    return matching_texts.values[0]


def fix_labels(license_data):
    """
    Replaces the textual label values in license_data with integer ids.

    The label columns are handled in four groups, each with its own
    value-to-id mapping; the replacement is written back into the given
    dataframe.

    Parameters
    ----------
    license_data : pandas.DataFrame
        License data containing every label column listed below.

    Returns
    -------
    license_data : pandas.DataFrame
        The same dataframe, with the label columns re-encoded.

    """
    # Each entry pairs a group of label columns with the mapping applied to it.
    # NOTE(review): the first mapping has no NaN entry and sends "permissions"
    # to 0, unlike the other three -- confirm this is intended.
    # A further mapping for a "GTLC_Permissive" column (NaN -> 0,
    # "permissive" -> 1, "not_permissive" -> 2) is currently disabled.
    column_encodings = (
        (
            ["commercial-use", "distribution", "modifications", "private-use"],
            {"permissions": 0},
        ),
        (
            [
                "disclose-source",
                "document-changes",
                "include-copyright",
                "include-copyright--source",
                "network-use-disclose",
                "same-license",
                "same-license--file",
                "same-license--library",
            ],
            {np.nan: 0, "conditions": 1},
        ),
        (
            ["liability", "trademark-use", "warranty"],
            {np.nan: 0, "limitations": 1},
        ),
        (
            ["patent-use"],
            {np.nan: 0, "permissions": 1, "limitations": 2},
        ),
    )

    for columns, value_to_id in column_encodings:
        license_data[columns] = license_data[columns].replace(value_to_id)

    return license_data