# Repository page residue: Nihal D'Souza - "Final app release" - commit e41b03f
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 14 00:29:28 2022
@author: UTKARSH
"""
import spacy
# Load the spaCy "en_core_web_sm" pipeline once, when this module is imported.
# The vocab class below reads its default stop-word list
# (nlp.Defaults.stop_words) while building license_stopwords.
nlp = spacy.load("en_core_web_sm")
class vocab:
    """Word lists and per-tier scores used when ranking license sentences.

    ``properties_dict`` maps a tier label (a string such as ``"0.6"``) to the
    set of terms in that tier; ``properties_scores`` maps the same labels to
    the numeric score of the tier.
    """

    # We will add extra weight to negation words
    NEGATION_WEIGHT = 0.2

    # Strong modal verbs are given very high weight
    strong_modal_verbs = {
        "must",
        "shall",
    }

    other_modal_verbs = {
        "may",
        "should",
        "would",
    }

    other_relevant_stopwords = {
        "without",
        "however",
    }

    # Stopwords we would not be considering while normalizing.
    # We do not need stopwords if we don't normalize, but just in case.
    # The modal verbs and the relevant stopwords above are removed from the
    # combined set so they are never treated as stopwords.
    license_stopwords = {
        ",",
        "(",
        ")",
        ".",
        "\"",
        "software",
        "license",
        "work",
        "program",
        "source",
        "code",
        "rights",
        "notice",
        "provided",
        "version",
        "library",
        "covered",
        "public",
        "disclaimer",
        "documentation",
    }.union(
        nlp.Defaults.stop_words
    ) - strong_modal_verbs - other_modal_verbs - other_relevant_stopwords

    negation_words = {
        "no",
        "not",
        "non",
    }

    # These words will have a high weightage while ranking sentences
    high_imp_verbs = {
        "permit", "copy", "modify", "change", "sell", "reproduce",
        "transfer", "rent", "lease", "assign", "sublet", "distribute",
        "redistribute", "allow", "require", "use",
    }

    low_imp_verbs = {
        "merge", "publish", "include", "grant", "run", "affirm", "propagate",
        "acknowledge", "limit", "retain", "associate",
    }

    # Negated forms of the verbs above, written as "not-<verb>".
    high_imp_neg_verbs = {f"not-{verb}" for verb in high_imp_verbs}
    low_imp_neg_verbs = {f"not-{verb}" for verb in low_imp_verbs}

    # Tier label -> set of terms belonging to that tier.
    properties_dict = {
        "0.1": {
            "investigative",
            "contract",
            "contribution",
        },
        "0.2": {
            "everyone",
            "hereby",
            "claim",
        },
        "0.3": {
            "termination", "terminate",
            "meet",
            "tort",
            "files",
            "author",
            "available",
            "apply",
            "material",
            "user",
        },
        "0.4": {
            "liable",
            "contributors",
        },
        "0.5": low_imp_verbs.union({
            "restriction",
            "however",
            "without",
        }),
        "0.6": {
            "distribution", "redistribution",
            "attribution",
            "permission", "modification",
            "copyright",
            "limitation",
            "free", "charge",
            "warranty",
            "term", "terms", "condition",
            "right",
            "sublicense",
            "commercial", "non-commercial",
            "exception",
            "liability",
            "irrevocable",
        },
        "0.7": low_imp_neg_verbs.union({
            "no-charge",
        }),
        "0.8": high_imp_verbs.union({
            "patent",
        }),
        # No terms are assigned to this tier yet. It is a genuinely empty set:
        # the earlier placeholder {""} made the empty string a scored term.
        "0.9": set(),
        # Copy of the negated high-importance verbs, without the empty-string
        # placeholder that used to be unioned in.
        "1.0": set(high_imp_neg_verbs),
        "2.0": other_modal_verbs,
        "3.0": strong_modal_verbs,
    }

    # Tier label -> numeric score for every tier in properties_dict.
    properties_scores = {
        "0.1": 0.1,
        "0.2": 0.2,
        "0.3": 0.3,
        "0.4": 0.4,
        "0.5": 0.5,
        "0.6": 0.6,
        "0.7": 0.7,
        "0.8": 0.8,
        "0.9": 0.9,
        "1.0": 1.0,
        "2.0": 2.0,
        "3.0": 3.0,
    }
class color:
    """Named color constants, each a ``#RRGGBB`` hex string."""

    GREEN = "#03AC13"
    RED = "#D22B2B"

    # Neutral shades.
    BLACK = "#000000"
    GRAY = "#AAAAAA"
class captions:
    """Static caption strings: titles, section headings, disclaimers and
    short descriptions.

    Long messages are assembled from adjacent string literals inside
    parentheses, so the source indentation never becomes part of the text.
    """

    # --- Application header -------------------------------------------------
    APP_TITLE = "Clearly Defined: License Summarizer"
    APP_DISCLAIMER = (
        "DISCLAIMER: This app is the result of a Capstone "
        "Project and further development is required before productive use."
    )

    # --- License input ------------------------------------------------------
    LICENSE_TEXT = "License text"
    ENTER_LICENSE_CONTENT = "Enter contents of the license"
    LOADING = "Loading..."

    # --- Result section headings and their disclaimers ----------------------
    SUMMARY = "Summary"
    SIMILARITY_INDEX = "Similarity Index"
    SIMILARITY_INDEX_DISCLAIMER = (
        "The following list of licenses are from "
        "choosealicense.com and consist of 41 known open source licenses."
    )
    PROPERTIES = "Properties"
    PROPERTIES_DISCLAIMER = (
        "The properties defined below are from "
        "choosealicense.com. For more information, visit "
        "choosealicense.com/appendix."
    )
    DEFINITIONS = "Definitions"
    EXCEPTIONS = "Exceptions"

    # --- Summarization type descriptions and warnings -----------------------
    SUMMARY_BY_T5 = "Summary will be generated by a T5 Transformer Model"
    WARNING_ABSTRACTIVE = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )
    SUMMARY_BY_TEXTRANK = (
        "Summary will be generated by a custom TextRank "
        "Algorithm"
    )
    SUMMARY_BY_BOTH = (
        "The License text will be first passed through the "
        "custom TextRank algorithm and then passed on to the T5 Transformer "
        "Model to generate a summary."
    )
    WARNING_BOTH = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )

    # --- Summary controls ---------------------------------------------------
    SUMMARY_LENGTH_PERCENTAGE = "Summary length percentage"
    SELECT_SUMMARIZATION_TYPE = "Select summarization type"
    SUMMARY_VIEW = "Summary View"
    DISPLAY_SUMMARY_ONLY_DESC = (
        "Shows the important sentences from the "
        "license"
    )
    DISPLAY_HIGHLIGHTED_SUMMARY_DESC = (
        "Highlights the important sentences in "
        "the license"
    )

    # --- Cleaned-license view -----------------------------------------------
    CLEANED_LICENSE_ONLY = "Shows the cleaned license text only"
    CLEANED_LICENSE_WITH_DIFF = (
        "Shows the cleaned license text with "
        "highlighted diffs"
    )
    HIDE_CLEANED_LICENSE = "Hides the cleaned license text"
    NO_SIMILAR_LICENSE_FOUND = "No similar license found"
    CLEANED_LICENSE_VIEW = "Cleaned License View"
    CLEANED_LICENSE_TEXT = "Cleaned License Text"
    CLEANED_LICENSE_DIFF = "Cleaned License Diff"
class options:
    """Labels for the selectable options.

    Several of these labels are interpolated into the help texts defined in
    ``help_messages``, so a label change shows up there as well.
    """

    # Summarization type.
    ABSTRACTIVE = "Abstractive"
    EXTRACTIVE = "Extractive"
    BOTH = "Both"

    # Summary view.
    DISPLAY_SUMMARY_ONLY = "Display Summary Only"
    DISPLAY_HIGHLIGHTED_SUMMARY = "Display Highlighted Summary"

    # Cleaned-license view.
    HIDE_CLEANED_LICENSE = "Hide Cleaned License"
    DISPLAY_CLEANED_LICENSE = "Display Cleaned License"
    DISPLAY_CLEANED_DIFF = "Display Cleaned License + Diff"

    # Checkboxes for additional license details.
    SHOW_LICENSE_PROPERTIES = "Show license properties"
    SHOW_LICENSE_DEFINITIONS = "Show license definitions"
    SHOW_LICENSE_EXCEPTIONS = "Show license exceptions"
class help_messages:
    """Help-text strings describing the app's controls.

    Option names are interpolated from ``options`` and wrapped in double
    quotes. Messages are built from adjacent string literals inside
    parentheses, so source indentation never becomes part of the text.
    """

    SUMMARIZATION_TYPE = (
        "Select the type of summarization to perform. "
        f'"{options.EXTRACTIVE}" would select the most important sentences to '
        f'generate a summary. "{options.ABSTRACTIVE}" would try and paraphrase '
        f'the meaning of the license and form a summary. "{options.BOTH}" would '
        'first pass the license through "extractive" and then "abstractive" '
        "to generate a summary."
    )

    SLIDER = (
        "Slide to vary the size of the summary. 1 will result in the "
        "smallest summary possible, whereas 100 will display the complete "
        "(cleaned) license text."
    )

    # Fix: this message used to start with a stray double quote (the literal
    # opened with four quote characters), leaving the first option name
    # without its opening quote. The quote now wraps the option name.
    SUMMARY_VIEW = (
        "Select the type of summary view desired. "
        f'"{options.DISPLAY_SUMMARY_ONLY}" will show only the '
        f'summary text. "{options.DISPLAY_HIGHLIGHTED_SUMMARY}" will show the '
        "complete (cleaned) license text with the summary highlighted."
    )

    # Fix: same misplaced opening quote as in SUMMARY_VIEW.
    CLEANED_LICENSE_VIEW = (
        "Select the type of cleaned license view desired. "
        f'"{options.HIDE_CLEANED_LICENSE}" will not show '
        f'the cleaned license text. "{options.DISPLAY_CLEANED_LICENSE}" will '
        f'show the cleaned license text. "{options.DISPLAY_CLEANED_DIFF}" will '
        "show the cleaned license text and the diff between the input text and "
        "the closest matching SPDX license (from the similarity index table)."
    )

    PROPERTIES_CHECKBOX = (
        "Select this checkbox to view the properties of the "
        "license that shares the highest similarity with the input license "
        "text. This checkbox would be disabled if no known license crosses the "
        "similarity threshold."
    )

    DEFINITIONS_CHECKBOX = (
        "Select this checkbox to view definitions within "
        "the license. This checkbox would be disabled if no definitions are "
        "found within the license."
    )

    EXCEPTIONS_CHECKBOX = (
        "Select this checkbox to view exceptions within "
        "the license. This checkbox would be disabled if no exceptions are "
        "found within the license."
    )