Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
""" | |
Created on Tue Jun 14 00:29:28 2022 | |
@author: UTKARSH | |
""" | |
import spacy | |
# Load the spaCy English pipeline once at import time; the vocab class below
# reads its default stop-word list (nlp.Defaults.stop_words).
nlp = spacy.load("en_core_web_sm")
class vocab:
    """Word lists and weight tables describing license vocabulary.

    All members are class-level constants: sets of words grouped by role
    (modal verbs, stopwords, negations, important verbs) plus two dicts
    that bucket words by a weight label and map each label to its number.
    """

    # We will add extra weight to negation words
    NEGATION_WEIGHT = 0.2

    # Strong modal verbs are given very high weight
    strong_modal_verbs = {
        "must",
        "shall",
    }

    other_modal_verbs = {
        "may",
        "should",
        "would",
    }

    other_relevant_stopwords = {
        "without",
        "however",
    }

    # Stopwords we would not be considering while normalizing
    # We do not need stopwords if we don't normalize, but just in case.
    # The modal verbs and the other relevant stopwords are removed from the
    # combined set so that they are never treated as stopwords.
    license_stopwords = (
        {
            ",",
            "(",
            ")",
            ".",
            "\"",
            "software",
            "license",
            "work",
            "program",
            "source",
            "code",
            "rights",
            "notice",
            "provided",
            "version",
            "library",
            "covered",
            "public",
            "disclaimer",
            "documentation",
        }.union(nlp.Defaults.stop_words)
        - strong_modal_verbs
        - other_modal_verbs
        - other_relevant_stopwords
    )

    negation_words = {
        "no",
        "not",
        "non",
    }

    # These words will have a high weightage while ranking sentences
    high_imp_verbs = {
        "permit", "copy", "modify", "change", "sell", "reproduce",
        "transfer", "rent", "lease", "assign", "sublet", "distribute",
        "redistribute", "allow", "require", "use",
    }

    low_imp_verbs = {
        "merge", "publish", "include", "grant", "run", "affirm", "propagate",
        "acknowledge", "limit", "retain", "associate",
    }

    # Negated forms of the verbs above, spelled "not-<verb>".
    high_imp_neg_verbs = {f"not-{verb}" for verb in high_imp_verbs}
    low_imp_neg_verbs = {f"not-{verb}" for verb in low_imp_verbs}

    # Words bucketed by weight label; each label is also a key of
    # properties_scores below.
    properties_dict = {
        "0.1": {
            "investigative",
            "contract",
            "contribution",
        },
        "0.2": {
            "everyone",
            "hereby",
            "claim",
        },
        "0.3": {
            "termination", "terminate",
            "meet",
            "tort",
            "files",
            "author",
            "available",
            "apply",
            "material",
            "user",
        },
        "0.4": {
            "liable",
            "contributors",
        },
        "0.5": low_imp_verbs.union({
            "restriction",
            "however",
            "without",
        }),
        "0.6": {
            "distribution", "redistribution",
            "attribution",
            "permission", "modification",
            "copyright",
            "limitation",
            "free", "charge",
            "warranty",
            "term", "terms", "condition",
            "right",
            "sublicense",
            "commercial", "non-commercial",
            "exception",
            "liability",
            "irrevocable",
        },
        "0.7": low_imp_neg_verbs.union({
            "no-charge",
        }),
        "0.8": high_imp_verbs.union({
            "patent",
        }),
        # NOTE(review): the "0.9" and "1.0" buckets contain the empty string,
        # presumably as a placeholder -- confirm no caller can look up an
        # empty token before removing it.
        "0.9": {
            "",
        },
        "1.0": high_imp_neg_verbs.union({
            "",
        }),
        "2.0": other_modal_verbs,
        "3.0": strong_modal_verbs,
    }

    # Numeric value for every weight label used in properties_dict.
    properties_scores = {
        "0.1": 0.1,
        "0.2": 0.2,
        "0.3": 0.3,
        "0.4": 0.4,
        "0.5": 0.5,
        "0.6": 0.6,
        "0.7": 0.7,
        "0.8": 0.8,
        "0.9": 0.9,
        "1.0": 1.0,
        "2.0": 2.0,
        "3.0": 3.0,
    }
class color:
    """Named hex color codes."""

    GREEN = "#03AC13"
    RED = "#D22B2B"
    BLACK = "#000000"
    GRAY = "#AAAAAA"
class captions:
    """User-facing caption, heading and disclaimer strings.

    Long texts are built with implicit string concatenation so that source
    indentation never ends up inside the displayed text.
    """

    APP_TITLE = "Clearly Defined: License Summarizer"
    APP_DISCLAIMER = (
        "DISCLAIMER: This app is the result of a Capstone "
        "Project and further development is required before productive use."
    )

    LICENSE_TEXT = "License text"
    ENTER_LICENSE_CONTENT = "Enter contents of the license"
    LOADING = "Loading..."

    SUMMARY = "Summary"
    SIMILARITY_INDEX = "Similarity Index"
    SIMILARITY_INDEX_DISCLAIMER = (
        "The following list of licenses are from "
        "choosealicense.com and consist of 41 known open source licenses."
    )
    PROPERTIES = "Properties"
    PROPERTIES_DISCLAIMER = (
        "The properties defined below are from "
        "choosealicense.com. For more information, visit "
        "choosealicense.com/appendix."
    )
    DEFINITIONS = "Definitions"
    EXCEPTIONS = "Exceptions"

    # Descriptions of the available summarization modes and their warnings.
    SUMMARY_BY_T5 = "Summary will be generated by a T5 Transformer Model"
    WARNING_ABSTRACTIVE = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )
    SUMMARY_BY_TEXTRANK = (
        "Summary will be generated by a custom TextRank "
        "Algorithm"
    )
    SUMMARY_BY_BOTH = (
        "The License text will be first passed through the "
        "custom TextRank algorithm and then passed on to the T5 Transformer "
        "Model to generate a summary."
    )
    WARNING_BOTH = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )

    SUMMARY_LENGTH_PERCENTAGE = "Summary length percentage"
    SELECT_SUMMARIZATION_TYPE = "Select summarization type"

    # Summary view labels and descriptions.
    SUMMARY_VIEW = "Summary View"
    DISPLAY_SUMMARY_ONLY_DESC = (
        "Shows the important sentences from the "
        "license"
    )
    DISPLAY_HIGHLIGHTED_SUMMARY_DESC = (
        "Highlights the important sentences in "
        "the license"
    )

    # Cleaned-license view labels and descriptions.
    CLEANED_LICENSE_ONLY = "Shows the cleaned license text only"
    CLEANED_LICENSE_WITH_DIFF = (
        "Shows the cleaned license text with "
        "highlighted diffs"
    )
    HIDE_CLEANED_LICENSE = "Hides the cleaned license text"
    NO_SIMILAR_LICENSE_FOUND = "No similar license found"
    CLEANED_LICENSE_VIEW = "Cleaned License View"
    CLEANED_LICENSE_TEXT = "Cleaned License Text"
    CLEANED_LICENSE_DIFF = "Cleaned License Diff"
class options:
    """Labels of the selectable options and checkboxes."""

    # Summarization types.
    ABSTRACTIVE = "Abstractive"
    EXTRACTIVE = "Extractive"
    BOTH = "Both"

    # Summary view choices.
    DISPLAY_SUMMARY_ONLY = "Display Summary Only"
    DISPLAY_HIGHLIGHTED_SUMMARY = "Display Highlighted Summary"

    # Cleaned-license view choices.
    HIDE_CLEANED_LICENSE = "Hide Cleaned License"
    DISPLAY_CLEANED_LICENSE = "Display Cleaned License"
    DISPLAY_CLEANED_DIFF = "Display Cleaned License + Diff"

    # Checkbox labels.
    SHOW_LICENSE_PROPERTIES = "Show license properties"
    SHOW_LICENSE_DEFINITIONS = "Show license definitions"
    SHOW_LICENSE_EXCEPTIONS = "Show license exceptions"
class help_messages:
    """Help texts explaining the selectable options, slider and checkboxes.

    Option labels are interpolated from the ``options`` class, and each
    label is wrapped in its own pair of double quotes. Long texts use
    implicit string concatenation so that source indentation never ends up
    inside the displayed text.
    """

    SUMMARIZATION_TYPE = (
        "Select the type of summarization to perform. "
        f'"{options.EXTRACTIVE}" would select the most important sentences to '
        f'generate a summary. "{options.ABSTRACTIVE}" would try and paraphrase '
        f'the meaning of the license and form a summary. "{options.BOTH}" would '
        'first pass the license through "extractive" and then "abstractive" '
        "to generate a summary."
    )
    SLIDER = (
        "Slide to vary the size of the summary. 1 will result in the "
        "smallest summary possible, whereas 100 will display the complete "
        "(cleaned) license text."
    )
    # The opening quote belongs in front of the first option label, not in
    # front of the whole sentence.
    SUMMARY_VIEW = (
        "Select the type of summary view desired. "
        f'"{options.DISPLAY_SUMMARY_ONLY}" will show only the '
        f'summary text. "{options.DISPLAY_HIGHLIGHTED_SUMMARY}" will show the '
        "complete (cleaned) license text with the summary highlighted."
    )
    CLEANED_LICENSE_VIEW = (
        "Select the type of cleaned license view "
        f'desired. "{options.HIDE_CLEANED_LICENSE}" will not show '
        f'the cleaned license text. "{options.DISPLAY_CLEANED_LICENSE}" will '
        f'show the cleaned license text. "{options.DISPLAY_CLEANED_DIFF}" will '
        "show the cleaned license text and the diff between the input text and "
        "the closest matching SPDX license (from the similarity index table)."
    )
    PROPERTIES_CHECKBOX = (
        "Select this checkbox to view the properties of the "
        "license that shares the highest similarity with the input license "
        "text. This checkbox would be disabled if no known license crosses the "
        "similarity threshold."
    )
    DEFINITIONS_CHECKBOX = (
        "Select this checkbox to view definitions within "
        "the license. This checkbox would be disabled if no definitions are "
        "found within the license."
    )
    EXCEPTIONS_CHECKBOX = (
        "Select this checkbox to view exceptions within "
        "the license. This checkbox would be disabled if no exceptions are "
        "found within the license."
    )