Spaces:
Build error
Build error
| from lexrank import STOPWORDS | |
| from lexrank import LexRank as LR | |
| import nltk | |
| from .base_single_doc_model import SingleDocSummModel | |
class LexRankModel(SingleDocSummModel):
    """Extractive single-document summarizer built on the ``lexrank`` package.

    A LexRank model is fitted once, at construction time, on a reference
    collection of documents; ``summarize`` then selects sentences from each
    input document using that fitted model.
    """

    # static variables
    model_name = "LexRank"
    is_extractive = True
    is_neural = False

    def __init__(self, data, summary_length=2, threshold=0.1):
        """Fit the underlying LexRank model on ``data``.

        Args:
            data: Iterable of document strings. Each document is split into
                sentences with ``nltk.sent_tokenize`` before being handed to
                LexRank.
            summary_length: Number of sentences requested per summary
                (forwarded to ``get_summary`` as ``summary_size``).
            threshold: Value forwarded to ``get_summary`` as ``threshold``.
        """
        super().__init__()

        # sent_tokenize needs the "punkt" resource; fetch it quietly if absent.
        nltk.download("punkt", quiet=True)
        corpus = [nltk.sent_tokenize(example) for example in data]
        self.lxr = LR(corpus, stopwords=STOPWORDS["en"])
        self.summary_length = summary_length
        self.threshold = threshold

    def summarize(self, corpus, queries=None):
        """Return one extractive summary string per document in ``corpus``.

        Args:
            corpus: Iterable of document strings to summarize.
            queries: Only passed to ``assert_summ_input_type``; it is not
                used when building the summaries.

        Returns:
            A list of strings, one per input document, each made of the
            selected sentences joined by a single space.
        """
        # Input validation is delegated to the inherited helper.
        self.assert_summ_input_type(corpus, queries)

        documents = [nltk.sent_tokenize(document) for document in corpus]
        summaries = [
            " ".join(
                self.lxr.get_summary(
                    document,
                    summary_size=self.summary_length,
                    threshold=self.threshold,
                )
            )
            for document in documents
        ]

        return summaries

    @classmethod
    def show_capability(cls):
        """Print a human-readable description of this model.

        Declared as a classmethod so it can be called on the class itself
        (``LexRankModel.show_capability()``) without building an instance.
        """
        basic_description = cls.generate_basic_description()
        more_details = (
            "Works by using a graph-based method to identify the most salient sentences in the document. \n"
            "Strengths: \n - Fast with low memory usage \n - Allows for control of summary length \n "
            "Weaknesses: \n - Not as accurate as neural methods. \n "
            "Initialization arguments: \n "
            "- `data`: Unlabelled corpus of documents. \n "
            "- `summary_length`: sentence length of summaries \n "
            "- `threshold`: Level of salience required for sentence to be included in summary."
        )
        print(f"{basic_description} \n {'#'*20} \n {more_details}")