EmreYY20 committed on
Commit
47639e3
1 Parent(s): 353c1f3
Files changed (1) hide show
  1. extractive_model.py +8 -16
extractive_model.py CHANGED
@@ -15,32 +15,24 @@ from sumy.summarizers.text_rank import TextRankSummarizer
15
  import nltk
16
  nltk.download('punkt')
17
 
18
- def summarize_pdf_with_textrank(pdf_path, sentences_count=5):
19
  """
20
- Summarizes the content of a PDF file using TextRank algorithm.
21
 
22
  Args:
23
- pdf_path (str): Path to the PDF file.
24
  sentences_count (int): Number of sentences for the summary.
25
 
26
  Returns:
27
  str: Summarized text.
28
  """
29
 
30
- # Extract text from the PDF
31
- """
32
- pdf_text = ""
33
- with open(pdf_path, "rb") as pdf_file:
34
- pdf_reader = PyPDF2.PdfReader(pdf_file)
35
- for page in pdf_reader.pages:
36
- pdf_text += page.extract_text() or ""
37
- """
38
- # Check if text extraction was successful
39
- if not pdf_text.strip():
40
- return "Text extraction from PDF failed or PDF is empty."
41
 
42
- # Create a parser for the extracted text
43
- parser = PlaintextParser.from_string(pdf_text, Tokenizer("english"))
44
 
45
  # Use TextRank for summarization
46
  text_rank_summarizer = TextRankSummarizer()
 
15
  import nltk
16
  nltk.download('punkt')
17
 
18
+ def summarize_text_with_textrank(text, sentences_count=5):
19
  """
20
+ Summarizes the provided text using TextRank algorithm.
21
 
22
  Args:
23
+ text (str): Text to summarize.
24
  sentences_count (int): Number of sentences for the summary.
25
 
26
  Returns:
27
  str: Summarized text.
28
  """
29
 
30
+ # Check if the text is not empty
31
+ if not text.strip():
32
+ return "Provided text is empty."
 
 
 
 
 
 
 
 
33
 
34
+ # Create a parser for the provided text
35
+ parser = PlaintextParser.from_string(text, Tokenizer("english"))
36
 
37
  # Use TextRank for summarization
38
  text_rank_summarizer = TextRankSummarizer()