sohail-shaikh-s07 committed on
Commit
d52b4e9
·
verified ·
1 Parent(s): 78d5ce1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -64
app.py CHANGED
@@ -6,95 +6,72 @@ import torch
6
 
7
  # Download required NLTK data
8
  try:
9
- nltk.data.find('tokenizers/punkt')
10
- except LookupError:
11
  nltk.download('punkt')
 
 
12
 
13
  # Initialize the summarization pipeline
14
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
 
 
 
 
 
15
 
16
  def extract_and_summarize(url):
 
 
 
17
  try:
18
  # Download and parse article
19
  article = Article(url)
20
  article.download()
21
  article.parse()
22
- article.nlp()
23
 
24
- # Get the main text
25
  text = article.text
26
-
27
- # If text is too long, split it into chunks
 
 
28
  max_chunk_length = 1024
29
  chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]
30
 
31
  # Summarize each chunk
32
  summaries = []
33
  for chunk in chunks:
34
- if len(chunk.strip()) > 100: # Only summarize chunks with substantial content
35
- summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
36
- summaries.append(summary[0]['summary_text'])
 
 
 
 
37
 
38
- # Combine summaries
 
 
 
39
  final_summary = " ".join(summaries)
40
 
41
- return {
42
- "Title": article.title,
43
- "Summary": final_summary,
44
- "Keywords": ", ".join(article.keywords) if article.keywords else "No keywords available",
45
- "Article Length": len(text),
46
- "Summary Length": len(final_summary)
47
- }
48
 
49
  except Exception as e:
50
- return {
51
- "Error": f"An error occurred: {str(e)}",
52
- "Title": "Error",
53
- "Summary": "Could not process the article",
54
- "Keywords": "N/A",
55
- "Article Length": 0,
56
- "Summary Length": 0
57
- }
58
 
59
  # Create Gradio interface
60
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
61
- gr.Markdown("# 📰 News Article Summarizer")
62
- gr.Markdown("Enter a news article URL to get a concise summary using BART-large-CNN model")
63
-
64
- with gr.Row():
65
- url_input = gr.Textbox(label="Article URL", placeholder="Enter news article URL here...")
66
-
67
- with gr.Row():
68
- submit_btn = gr.Button("Summarize", variant="primary")
69
-
70
- with gr.Row():
71
- output = gr.JSON(label="Results")
72
-
73
- # Example URLs
74
- gr.Examples(
75
- examples=[
76
- ["https://www.bbc.com/news/world-europe-65454331"],
77
- ["https://www.reuters.com/technology/exclusive-openai-researchers-warned-board-ai-breakthrough-ahead-sam-altman-ouster-2023-11-22/"]
78
- ],
79
- inputs=url_input,
80
- label="Example Articles"
81
- )
82
-
83
- submit_btn.click(
84
- fn=extract_and_summarize,
85
- inputs=url_input,
86
- outputs=output
87
- )
88
-
89
- gr.Markdown("""
90
- ## How it works
91
- 1. Enter a URL of any news article
92
- 2. The app extracts the main content using newspaper3k
93
- 3. BART-large-CNN model summarizes the content
94
- 4. Get a concise summary along with article metadata
95
-
96
- Note: For very long articles, the text is split into chunks and summarized separately.
97
- """)
98
 
99
  if __name__ == "__main__":
100
  demo.launch()
 
6
 
7
  # Download required NLTK data
8
  try:
 
 
9
  nltk.download('punkt')
10
+ except Exception as e:
11
+ print(f"Error downloading NLTK data: {e}")
12
 
13
  # Initialize the summarization pipeline
14
+ try:
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
17
+ except Exception as e:
18
+ print(f"Error loading model: {e}")
19
+ summarizer = None
20
 
def extract_and_summarize(url):
    """Download the news article at *url* and return a summary string.

    Never raises: every failure mode (empty URL, model unavailable,
    extraction failure, no usable text) is reported as a human-readable
    message string, which the Gradio Textbox output displays directly.
    """
    if not url or not url.strip():
        return "Please enter a valid URL"

    # Startup code sets `summarizer` to None when the model fails to load.
    # Fail fast with a clear message instead of letting every chunk below
    # raise and produce a misleading "could not generate summary" result.
    if summarizer is None:
        return "Summarization model is not available. Please try again later."

    try:
        # Download and parse the article with newspaper3k.
        article = Article(url)
        article.download()
        article.parse()

        # Get the text content
        text = article.text
        if not text:
            return "Could not extract text from the article"

        # The model has a bounded input window, so split long articles into
        # fixed-size character chunks and summarize each chunk separately.
        max_chunk_length = 1024
        chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]

        summaries = []
        for chunk in chunks:
            if len(chunk.strip()) > 100:  # only summarize substantial chunks
                try:
                    summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
                    summaries.append(summary[0]['summary_text'])
                except Exception as e:
                    # One bad chunk should not abort the whole article.
                    print(f"Error summarizing chunk: {e}")
                    continue

        if not summaries:
            return "Could not generate summary. Please try a different article."

        # Combine the per-chunk summaries back into one text.
        final_summary = " ".join(summaries)

        return final_summary

    except Exception as e:
        return f"Error processing article: {str(e)}"
# Create Gradio interface: a single URL textbox in, a single summary
# textbox out, wired directly to extract_and_summarize (which returns
# either the summary or an error-message string).
demo = gr.Interface(
    fn=extract_and_summarize,
    inputs=gr.Textbox(label="Enter News Article URL", placeholder="https://..."),
    outputs=gr.Textbox(label="Summary"),
    title="📰 News Article Summarizer",
    description="Enter a news article URL to get a concise summary. The summary will focus on the main points of the article.",
    # Clickable sample articles shown beneath the input box.
    examples=[
        ["https://www.bbc.com/news/world-us-canada-67841980"],
        ["https://www.reuters.com/technology/exclusive-openai-researchers-warned-board-ai-breakthrough-ahead-sam-altman-ouster-2023-11-22/"]
    ],
    theme=gr.themes.Soft()
)

# Launch the web app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()