Spaces:

sohail-shaikh-s07
/

News-Article-Summarization

Sleeping

App Files Files

sohail-shaikh-s07 committed on 25 days ago

Commit

d52b4e9

verified ·

1 Parent(s): 78d5ce1

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -64

app.py CHANGED Viewed

@@ -6,95 +6,72 @@ import torch
 # Download required NLTK data
 try:
-    nltk.data.find('tokenizers/punkt')
-except LookupError:
     nltk.download('punkt')
 # Initialize the summarization pipeline
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if torch.cuda.is_available() else -1)
 def extract_and_summarize(url):
     try:
         # Download and parse article
         article = Article(url)
         article.download()
         article.parse()
-        article.nlp()
-        # Get the main text
         text = article.text
-        # If text is too long, split it into chunks
         max_chunk_length = 1024
         chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]
         # Summarize each chunk
         summaries = []
         for chunk in chunks:
-            if len(chunk.strip()) > 100:  # Only summarize chunks with substantial content
-                summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
-                summaries.append(summary[0]['summary_text'])
-        # Combine summaries
         final_summary = " ".join(summaries)
-        return {
-            "Title": article.title,
-            "Summary": final_summary,
-            "Keywords": ", ".join(article.keywords) if article.keywords else "No keywords available",
-            "Article Length": len(text),
-            "Summary Length": len(final_summary)
-        }
     except Exception as e:
-        return {
-            "Error": f"An error occurred: {str(e)}",
-            "Title": "Error",
-            "Summary": "Could not process the article",
-            "Keywords": "N/A",
-            "Article Length": 0,
-            "Summary Length": 0
-        }
 # Create Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📰 News Article Summarizer")
-    gr.Markdown("Enter a news article URL to get a concise summary using BART-large-CNN model")
-    with gr.Row():
-        url_input = gr.Textbox(label="Article URL", placeholder="Enter news article URL here...")
-    with gr.Row():
-        submit_btn = gr.Button("Summarize", variant="primary")
-    with gr.Row():
-        output = gr.JSON(label="Results")
-    # Example URLs
-    gr.Examples(
-        examples=[
-            ["https://www.bbc.com/news/world-europe-65454331"],
-            ["https://www.reuters.com/technology/exclusive-openai-researchers-warned-board-ai-breakthrough-ahead-sam-altman-ouster-2023-11-22/"]
-        ],
-        inputs=url_input,
-        label="Example Articles"
-    )
-    submit_btn.click(
-        fn=extract_and_summarize,
-        inputs=url_input,
-        outputs=output
-    )
-    gr.Markdown("""
-    ## How it works
-    1. Enter a URL of any news article
-    2. The app extracts the main content using newspaper3k
-    3. BART-large-CNN model summarizes the content
-    4. Get a concise summary along with article metadata
-    Note: For very long articles, the text is split into chunks and summarized separately.
-    """)
 if __name__ == "__main__":
     demo.launch()

 # Download required NLTK data
 try:
     nltk.download('punkt')
+except Exception as e:
+    print(f"Error downloading NLTK data: {e}")
 # Initialize the summarization pipeline
+try:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
+except Exception as e:
+    print(f"Error loading model: {e}")
+    summarizer = None
 def extract_and_summarize(url):
+    if not url or not url.strip():
+        return "Please enter a valid URL"
     try:
         # Download and parse article
         article = Article(url)
         article.download()
         article.parse()
+        # Get the text content
         text = article.text
+        if not text:
+            return "Could not extract text from the article"
+        # Split text into chunks if it's too long
         max_chunk_length = 1024
         chunks = [text[i:i + max_chunk_length] for i in range(0, len(text), max_chunk_length)]
         # Summarize each chunk
         summaries = []
         for chunk in chunks:
+            if len(chunk.strip()) > 100:  # Only summarize substantial chunks
+                try:
+                    summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
+                    summaries.append(summary[0]['summary_text'])
+                except Exception as e:
+                    print(f"Error summarizing chunk: {e}")
+                    continue
+        if not summaries:
+            return "Could not generate summary. Please try a different article."
+        # Combine all summaries
         final_summary = " ".join(summaries)
+        return final_summary
     except Exception as e:
+        return f"Error processing article: {str(e)}"
 # Create Gradio interface
+demo = gr.Interface(
+    fn=extract_and_summarize,
+    inputs=gr.Textbox(label="Enter News Article URL", placeholder="https://..."),
+    outputs=gr.Textbox(label="Summary"),
+    title="📰 News Article Summarizer",
+    description="Enter a news article URL to get a concise summary. The summary will focus on the main points of the article.",
+    examples=[
+        ["https://www.bbc.com/news/world-us-canada-67841980"],
+        ["https://www.reuters.com/technology/exclusive-openai-researchers-warned-board-ai-breakthrough-ahead-sam-altman-ouster-2023-11-22/"]
+    ],
+    theme=gr.themes.Soft()
+)
 if __name__ == "__main__":
     demo.launch()