# Page-status banner captured along with this listing (not program code): "Spaces: Sleeping".
from urllib.parse import urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
# Load the summarization pipeline once, at module level, so that every call
# to summarize_text() reuses the same model instance.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Function to extract text from a webpage
def fetch_url_text(url):
    """Download a web page and return its readable text.

    Args:
        url: Address of the page to fetch. Must be an ``http`` or ``https``
            URL that includes a host name.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the page text with
        whitespace collapsed to single spaces and ``error`` is ``None``. On
        any failure ``text`` is ``None`` and ``error`` is a user-facing
        message. This function never raises.
    """
    target = (url or "").strip()

    # Validate up front so blank or non-web input gets a clear message
    # instead of whatever exception the HTTP client happens to raise.
    try:
        parts = urlparse(target)
    except ValueError as e:  # e.g. a malformed IPv6 literal
        return None, f"❌ URL error: {e}"
    if parts.scheme not in ("http", "https") or not parts.netloc:
        return None, "❌ URL error: only http:// and https:// URLs are supported."

    try:
        response = requests.get(
            target,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Script/style bodies are not prose; left in place they would be
        # returned by get_text() and fed to the summarizer.
        for tag in soup.find_all(["script", "style", "noscript"]):
            tag.decompose()
        raw_text = soup.get_text(separator=" ", strip=True)
    except Exception as e:
        # UI boundary: any network or parsing failure is reported as a
        # message rather than propagated to the caller.
        return None, f"❌ URL error: {e}"

    text = " ".join(raw_text.split())
    if len(text) < 100:
        return None, "❌ Extracted text from the webpage is too short to summarize."
    return text, None
# Summarization function
def summarize_text(text_input, file_upload, url_input):
    """Summarize text taken from an uploaded file, a web page, or the text box.

    Exactly one source is used, chosen in this order of precedence:
    uploaded file, then URL, then typed text.

    Args:
        text_input: Text typed or pasted by the user (may be empty or None).
        file_upload: Uploaded UTF-8 text file, given either as a path string
            or as an object whose ``name`` attribute is the path; falsy when
            nothing was uploaded.
        url_input: Address of a web page to summarize (may be empty or None).

    Returns:
        The generated summary, or a user-facing message describing why no
        summary could be produced.
    """
    url = (url_input or "").strip()

    if file_upload:
        # Accept both a plain path and a file-like wrapper exposing ``.name``.
        path = getattr(file_upload, "name", file_upload)
        try:
            with open(path, "r", encoding="utf-8") as f:
                text = f.read()
        except (OSError, ValueError, TypeError) as e:
            # UnicodeDecodeError is a ValueError, so non-UTF-8 files land here.
            return f"❌ File read error: {e}"
    elif url:
        text, error_msg = fetch_url_text(url)
        if error_msg:
            return error_msg
    else:
        text = text_input or ""

    text = text.strip()
    if not text:
        # Covers no input at all, whitespace-only text, and empty files.
        return "⚠️ Please provide some input."

    try:
        # The 1024-character slice bounds the amount of work per request, but
        # characters are not tokens: truncation=True additionally lets the
        # tokenizer enforce the model's own input limit.
        summary = summarizer(
            text[:1024],
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        # UI boundary: report model failures as a message instead of raising.
        return f"❌ Summarization error: {e}"
# Gradio Interface
# Three inputs are passed positionally to summarize_text in the order listed:
# typed text, uploaded file, URL.
# NOTE(review): the emoji in the labels and title were mis-encoded in the
# source this was recovered from; the ones below are best-guess restorations
# and should be confirmed against the original.
demo = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Textbox(label="✍️ Enter Text", lines=4, placeholder="Paste or type text here..."),
        gr.File(label="📄 Upload a .txt File", file_types=[".txt"]),
        gr.Textbox(label="🌐 Enter Webpage URL", placeholder="https://example.com/article"),
    ],
    outputs="text",
    title="🧠 Multi-Input Text Summarizer",
    description="Summarize content from text, uploaded files, or web URLs using the BART model.",
)

# Start the web server only when run as a script, so importing this module
# (for tests or tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()