# Hugging Face Space: text-analysis demo (page banner "Spaces: Running" from the scrape)
import time

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from smolagents.agents import HuggingFaceAgent
from transformers import pipeline
# Load the Hugging Face pipelines once at import time so every request
# reuses the same in-memory models instead of re-initializing them.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
topic_classifier = pipeline("zero-shot-classification")
def fetch_text_from_url(url, timeout=10):
    """Download *url* and return the concatenated text of its ``<p>`` elements.

    Parameters
    ----------
    url : str
        Address of the HTML page to scrape.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10), so a dead host
        cannot hang the UI indefinitely.

    Returns
    -------
    str
        Space-joined text of every paragraph tag; empty string when the page
        has no ``<p>`` elements.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of scraping an error page as content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return " ".join(p.get_text() for p in soup.find_all("p"))
def extract_text_from_pdf(file):
    """Return the plain text of every page in a PDF upload.

    Parameters
    ----------
    file : file-like object
        Binary handle (e.g. a Gradio upload) readable by ``PyPDF2.PdfReader``.

    Returns
    -------
    str
        Concatenated page texts. Pages with no extractable text (e.g.
        scanned images) contribute an empty string instead of crashing.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # extract_text() may return None for image-only pages; the previous
    # `text += page.extract_text()` raised TypeError in that case.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def extract_text_from_docx(file):
    """Return the text of a ``.docx`` document, one paragraph per line.

    Every paragraph — including the last — is followed by a newline,
    reproducing the original accumulate-and-append behaviour exactly.
    """
    document = docx.Document(file)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "".join(paragraph_text + "\n" for paragraph_text in paragraph_texts)
def analyze_text(input_text, input_type, tasks, progress=gr.Progress()):
    """Resolve the input to plain text and run the requested NLP analyses.

    Parameters
    ----------
    input_text : str or file-like
        Raw text, a URL string, or an uploaded file object, depending on
        *input_type*.
    input_type : str
        One of ``"Text"``, ``"URL"`` or ``"File"``; selects how *input_text*
        is converted to plain text.
    tasks : list[str]
        Subset of {"Summarization", "Sentiment Analysis", "Topic Detection"}.
    progress : gr.Progress, optional
        Gradio progress tracker. The ``gr.Progress()`` default is the
        documented Gradio idiom for progress reporting (the shared default
        instance holds no per-call state).

    Returns
    -------
    tuple[str, str, str, str]
        (preview of the original text, summary, sentiment label,
        comma-joined topic labels) — empty strings for tasks not requested.
    """
    if input_type == "URL":
        progress(0, desc="Fetching text from URL")
        input_text = fetch_text_from_url(input_text)
    elif input_type == "File":
        progress(0, desc="Extracting text from file")
        filename = input_text.name.lower()
        if filename.endswith(".pdf"):
            input_text = extract_text_from_pdf(input_text)
        elif filename.endswith(".docx"):
            input_text = extract_text_from_docx(input_text)
        else:
            # Fall back to treating the upload as UTF-8 plain text.
            input_text = input_text.read().decode("utf-8")
    # Show at most the first 1000 characters as a preview of the source text.
    original_text = input_text[:1000] + ("..." if len(input_text) > 1000 else "")
    summary, sentiment, topics = "", "", ""
    if "Summarization" in tasks:
        progress(0.3, desc="Generating summary")
        # BART's encoder accepts ~1024 tokens; truncate the raw text so long
        # documents do not crash the pipeline (the other tasks already truncate).
        summary = summarizer(
            input_text[:4000], max_length=100, min_length=30, do_sample=False
        )[0]["summary_text"]
        time.sleep(1)  # minimal delay so the progress bar is visible (demo only)
    if "Sentiment Analysis" in tasks:
        progress(0.6, desc="Analyzing sentiment")
        # Truncate input for sentiment analysis (512-token model limit).
        sentiment = sentiment_analyzer(input_text[:512])[0]["label"]
        time.sleep(1)
    if "Topic Detection" in tasks:
        progress(0.9, desc="Detecting topics")
        topic_labels = ["technology", "politics", "sports", "entertainment", "business"]
        # Truncate input for topic detection; multi_label allows several topics.
        topics = topic_classifier(input_text[:512], topic_labels, multi_label=True)["labels"]
        time.sleep(1)
    progress(1, desc="Analysis completed")
    return original_text, summary, sentiment, ", ".join(topics)
def create_interface():
    """Build and return the Gradio Blocks app for the text-analysis demo.

    Fixes relative to the previous version:

    * components are created inside a ``with gr.Blocks()`` context —
      ``gr.Blocks`` has no ``inputs``/``outputs`` constructor arguments, and
      the ``gr.inputs`` namespace was removed from Gradio;
    * ``gr.Tab`` is used as a layout context, not passed to
      ``gr.TabbedInterface``;
    * the agent-enhanced summary/sentiment get their own output boxes — the
      old ``click`` outputs list reused ``summary_output`` and
      ``sentiment_output`` twice, so the enhanced values silently overwrote
      the raw ones.

    Returns
    -------
    gr.Blocks
        The assembled app, ready for ``.launch()``.
    """
    # NOTE(review): HuggingFaceAgent's constructor signature comes from
    # smolagents — confirm `model_endpoint` is the accepted keyword.
    model_endpoint = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    agent = HuggingFaceAgent(model_endpoint=model_endpoint)

    with gr.Blocks(title="Text Analysis App") as demo:
        input_type = gr.Dropdown(["Text", "URL", "File"], label="Input Type", value="Text")
        text_input = gr.Textbox(label="Text", visible=True)
        url_input = gr.Textbox(label="URL", visible=False)
        file_input = gr.File(visible=False)
        tasks_checkboxes = gr.CheckboxGroup(
            ["Summarization", "Sentiment Analysis", "Topic Detection"],
            label="Analysis Tasks",
        )
        submit_button = gr.Button("Analyze")

        with gr.Tab("Original Text"):
            original_text_output = gr.Textbox(label="Original Text")
        with gr.Tab("Summary"):
            summary_output = gr.Textbox(label="Summary")
            enhanced_summary_output = gr.Textbox(label="Agent Feedback on Summary")
        with gr.Tab("Sentiment"):
            sentiment_output = gr.Textbox(label="Sentiment")
            enhanced_sentiment_output = gr.Textbox(label="Agent Feedback on Sentiment")
        with gr.Tab("Topics"):
            topics_output = gr.Textbox(label="Topics")

        def update_input_visibility(choice):
            # Show exactly the one input widget matching the selected type.
            return (
                gr.update(visible=choice == "Text"),
                gr.update(visible=choice == "URL"),
                gr.update(visible=choice == "File"),
            )

        input_type.change(
            update_input_visibility,
            inputs=[input_type],
            outputs=[text_input, url_input, file_input],
        )

        def process_input(choice, text, url, file, tasks, progress=gr.Progress()):
            # Pick the value matching the selected input type.
            input_value = {"Text": text, "URL": url}.get(choice, file)
            try:
                original_text, summary, sentiment, topics = analyze_text(
                    input_value, choice, tasks, progress
                )
                enhanced_summary = agent.run(f"Given the following text: '{original_text}', please suggest improvements to this summary: '{summary}'")
                enhanced_sentiment = agent.run(f"Given the following text: '{original_text}', does this sentiment seem accurate: '{sentiment}'? Please elaborate and suggest any corrections.")
            except Exception as e:
                # Surface the failure in the UI instead of crashing the app.
                original_text = f"Error: {str(e)}"
                summary, sentiment, topics = "", "", ""
                enhanced_summary = ""
                enhanced_sentiment = ""
            return (
                original_text,
                summary,
                enhanced_summary,
                sentiment,
                enhanced_sentiment,
                topics,
            )

        submit_button.click(
            fn=process_input,
            inputs=[input_type, text_input, url_input, file_input, tasks_checkboxes],
            outputs=[
                original_text_output,
                summary_output,
                enhanced_summary_output,
                sentiment_output,
                enhanced_sentiment_output,
                topics_output,
            ],
        )

    return demo
if __name__ == "__main__":
    # Build the UI and start the local Gradio server when run as a script.
    app = create_interface()
    app.launch()