import streamlit as st
import requests
from transformers import T5Tokenizer, T5ForConditionalGeneration
from collections import OrderedDict
# Load models (cached so they are not reloaded on every Streamlit rerun)
@st.cache_resource
def load_models():
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    return tokenizer, model

t5_tokenizer, t5_model = load_models()
# API key for NewsAPI
NEWS_API_KEY = "66db8e116ae24c49beef53e8b879600a"
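# Note: hardcoding an API key is insecure. In a deployed Space the key is better
# read from Streamlit's secrets store (assuming a secret named "NEWS_API_KEY"
# has been configured for the app), e.g.:
#   NEWS_API_KEY = st.secrets["NEWS_API_KEY"]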
# Fetch news articles matching the user's query
def fetch_news(user_query):
    NEWS_API_URL = "https://newsapi.org/v2/everything"
    params = {
        'q': user_query,
        'apiKey': NEWS_API_KEY,
        'language': 'en',
        'pageSize': 10,  # fetch up to 10 articles
        'sortBy': 'relevancy',  # NewsAPI expects 'relevancy', not 'relevance'
    }
    response = requests.get(NEWS_API_URL, params=params, timeout=10)
    if response.status_code == 200:
        articles = response.json().get('articles', [])
        return [
            {
                'title': article.get('title', 'No Title'),
                'description': article.get('description', 'No Description'),
            }
            for article in articles
            if article.get('description')  # skip articles without a description
        ]
    return []
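# The list returned above has this shape (values are illustrative placeholders,
# not real API output):
#   [{'title': 'Some headline', 'description': 'Some summary text'}, ...]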
# Summarize each article with T5
def summarize_articles(articles):
    summaries = []
    for article in articles:
        input_text = f"summarize: Title: {article['title']}. Description: {article['description']}"
        inputs = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
        outputs = t5_model.generate(inputs, max_length=100, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
        summary = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
        summaries.append(remove_redundancy(summary))
    return summaries
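# Illustrative call with hypothetical input:
#   summarize_articles([{'title': 'Example', 'description': 'Example description.'}])
# returns a one-element list containing a deduplicated T5 summary string.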
# Remove duplicate sentences from a summary, preserving first-occurrence order
def remove_redundancy(summary):
    sentences = summary.split('. ')
    return '. '.join(OrderedDict.fromkeys(sentences))
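# Example with hypothetical input:
#   remove_redundancy("AI adoption grew. AI adoption grew. Markets reacted")
#   -> "AI adoption grew. Markets reacted"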
# Generate catchy blog content from all of the summaries
def generate_catchy_content(summarized_content):
    combined_prompt = f"""
    Write a blog post based on these insights:
    {', '.join(summarized_content)}
    """
    inputs = t5_tokenizer.encode(combined_prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = t5_model.generate(inputs, max_length=300, length_penalty=2.0, num_beams=4, early_stopping=True)
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
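# Caveat: the joined summaries can easily exceed the 512-token input window set
# above, in which case truncation=True silently drops the later summaries;
# t5-small is also not instruction-tuned, so the "Write a blog post" prompt is
# best-effort rather than guaranteed to produce blog-style output.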
# Main app
st.title("News Summarizer & Blog Generator")
st.subheader("Enter a topic to fetch news, summarize it, and generate engaging content!")

# Query input
user_query = st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):")
if st.button("Fetch, Summarize and Generate"):
    if user_query:
        st.info(f"Fetching articles related to: {user_query}")
        with st.spinner("Fetching news articles..."):
            articles = fetch_news(user_query)
        if articles:
            st.success(f"Fetched {len(articles)} articles!")

            # Display only the first 4 articles
            st.subheader("Fetched Articles")
            for i, article in enumerate(articles[:4], 1):
                st.write(f"**Article {i}:** {article['title']}")
                st.write(f"*Description:* {article['description']}")

            # Summarize every fetched article
            st.info("Summarizing articles...")
            summaries = summarize_articles(articles)
            st.subheader("Summarized Articles")
            for i, summary in enumerate(summaries[:4], 1):  # show the first 4 summaries
                st.write(f"**Summary {i}:** {summary}")

            # Generate the blog post from all summaries
            st.info("Generating blog post...")
            generated_content = generate_catchy_content(summaries)
            st.subheader("Generated Blog Post")
            st.write(generated_content)
        else:
            st.warning("No articles found. Try a different query.")
    else:
        st.error("Please enter a query to proceed!")
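# To run locally (assuming this file is saved as app.py):
#   pip install streamlit requests transformers sentencepiece torch
#   streamlit run app.py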