Spaces:
Sleeping
Sleeping
File size: 4,218 Bytes
9228cad e6fd213 9228cad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import os
from collections import OrderedDict

import requests
import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Load Models
@st.cache_resource
def load_models():
    """Load the pretrained ``t5-small`` tokenizer and model.

    Returns:
        A ``(tokenizer, model)`` tuple: a ``T5Tokenizer`` and a
        ``T5ForConditionalGeneration`` built from the same checkpoint name.
    """
    checkpoint = "t5-small"
    return (
        T5Tokenizer.from_pretrained(checkpoint),
        T5ForConditionalGeneration.from_pretrained(checkpoint),
    )
# Bind the tokenizer and model as module-level globals; summarize_articles and
# generate_catchy_content read these names directly. load_models is decorated
# with @st.cache_resource.
t5_tokenizer, t5_model = load_models()
# API key for NewsAPI. The NEWS_API_KEY environment variable takes precedence
# so the credential can be supplied and rotated without editing source.
# NOTE(security): the literal fallback is a credential committed to the
# repository. It is kept only so existing deployments keep working; revoke it
# and delete the fallback once the environment variable is configured.
NEWS_API_KEY = (
    os.environ.get("NEWS_API_KEY") or "66db8e116ae24c49beef53e8b879600a"
)
# Fetch news articles based on the user query
# Cache entries expire after 10 minutes: news results go stale, and an empty
# list produced by a transient failure must not be served indefinitely.
@st.cache_data(ttl=600)
def fetch_news(user_query):
    """Fetch up to 10 English articles matching ``user_query`` from NewsAPI.

    Args:
        user_query: Search phrase sent as the ``q`` request parameter.

    Returns:
        A list of ``{'title': ..., 'description': ...}`` dicts, one for each
        returned article that has a non-empty description. An empty list is
        returned when the request fails or times out, when the response
        status is not 200, or when the body is not the expected JSON object.
    """
    NEWS_API_URL = "https://newsapi.org/v2/everything"
    params = {
        'q': user_query,
        'apiKey': NEWS_API_KEY,
        'language': 'en',
        'pageSize': 10,  # Fetch 10 articles
        'sortBy': 'relevance',
    }
    try:
        # requests applies no timeout by default, so an unresponsive server
        # would otherwise block this call (and the app run) indefinitely.
        response = requests.get(NEWS_API_URL, params=params, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # Body was not valid JSON.
        return []
    if not isinstance(payload, dict):
        return []

    # `or []` also covers an explicit null "articles" value.
    articles = payload.get('articles') or []
    return [
        {
            # dict.get's default only applies to a missing key; `or` also
            # replaces a title that is present but null/empty.
            'title': article.get('title') or 'No Title',
            # The filter below guarantees a non-empty description here.
            'description': article['description'],
        }
        for article in articles
        if article.get('description')
    ]
# Summarize articles
def summarize_articles(articles):
    """Produce one de-duplicated T5 summary per article.

    Args:
        articles: Iterable of dicts with ``'title'`` and ``'description'``
            keys.

    Returns:
        A list of summary strings in the same order as ``articles``; each one
        has been passed through ``remove_redundancy``.
    """

    def _summarize_one(article):
        # Title and description are folded into a single "summarize:" prompt.
        prompt = f"summarize: Title: {article['title']}. Description: {article['description']}"
        token_ids = t5_tokenizer.encode(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = t5_model.generate(
            token_ids,
            max_length=100,
            min_length=50,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        decoded = t5_tokenizer.decode(generated[0], skip_special_tokens=True)
        return remove_redundancy(decoded)

    return [_summarize_one(article) for article in articles]
# Remove redundancy in summaries
def remove_redundancy(summary):
    """Drop repeated sentences from ``summary``, keeping first occurrences.

    The text is split on ``'. '``. Two fragments count as the same sentence
    when they are equal after stripping surrounding whitespace and a trailing
    period. The period matters for the last fragment: it still carries the
    summary's closing ``'.'`` after the split, so comparing raw fragments
    would never recognise a repeated final sentence.

    Args:
        summary: Text whose sentences are separated by ``'. '``.

    Returns:
        The unique fragments, in original order, re-joined with ``'. '``.
        If the final fragment was dropped as a duplicate and ended with a
        period, that closing period is kept on the result.
    """
    fragments = summary.split('. ')
    seen = set()
    unique = []
    last_dropped = False
    for fragment in fragments:
        key = fragment.strip().rstrip('.').rstrip()
        last_dropped = key in seen
        if last_dropped:
            continue
        seen.add(key)
        unique.append(fragment)

    result = '. '.join(unique)
    # Dropping a duplicated closing sentence also drops the summary's final
    # period; put it back so the text still ends as the input did.
    if last_dropped and fragments[-1].rstrip().endswith('.'):
        result += '.'
    return result
# Generate catchy content based on all 10 summaries
def generate_catchy_content(summarized_content):
    """Generate a single piece of text from all article summaries.

    The summaries are joined with ``', '`` and placed under a fixed
    instruction line; the encoded prompt is truncated to 512 tokens before
    being passed to the T5 model.

    Args:
        summarized_content: Iterable of summary strings.

    Returns:
        The decoded model output (generated with ``max_length=300``).
    """
    joined_insights = ', '.join(summarized_content)
    # NOTE(review): line breaks reproduce the prompt literal as it appears in
    # the source; confirm no leading indentation was intended inside it.
    prompt = f"\nWrite a blog post based on these insights:\n{joined_insights}\n"
    token_ids = t5_tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generated = t5_model.generate(
        token_ids,
        max_length=300,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return t5_tokenizer.decode(generated[0], skip_special_tokens=True)
# Main App
st.title("News Summarizer & Blog Generator")
st.subheader("Enter a topic to fetch news, summarize and generate engaging content!")

# Number of articles and summaries rendered on the page. Every fetched article
# is still summarized and passed to the blog-post generator.
PREVIEW_COUNT = 4

# Query input. Surrounding whitespace is stripped so a blank-looking query is
# rejected up front instead of being sent to the API, and so "AI" and "AI "
# share one fetch_news cache entry.
user_query = (
    st.text_input("Enter a query (e.g., 'AI trends', 'Climate change impact'):") or ""
).strip()

if st.button("Fetch, Summarize and Generate"):
    if not user_query:
        st.error("Please enter a query to proceed!")
    else:
        st.info(f"Fetching articles related to: {user_query}")
        with st.spinner("Fetching news articles..."):
            articles = fetch_news(user_query)

        if not articles:
            st.warning("No articles found. Try a different query.")
        else:
            st.success(f"Fetched {len(articles)} articles!")

            # Show only the first PREVIEW_COUNT articles.
            st.subheader("Fetched Articles")
            for i, article in enumerate(articles[:PREVIEW_COUNT], 1):
                st.write(f"**Article {i}:** {article['title']}")
                st.write(f"*Description:* {article['description']}")

            # Summarize every fetched article, not just the previewed ones.
            st.info("Summarizing articles...")
            summaries = summarize_articles(articles)
            st.subheader("Summarized Articles")
            for i, summary in enumerate(summaries[:PREVIEW_COUNT], 1):
                st.write(f"**Summary {i}:** {summary}")

            # The blog post is generated from all summaries.
            st.info("Generating blog post...")
            generated_content = generate_catchy_content(summaries)
            st.subheader("Generated Blog Post")
            st.write(generated_content)
|