import gradio as gr
from transformers import pipeline
import feedparser
from datetime import datetime, timedelta
import json
import os
import logging
import pytz
from bs4 import BeautifulSoup
import hashlib
import threading

# Logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Global settings
SUMMARIZER_MODEL = "facebook/bart-large-cnn"  # Swap in any other Hugging Face summarization model here
CACHE_SIZE = 500  # Maximum number of cached summaries
RSS_FETCH_INTERVAL = timedelta(hours=8)  # Only keep news published within the last 8 hours
TIMEOUT_LIMIT = 30  # Intended cap (seconds) on summary processing; currently not enforced anywhere below

# News sources
NEWS_SOURCES = {
    "Technology": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best"
    },
    "Business": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"
    },
    "Science": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml"
    },
    "World News": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
        "reutersagency": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best"
    },
    "Sports": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
    },
    "Health": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
        "politico": "http://rss.politico.com/healthcare.xml",
        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
    },
}
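
# Note: RSS feed URLs tend to go stale over time; a source that stops responding is
# simply logged and skipped in fetch_rss_news() below, so the app degrades gracefully.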

# Initialize cache
class NewsCache:
    def __init__(self, size):
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        with self.lock:
            if len(self.cache) >= self.size:
                # Remove the oldest cached item
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value
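
# Eviction behavior: Python dicts preserve insertion order, so next(iter(self.cache))
# is always the oldest entry and a full cache evicts in plain FIFO order (not LRU;
# get() does not refresh an entry's position).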

cache = NewsCache(CACHE_SIZE)

# Initialize summarizer
summarizer = pipeline("summarization", model=SUMMARIZER_MODEL, device=-1)  # device=-1 runs on CPU
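
# Note: on first run the pipeline downloads the model weights from the Hugging Face Hub,
# so the initial launch can take a while; later runs load from the local cache.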

# Utility functions
def fetch_rss_news(categories):
    """Fetch news articles from RSS feeds based on selected categories."""
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            try:
                feed = feedparser.parse(url)
                for entry in feed.entries:
                    # Skip entries without a parseable publication time
                    if not getattr(entry, "published_parsed", None):
                        continue
                    published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
                    if published > cutoff_time:
                        articles.append({
                            "title": entry.title,
                            "description": BeautifulSoup(entry.get("description", ""), "html.parser").get_text(),
                            "link": entry.link,
                            "category": category,
                            "source": source,
                            "published": published
                        })
            except Exception as e:
                logging.error(f"Failed to fetch from {url}: {e}")
    return articles
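
# Note: feedparser.parse() rarely raises on bad input; an unreachable or malformed feed
# usually just yields an empty entries list (with feed.bozo set), so the except branch
# mostly catches unexpected errors while building the article dicts.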

def summarize_text(text):
    """Summarize the text using the AI model."""
    # MD5 is used only as a cheap cache key here, not for security
    content_hash = hashlib.md5(text.encode()).hexdigest()
    cached_summary = cache.get(content_hash)
    if cached_summary:
        return cached_summary
    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = result[0]['summary_text']
        cache.set(content_hash, summary)
        return summary
    except Exception as e:
        logging.error(f"Summarization failed: {e}")
        return "Summary unavailable."

def summarize_articles(articles):
    """Summarize all fetched articles."""
    summaries = []
    for article in articles:
        try:
            content = article["description"]
            title = article["title"]
            category = article["category"]
            source = article["source"]
            link = article["link"]
            published = article["published"].strftime('%Y-%m-%d %H:%M')
            # Summarize article content
            summary = summarize_text(content)
            if summary:
                summaries.append(f"""
**{title}**
**Category:** {category} | **Source:** {source} | **Published:** {published}
{summary}
[Read more]({link})
---""")
        except Exception as e:
            logging.error(f"Error summarizing article: {e}")
            continue
    return summaries

def generate_user_summary(name):
    """Generate a personalized news summary based on user preferences."""
    # Apply the same filename sanitization used in save_preferences below,
    # so lookups match what was written to disk
    safe_name = "".join(c for c in name if c.isalnum() or c in ("-", "_"))
    # Load preferences
    try:
        with open(f"user_preferences/preferences_{safe_name}.json") as f:
            preferences = json.load(f)
    except FileNotFoundError:
        return "Preferences not found. Please set your preferences first."
    except Exception as e:
        logging.error(f"Error loading preferences: {e}")
        return "Failed to load preferences."

    categories = preferences.get("interests", [])
    if not categories:
        return "No categories selected. Please update your preferences."

    # Fetch news
    articles = fetch_rss_news(categories)
    if not articles:
        return "No recent news found in your selected categories."

    # Summarize all articles and combine the results
    summaries = summarize_articles(articles)
    return "\n\n".join(summaries) if summaries else "No summaries available."

# Gradio interface
demo = gr.Blocks()
with demo:
    gr.Markdown("# 📰 Personalized AI News Summarizer")
    with gr.Tab("Set Preferences"):
        name_input = gr.Textbox(label="Your Name")
        interests = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select Your Interests"
        )
        save_button = gr.Button("Save Preferences")
        save_status = gr.Textbox(label="Status")

        def save_preferences(name, selected_interests):
            if not name or not selected_interests:
                return "Name and interests are required!"
            # The name feeds into a filename, so strip anything that could escape the directory
            safe_name = "".join(c for c in name if c.isalnum() or c in ("-", "_"))
            if not safe_name:
                return "Please use letters, numbers, hyphens, or underscores in your name."
            preferences = {"name": name, "interests": selected_interests}
            try:
                os.makedirs("user_preferences", exist_ok=True)
                with open(f"user_preferences/preferences_{safe_name}.json", "w") as f:
                    json.dump(preferences, f)
                return "Preferences saved successfully!"
            except Exception as e:
                logging.error(f"Failed to save preferences: {e}")
                return "Failed to save preferences."

        save_button.click(save_preferences, inputs=[name_input, interests], outputs=save_status)

    with gr.Tab("Get News Summary"):
        name_input_summary = gr.Textbox(label="Your Name")
        fetch_button = gr.Button("Get Summary")
        summary_output = gr.Textbox(label="News Summary", lines=20)

        fetch_button.click(generate_user_summary, inputs=[name_input_summary], outputs=summary_output)

if __name__ == "__main__":
    demo.launch()
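
# By default demo.launch() serves on http://127.0.0.1:7860; pass share=True for a
# temporary public link, or server_name="0.0.0.0" when running inside a container.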