# word_cloud_news / app.py
# randeom's picture
# Create app.py
# dbec3ef verified
# raw
# history blame contribute delete
# No virus
# 3.19 kB
import html
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from io import BytesIO

import requests
import streamlit as st
from wordcloud import WordCloud
# Configure the page: tab title and wide layout
st.set_page_config(layout="wide", page_title="Word Cloud from News Headlines")

# Stylesheet injected into the page: gradient background, translucent panels,
# button styling, and a rule that hides the white bar at the top.
_CUSTOM_CSS = """
<style>
.stApp {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.css-18e3th9 {
background-color: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 10px;
}
.css-1d391kg {
background-color: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 10px;
}
.st-emotion-cache-18ni7ap {
display: none;
}
.stButton button {
color: black !important;
background-color: #667eea;
border-radius: 10px;
padding: 10px 20px;
font-weight: bold;
}
.headline-container {
background-color: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 10px;
margin-top: 20px;
}
.st-emotion-cache-7ym5gk {
color: black !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Page heading and short description
st.title("Word Cloud from News Headlines")
st.markdown("### Generating a word cloud from live news headlines")

# Sidebar controls; the resulting values are read by the rest of the script
_sidebar = st.sidebar
_sidebar.title("Customize Your Word Cloud")
bg_color = _sidebar.color_picker("Background Color", value="#ffffff")
max_words = _sidebar.slider(
    "Maximum Number of Words", min_value=10, max_value=200, value=100
)
keyword = _sidebar.text_input("Search Keyword (Optional)")
# Default selection: the seven days leading up to now
_default_range = [datetime.now() - timedelta(days=7), datetime.now()]
date_range = _sidebar.date_input("Date Range", value=_default_range)
# Seconds to wait for Google News before giving up, so a stalled connection
# cannot hang the app indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def _publication_date(pub_date_text):
    """Return the local calendar date of an RSS ``pubDate`` string.

    Returns None when the value is missing or cannot be parsed, so callers
    can decide how to treat items of unknown age.
    """
    if not pub_date_text:
        return None
    try:
        # Convert to the local timezone so the result is comparable with
        # the locally selected dates passed in by the caller.
        return parsedate_to_datetime(pub_date_text).astimezone().date()
    except (TypeError, ValueError):
        return None


# Function to fetch news headlines from Google News RSS feed
def fetch_news_headlines(keyword=None, date_range=None):
    """Fetch headline titles from the Google News RSS feed.

    Args:
        keyword: Optional search term. When given, the search feed is
            queried instead of the top-stories feed. The term is
            URL-encoded by ``requests``, so spaces and characters such as
            ``&`` or ``#`` are safe.
        date_range: Optional pair ``(start, end)`` of ``date`` or
            ``datetime`` objects. When exactly two values are supplied,
            items whose ``pubDate`` falls outside the inclusive range are
            dropped. Items with no parseable ``pubDate`` are kept. Any other
            shape (None, a partially selected range, ...) disables
            filtering.

    Returns:
        A list of non-empty headline strings. The list is empty when the
        request fails, the server returns an error status, or the response
        is not well-formed XML.
    """
    url = "https://news.google.com/rss"
    params = None
    if keyword:
        url += "/search"
        params = {"q": keyword}

    try:
        response = requests.get(url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        root = ET.fromstring(response.content)
    except (requests.RequestException, ET.ParseError):
        # Best effort: the caller shows a warning when nothing is returned.
        return []

    bounds = None
    if isinstance(date_range, (list, tuple)) and len(date_range) == 2:
        # Normalise datetimes to dates and tolerate a reversed pair.
        start, end = sorted(
            d.date() if isinstance(d, datetime) else d for d in date_range
        )
        bounds = (start, end)

    headlines = []
    for item in root.findall('./channel/item'):
        title = item.findtext('title')
        if not title:
            # Skip items with a missing or empty <title>; a None entry would
            # break the later ' '.join over the headlines.
            continue
        if bounds is not None:
            published = _publication_date(item.findtext('pubDate'))
            if published is not None and not (bounds[0] <= published <= bounds[1]):
                continue
        headlines.append(title)
    return headlines
# Generate word cloud
headlines = fetch_news_headlines(keyword, date_range)
if headlines:
    wordcloud_text = ' '.join(headlines)
    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color=bg_color,
            max_words=max_words,
        ).generate(wordcloud_text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no usable words
        # (for example, when every token is filtered out as a stop word).
        wordcloud = None

    if wordcloud is None:
        st.warning("The fetched headlines do not contain enough words to build a word cloud.")
    else:
        st.image(wordcloud.to_array(), use_column_width=True)
        # Download option: encode the image as PNG and hand the raw bytes
        # to the download button.
        img = BytesIO()
        wordcloud.to_image().save(img, format='PNG')
        st.download_button(
            label="Download Word Cloud",
            data=img.getvalue(),
            file_name="wordcloud.png",
            mime="image/png",
        )

    # Display headlines
    st.markdown("### Fetched Headlines")
    with st.expander("Show Headlines"):
        # Emit the container and its items in ONE markdown call: a <div>
        # opened in one call and closed in another does not wrap the
        # elements rendered in between. Headlines come from a remote feed,
        # so escape them before embedding them in raw HTML.
        headline_items = "".join(
            f"<li>{html.escape(headline)}</li>" for headline in headlines
        )
        st.markdown(
            f'<div class="headline-container"><ul>{headline_items}</ul></div>',
            unsafe_allow_html=True,
        )
else:
    st.warning("No headlines fetched. Please try again later.")
# Footer: horizontal rule followed by a centred credit line
_FOOTER_HTML = """
<hr>
<div style="text-align: center;">
Created by randeom
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)