Spaces:

randeom
/

word_cloud_news

Sleeping

File size: 3,186 Bytes

dbec3ef

import html
import logging
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from io import BytesIO

import requests
import streamlit as st
from wordcloud import WordCloud

# Set page configuration (kept ahead of every other st.* call in this script)
st.set_page_config(
    page_title="Word Cloud from News Headlines",
    layout="wide",
)

# Custom CSS for unique design and to remove the white bar on top.
# NOTE(review): selectors such as .css-18e3th9 and .st-emotion-cache-* look like
# generated class names - presumably tied to one Streamlit release; confirm they
# still match the elements they are meant to style.
_PAGE_STYLE = """
    <style>
    .stApp {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
    }
    .css-18e3th9 {
        background-color: rgba(255, 255, 255, 0.1);
        padding: 20px;
        border-radius: 10px;
    }
    .css-1d391kg {
        background-color: rgba(255, 255, 255, 0.1);
        padding: 20px;
        border-radius: 10px;
    }
    .st-emotion-cache-18ni7ap {
        display: none;
    }
    .stButton button {
        color: black !important;
        background-color: #667eea;
        border-radius: 10px;
        padding: 10px 20px;
        font-weight: bold;
    }
    .headline-container {
        background-color: rgba(255, 255, 255, 0.1);
        padding: 20px;
        border-radius: 10px;
        margin-top: 20px;
    }
    .st-emotion-cache-7ym5gk {
        color: black !important;
    }
    </style>
"""
# unsafe_allow_html lets the raw <style> element through to the page.
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page heading and a one-line description of what the app does
st.title("Word Cloud from News Headlines")
st.markdown("### Generating a word cloud from live news headlines")

# Sidebar widgets; each assignment captures the value the user picked
sidebar = st.sidebar
sidebar.title("Customize Your Word Cloud")
bg_color = sidebar.color_picker("Background Color", value="#ffffff")
max_words = sidebar.slider(
    "Maximum Number of Words",
    min_value=10,
    max_value=200,
    value=100,
)
keyword = sidebar.text_input("Search Keyword (Optional)")
# Default selection: the seven days leading up to now
default_date_range = [datetime.now() - timedelta(days=7), datetime.now()]
date_range = sidebar.date_input("Date Range", value=default_date_range)

# Function to fetch news headlines from Google News RSS feed
def fetch_news_headlines(keyword=None, date_range=None, *, session=None, timeout=10):
    """Fetch headline titles from the Google News RSS feed.

    Args:
        keyword: Optional search term. When given, the ``/search`` feed is
            queried instead of the default feed. The term is sent as a query
            parameter so characters such as ``&``, ``#`` and ``+`` are
            encoded instead of corrupting the URL.
        date_range: Optional publication-date filter: a single date, or a
            sequence of one or two dates/datetimes ``(start, end)``, both
            inclusive. A single date is treated as a start with no upper
            bound. Items whose ``pubDate`` is missing or unparseable are kept.
        session: Optional object exposing ``get(url, params=..., timeout=...)``
            (for example a ``requests.Session``). Defaults to the ``requests``
            module itself.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of non-empty headline strings in feed order. The list is empty
        when the request fails or the response is not well-formed XML, so the
        caller can show its "no headlines" message instead of a traceback.
    """
    url = "https://news.google.com/rss"
    params = None
    keyword = keyword.strip() if keyword else ""
    if keyword:
        url += "/search"
        params = {"q": keyword}

    http = requests if session is None else session
    try:
        # Without a timeout a stalled connection would block the app forever.
        response = http.get(url, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        logging.getLogger(__name__).warning("Could not fetch %s: %s", url, exc)
        return []

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        logging.getLogger(__name__).warning("Could not parse feed %s: %s", url, exc)
        return []

    start, end = _date_bounds(date_range)
    filtering = start is not None or end is not None

    headlines = []
    for item in root.findall('./channel/item'):
        # findtext returns None for a missing <title> and "" for an empty one.
        title = (item.findtext('title') or "").strip()
        if not title:
            continue
        if filtering and not _in_range(_published_date(item), start, end):
            continue
        headlines.append(title)
    return headlines


def _as_date(value):
    """Reduce a datetime to its date; leave plain dates unchanged."""
    return value.date() if isinstance(value, datetime) else value


def _date_bounds(date_range):
    """Normalize *date_range* into an ordered ``(start, end)`` pair of dates or None."""
    if date_range is None:
        return None, None
    try:
        bounds = list(date_range)
    except TypeError:
        # A lone date/datetime is not iterable: treat it as the start bound.
        bounds = [date_range]
    bounds = [_as_date(bound) for bound in bounds if bound is not None]
    start = bounds[0] if bounds else None
    end = bounds[1] if len(bounds) > 1 else None
    if start is not None and end is not None and start > end:
        start, end = end, start
    return start, end


def _published_date(item):
    """Return the item's ``pubDate`` as a local calendar date, or None if unavailable."""
    raw = item.findtext('pubDate')
    if not raw:
        return None
    try:
        published = parsedate_to_datetime(raw.strip())
    except (TypeError, ValueError):
        # Older Python versions raise TypeError, newer ones ValueError.
        return None
    # Convert to the machine's local zone so the comparison uses the same
    # calendar as naive local dates (e.g. those built from datetime.now()).
    return published.astimezone().date()


def _in_range(published, start, end):
    """Tell whether *published* lies within the inclusive bounds; unknown dates pass."""
    if published is None:
        return True
    if start is not None and published < start:
        return False
    if end is not None and published > end:
        return False
    return True

# Generate word cloud
headlines = fetch_news_headlines(keyword, date_range)
if headlines:
    wordcloud_text = ' '.join(headlines)
    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color=bg_color,
            max_words=max_words,
        ).generate(wordcloud_text)
    except ValueError as exc:
        # generate() raises ValueError when it has nothing it can draw
        # (e.g. no usable words in the text); report it instead of crashing.
        wordcloud = None
        st.warning(f"Could not build a word cloud from the fetched headlines: {exc}")

    if wordcloud is not None:
        st.image(wordcloud.to_array(), use_column_width=True)

        # Download option: hand over the encoded PNG bytes rather than a
        # buffer whose read position sits at the end after save().
        img = BytesIO()
        wordcloud.to_image().save(img, format='PNG')
        st.download_button(
            label="Download Word Cloud",
            data=img.getvalue(),
            file_name="wordcloud.png",
            mime="image/png",
        )

    # Display headlines
    st.markdown("### Fetched Headlines")
    with st.expander("Show Headlines"):
        # The container and its items are emitted in ONE markdown call: an
        # opening <div> in one call and a closing </div> in another do not
        # wrap what is rendered in between, so the .headline-container style
        # would never apply. Headlines come from an external feed, so they are
        # HTML-escaped, and whitespace is collapsed so no blank line can
        # terminate the HTML block.
        headline_items = "".join(
            "<li>{}</li>".format(html.escape(" ".join(headline.split())))
            for headline in headlines
        )
        st.markdown(
            f'<div class="headline-container"><ul>{headline_items}</ul></div>',
            unsafe_allow_html=True,
        )
else:
    st.warning("No headlines fetched. Please try again later.")

# Footer: a horizontal rule followed by a centered credit line
_FOOTER_HTML = """
<hr>
<div style="text-align: center;">
    Created by randeom
</div>
"""
# Raw HTML is needed for the <hr> and the inline-styled <div>.
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)