| | import gradio as gr |
| | import pandas as pd |
| | from huggingface_hub import hf_hub_download |
| | import json |
| | from datetime import datetime |
| | import os |
| |
|
def load_data():
    """Fetch the summaries parquet file from the Hub and return it newest-first.

    Returns:
        A DataFrame ordered by ``date_generated`` descending with a fresh
        0-based index. If anything goes wrong while downloading or reading,
        the error is printed and an empty DataFrame is returned instead.
    """
    try:
        parquet_path = hf_hub_download(
            repo_type='dataset',
            repo_id='rajatarya/hf-news-aggregator',
            filename='data/news_data_summaries.parquet',
        )
        frame = pd.read_parquet(parquet_path)

        # Row 0 must be the most recent summary: the navigation code treats
        # a larger index as "older".
        newest_first = frame.sort_values('date_generated', ascending=False)
        return newest_first.reset_index(drop=True)
    except Exception as e:
        print(f"Error loading data: {e}")
        return pd.DataFrame()
| |
|
def format_metadata(metadata_str):
    """Format metadata as a readable, ``<br>``-separated Markdown string.

    Args:
        metadata_str: Either a JSON object encoded as a string or an already
            decoded mapping.

    Returns:
        One ``**Label:** value`` entry per key, joined with ``<br>``. The
        ``generated_at`` key is parsed as an ISO-8601 timestamp and shown as
        ``YYYY-MM-DD HH:MM UTC`` (the value is labelled UTC as-is, without
        timezone conversion); if it cannot be parsed the raw value is shown.
        If the input cannot be decoded or walked as a mapping,
        ``str(metadata_str)`` is returned.
    """
    try:
        if isinstance(metadata_str, str):
            metadata = json.loads(metadata_str)
        else:
            metadata = metadata_str

        formatted = []
        for key, value in metadata.items():
            if key == 'generated_at':
                try:
                    # fromisoformat() on older Pythons rejects a trailing 'Z',
                    # so spell the offset out explicitly.
                    dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
                    formatted.append(f"**Generated:** {dt.strftime('%Y-%m-%d %H:%M UTC')}")
                except (AttributeError, TypeError, ValueError):
                    # Non-string or unparseable timestamp: show it verbatim.
                    formatted.append(f"**Generated:** {value}")
            else:
                formatted.append(f"**{key.replace('_', ' ').title()}:** {value}")

        return "<br>".join(formatted)
    except (AttributeError, TypeError, ValueError):
        # Invalid JSON (JSONDecodeError is a ValueError), a non-mapping
        # payload, or non-string keys: fall back to the plain string form
        # without also swallowing KeyboardInterrupt/SystemExit.
        return str(metadata_str)
| |
|
def get_news_content(index):
    """Build the display values for the summary at position ``index``.

    The data is reloaded through ``load_data()`` on every call.

    Args:
        index: 0-based row position in the frame returned by ``load_data()``.

    Returns:
        A 5-tuple of (summary markdown, details table markdown, position
        string such as ``"3 / 10"``, update for the previous-button,
        update for the next-button). When there is no data or ``index`` is
        out of range, a placeholder message, an empty details string,
        ``"0 / 0"`` and two updates that disable both buttons are returned.
    """
    df = load_data()

    # Reject negative positions as well: iloc would silently wrap them to the
    # end of the frame and the position label would read "0 / N".
    if df.empty or index < 0 or index >= len(df):
        return (
            "β No data available",
            "",
            "0 / 0",
            gr.update(interactive=False),
            gr.update(interactive=False)
        )

    row = df.iloc[index]

    # One source per line inside the table cell when sources is a
    # comma-separated string; anything else is shown via str().
    sources = row['sources']
    if isinstance(sources, str):
        sources_cell = "<br>".join([s.strip() for s in sources.split(',')])
    else:
        sources_cell = str(sources)

    metadata = row['metadata']
    metadata_cell = (
        json.dumps(metadata, indent=2, default=str, sort_keys=True)
        if metadata else ""
    )

    metadata_md = f"""
| Type | {row['summary_type'].title()} |
|-----------------|---------|
| Date Generated | {row['date_generated'].strftime('%Y-%m-%d %H:%M UTC')} |
| Time Range | {row['time_range']} |
| Articles Analyzed | {row['num_articles_analyzed']} |
| Sources | {sources_cell} |
| Provider | {row['provider']} |
| Metadata | {metadata_cell} |"""

    nav_info = f"{index + 1} / {len(df)}"

    # Rows are ordered newest-first, so the last row has nothing older and
    # row 0 has nothing newer.
    prev_disabled = index >= len(df) - 1
    next_disabled = index <= 0

    return (
        row['summarized_markdown'],
        metadata_md,
        nav_info,
        gr.update(interactive=not prev_disabled),
        gr.update(interactive=not next_disabled)
    )
| |
|
| | |
# Position of the summary currently shown (0 = first row returned by
# load_data()). Read and rebound by next_news(), prev_news() and
# refresh_data() via `global`.
# NOTE(review): module-level state, so presumably shared by every browser
# session served by this process -- confirm, and consider per-session state.
current_index = 0
| |
|
def next_news():
    """Step one position toward index 0 (the newest row) and render it.

    Returns:
        The tuple produced by ``get_news_content`` for the new position.
    """
    global current_index
    target = current_index - 1
    if target < 0:
        # Already at the first row; stay there.
        target = 0
    current_index = target
    return get_news_content(current_index)
| |
|
def prev_news():
    """Step one position toward the last (oldest) row and render it.

    Returns:
        The tuple produced by ``get_news_content`` for the new position.
    """
    global current_index
    df = load_data()
    # Clamp at both ends. When loading fails or yields no rows, len(df) - 1
    # is -1, and that must not be stored as the current position.
    current_index = max(0, min(len(df) - 1, current_index + 1))
    return get_news_content(current_index)
| |
|
def refresh_data():
    """Jump back to the first row and render it.

    Returns:
        The tuple produced by ``get_news_content`` for position 0.
    """
    global current_index
    first_position = 0
    current_index = first_position
    return get_news_content(current_index)
| |
|
| | |
# Build the single-page UI: header, summary panel, details accordion,
# refresh/navigation controls, footer, and the event wiring between them.
# NOTE(review): several labels below still carry mis-encoded emoji
# ("π", "β", ...); restore them from the upstream file -- the original
# code points cannot be recovered from this text.
with gr.Blocks(
    title="HF News (AI-assisted summary)",
    theme=gr.themes.Soft(),
    css="""
    * {
        font-family: Arial, Helvetica, sans-serif !important;
    }
    .news-container {
        max-width: 1200px;
    }
    .nav-button {
        font-size: 16px !important;
        padding: 10px 20px !important;
    }
    .content-shadow {
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
        padding: 20px !important;
        background: white !important;
        margin: 20px 0 !important;
    }
    """
) as app:
    gr.Markdown("""
    # π π€ HF News (AI-assisted summary) π°

    Stay updated with the latest AI and ML news, summarized by AI! β¨
    """)

    # Main summary text.
    with gr.Row(variant="panel", elem_classes="content-shadow"):
        content_display = gr.Markdown(
            label="π Content",
            value="Loading news data... π",
        )

    # Collapsible details table for the current summary.
    with gr.Accordion("π Summary Details", open=False):
        metadata_display = gr.Markdown(
            value=""
        )

    with gr.Row():
        refresh_btn = gr.Button("π Refresh Data", variant="secondary")

    # Navigation: rows are ordered newest-first, so "older" moves to a
    # higher index (prev_news) and "newer" to a lower one (next_news).
    with gr.Row():
        prev_btn = gr.Button("⬅️ Older News", variant="secondary", elem_classes="nav-button")
        nav_info = gr.Textbox(
            label="π Position",
            interactive=False,
            container=False
        )
        next_btn = gr.Button("➡️ Newer News", variant="primary", elem_classes="nav-button")

    gr.HTML("""
    <div style="text-align: center; padding: 20px; color: #666; font-size: 14px;">
    <p>π€ Powered by AI β’ π Data from <a href="https://huggingface.co/datasets/rajatarya/hf-news-aggregator" target="_blank">HF News Aggregator</a></p>
    </div>
    """)

    # Every handler returns the same 5-tuple built by get_news_content().
    _render_outputs = [content_display, metadata_display, nav_info, prev_btn, next_btn]

    # Use refresh_data on page load so the stored position is reset together
    # with the display; rendering index 0 without resetting it would make the
    # first navigation click continue from a stale position.
    app.load(
        fn=refresh_data,
        outputs=_render_outputs
    )

    next_btn.click(
        fn=next_news,
        outputs=_render_outputs
    )

    prev_btn.click(
        fn=prev_news,
        outputs=_render_outputs
    )

    refresh_btn.click(
        fn=refresh_data,
        outputs=_render_outputs
    )
| |
|
# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app.launch()