import requests
from bs4 import BeautifulSoup
import streamlit as st
import time
import random
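
# Scrapes Naver's mobile "ranking news" page and renders the results in a
# Streamlit grid: one card per article with rank, press office, title, link,
# post time, and thumbnail.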

# Target URL
url = "https://m.news.naver.com/rankingList"

# Headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}
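# The User-Agent and Referer mimic a normal mobile-browser visit; Naver may
# block or redirect obviously scripted requests without them.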

def random_delay():
    """Sleep 1-3 seconds between requests to space them out politely."""
    time.sleep(random.uniform(1, 3))

def safe_find(element, selector, class_name=None, attribute=None):
    """Safely find a child element and return its attribute value or text."""
    # Passing class_=None to BeautifulSoup's find() matches only tags
    # *without* a class attribute, so apply the filter only when a class
    # name is actually given.
    if class_name is not None:
        found = element.find(selector, class_=class_name)
    else:
        found = element.find(selector)
    if found and attribute:
        return found.get(attribute)
    return found.get_text(strip=True) if found else None
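
# Example usage:
#   safe_find(article, "strong", "list_title")   -> title text of the article
#   safe_find(article, "a", attribute="href")    -> link URL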

def scrape_ranking_news():
    """Fetch the ranking page and return a list of article dicts."""
    try:
        random_delay()
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Check for HTTP errors
        
        soup = BeautifulSoup(response.text, "html.parser")
        ranking_news_sections = soup.find_all("div", class_="rankingnews_box")
        
        news_list = []
        for section in ranking_news_sections:
            office_name = safe_find(section, "strong", "rankingnews_name")
            if not office_name:
                continue
                
            articles = section.find_all("li")
            for article in articles:
                # Safely extract all attributes
                rank = safe_find(article, "em", "list_ranking_num")
                title = safe_find(article, "strong", "list_title")
                time_posted = safe_find(article, "span", "list_time")
                link = safe_find(article, "a", attribute="href")
                
                # Handle image separately as it needs specific null checking
                img_tag = article.find("img")
                image = img_tag.get('src') if img_tag else None
                
                if all([rank, title, time_posted, link]):  # Ensure all required fields exist
                    news_list.append({
                        "rank": rank,
                        "title": title,
                        "time": time_posted,
                        "link": link,
                        "image": image,
                        "office": office_name
                    })
                    
        return news_list
    except Exception as e:
        st.error(f"Error scraping news: {str(e)}")
        return []
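
# Each dict returned by scrape_ranking_news() has the shape:
# {"rank": "1", "title": "...", "time": "...", "link": "...",
#  "image": "https://..." or None, "office": "..."}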

def display_news(news_data, num_columns=5):
    """Render the scraped articles in a num_columns-wide Streamlit grid."""
    if not news_data:
        st.warning("No news articles found.")
        return
        
    col_count = 0
    cols = st.columns(num_columns)
    
    for news in news_data:
        with cols[col_count]:
            if news['image']:
                try:
                    st.image(news['image'])
                except Exception:
                    st.warning("Image unavailable")
            
            st.write(f"**{news['rank']}위 - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"🕒 {news['time']}")
        
        col_count = (col_count + 1) % num_columns
        if col_count == 0:
            cols = st.columns(num_columns)
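
# Note: st.columns() is re-created after every num_columns cards, which
# starts a new visual row in the grid.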

# Main app
st.title("Daily News Scrap in Korea")

if st.button("Start"):
    news_data = scrape_ranking_news()
    display_news(news_data)
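
# To run locally (assuming this script is saved as news_scraper.py):
#   streamlit run news_scraper.py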