gunship999 committed on
Commit
e666451
·
verified ·
1 Parent(s): eb2cd0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -37
app.py CHANGED
@@ -4,76 +4,74 @@ import streamlit as st
import time
import random

# Target URL: Naver's mobile ranking-news listing page.
url = "https://m.news.naver.com/rankingList"

# Request headers (User-Agent and Referer) so the request resembles a
# normal browser visit rather than a bare script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}
16
def random_delay(min_seconds=1, max_seconds=3):
    """Sleep for a random duration to spread out consecutive requests.

    Generalized from the original hard-coded 1-3 second range; calling
    with no arguments preserves the original behavior.

    Args:
        min_seconds: Lower bound of the delay range in seconds (default 1).
        max_seconds: Upper bound of the delay range in seconds (default 3).
    """
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)
20
 
21
def scrape_ranking_news():
    """Fetch Naver's mobile ranking-news page and return a flat article list.

    Returns:
        list[dict]: one dict per article with keys ``rank``, ``title``,
        ``time``, ``link``, ``image`` (None when the article has no
        thumbnail) and ``publisher``.
    """
    random_delay()  # polite random pause before hitting the server
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.text, "html.parser")

    news_list = []
    # One rankingnews_box per publisher; each holds that publisher's
    # ranked articles as <li> items.
    for box in soup.find_all("div", class_="rankingnews_box"):
        publisher = box.find("strong", class_="rankingnews_name").text
        for item in box.find_all("li"):
            # Not every article carries a thumbnail, so guard the lookup.
            img_tag = item.find("img")
            news_list.append({
                "rank": item.find("em", class_="list_ranking_num").text,
                "title": item.find("strong", class_="list_title").text,
                "time": item.find("span", class_="list_time").text,
                "link": item.find("a")['href'],
                "image": img_tag['src'] if img_tag else None,
                "publisher": publisher,
            })
    return news_list
53
 
54
# Page title
st.title("Daily News Scrap in Korea")

# Clicking the button triggers one scrape-and-render pass.
if st.button("Start News Scraping"):
    news_data = scrape_ranking_news()

    # Render the articles in a grid, 5 columns per row; a fresh row of
    # columns is created each time the current one fills up.
    num_columns = 5
    cols = st.columns(num_columns)
    slot = 0

    for news in news_data:
        with cols[slot]:
            if news['image']:  # only render the thumbnail when present
                st.image(news['image'])
            st.write(f"**Rank {news['rank']} - {news['publisher']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"πŸ•’ Posted: {news['time']}")
        slot += 1

        # Row full: reset the slot counter and open a new row.
        if slot == num_columns:
            slot = 0
            cols = st.columns(num_columns)
 
import time
import random

# Target URL: Naver's mobile ranking-news listing page.
url = "https://m.news.naver.com/rankingList"

# Request headers (User-Agent and Referer) so the request resembles a
# regular browser visit.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}
15
 
16
def random_delay(min_seconds=1, max_seconds=3):
    """Sleep for a random duration to spread out consecutive requests.

    Generalized from the original hard-coded 1-3 second range; calling
    with no arguments preserves the original behavior.

    Args:
        min_seconds: Lower bound of the delay range in seconds (default 1).
        max_seconds: Upper bound of the delay range in seconds (default 3).
    """
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)
20
 
21
+ # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
22
  def scrape_ranking_news():
23
+ random_delay() # 랜덀 λ”œλ ˆμ΄ 적용
24
  response = requests.get(url, headers=headers)
25
  soup = BeautifulSoup(response.text, "html.parser")
26
+
27
+ # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
28
  ranking_news_sections = soup.find_all("div", class_="rankingnews_box")
29
+
30
  news_list = []
 
31
  for section in ranking_news_sections:
32
+ office_name = section.find("strong", class_="rankingnews_name").text # 언둠사λͺ… μΆ”μΆœ
33
  articles = section.find_all("li")
34
 
35
  for article in articles:
36
  rank = article.find("em", class_="list_ranking_num").text
37
  title = article.find("strong", class_="list_title").text
38
+ time_posted = article.find("span", class_="list_time").text
39
  link = article.find("a")['href']
40
+ image = article.find("img")['src']
 
 
41
 
42
  news_list.append({
43
  "rank": rank,
44
  "title": title,
45
+ "time": time_posted,
46
  "link": link,
47
  "image": image,
48
+ "office": office_name
49
  })
50
  return news_list
51
 
52
# Page title
st.title("Daily News Scrap in Korea")

# Run button
if st.button("start"):
    # Scrape the ranking-news data
    news_data = scrape_ranking_news()

    # Lay articles out in rows of 5 columns.
    # BUG FIX: the original wrapped this in an extra `for news in news_data:`
    # loop, which re-rendered the ENTIRE grid once per article (n^2 output)
    # and shadowed `news`; a single pass over news_data is intended.
    num_columns = 5
    col_count = 0
    cols = st.columns(num_columns)

    for news in news_data:
        with cols[col_count]:
            # BUG FIX: guard against articles without a thumbnail so
            # st.image is not called with a missing value.
            if news['image']:
                st.image(news['image'])
            st.write(f"**{news['rank']}μœ„ - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"πŸ•’ {news['time']}")
        col_count += 1

        # Start a new row after 5 articles
        if col_count == num_columns:
            col_count = 0
            cols = st.columns(num_columns)