gunship999 committed on
Commit
e666451
·
verified ·
1 Parent(s): eb2cd0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -37
app.py CHANGED
@@ -4,76 +4,74 @@ import streamlit as st
import time
import random

# Target URL: Naver's mobile ranking-news listing page.
url = "https://m.news.naver.com/rankingList"

# Request headers (User-Agent and Referer) so the request resembles a
# normal browser visit rather than a bare script.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}
16
def random_delay(min_seconds=1, max_seconds=3):
    """Sleep for a random duration to spread out consecutive requests.

    Generalized from the original hard-coded 1-3 second range; calling
    with no arguments preserves the original behavior.

    Args:
        min_seconds: Lower bound of the delay range in seconds (default 1).
        max_seconds: Upper bound of the delay range in seconds (default 3).
    """
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)
20
 
21
def scrape_ranking_news():
    """Fetch Naver's mobile ranking-news page and return a flat article list.

    Returns:
        list[dict]: one dict per article with keys ``rank``, ``title``,
        ``time``, ``link``, ``image`` (None when the article has no
        thumbnail) and ``publisher``.
    """
    random_delay()  # polite random pause before hitting the server
    page = requests.get(url, headers=headers)
    soup = BeautifulSoup(page.text, "html.parser")

    news_list = []
    # One rankingnews_box per publisher; each holds that publisher's
    # ranked articles as <li> items.
    for box in soup.find_all("div", class_="rankingnews_box"):
        publisher = box.find("strong", class_="rankingnews_name").text
        for item in box.find_all("li"):
            # Not every article carries a thumbnail, so guard the lookup.
            img_tag = item.find("img")
            news_list.append({
                "rank": item.find("em", class_="list_ranking_num").text,
                "title": item.find("strong", class_="list_title").text,
                "time": item.find("span", class_="list_time").text,
                "link": item.find("a")['href'],
                "image": img_tag['src'] if img_tag else None,
                "publisher": publisher,
            })
    return news_list
53
 
54
# Page title
st.title("Daily News Scrap in Korea")

# Clicking the button triggers one scrape-and-render pass.
if st.button("Start News Scraping"):
    news_data = scrape_ranking_news()

    # Render the articles in a grid, 5 columns per row; a fresh row of
    # columns is created each time the current one fills up.
    num_columns = 5
    cols = st.columns(num_columns)
    slot = 0

    for news in news_data:
        with cols[slot]:
            if news['image']:  # only render the thumbnail when present
                st.image(news['image'])
            st.write(f"**Rank {news['rank']} - {news['publisher']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"πŸ•’ Posted: {news['time']}")
        slot += 1

        # Row full: reset the slot counter and open a new row.
        if slot == num_columns:
            slot = 0
            cols = st.columns(num_columns)
 
import time
import random

# Target URL: Naver's mobile ranking-news listing page.
url = "https://m.news.naver.com/rankingList"

# Request headers (User-Agent and Referer) so the request resembles a
# regular browser visit.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Referer": "https://m.news.naver.com/"
}
15
 
16
def random_delay(min_seconds=1, max_seconds=3):
    """Sleep for a random duration to spread out consecutive requests.

    Generalized from the original hard-coded 1-3 second range; calling
    with no arguments preserves the original behavior.

    Args:
        min_seconds: Lower bound of the delay range in seconds (default 1).
        max_seconds: Upper bound of the delay range in seconds (default 3).
    """
    delay = random.uniform(min_seconds, max_seconds)
    time.sleep(delay)
20
 
21
+ # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
22
  def scrape_ranking_news():
23
+ random_delay() # 랜덀 λ”œλ ˆμ΄ 적용
24
  response = requests.get(url, headers=headers)
25
  soup = BeautifulSoup(response.text, "html.parser")
26
+
27
+ # μŠ€ν¬λž˜ν•‘ν•  데이터가 ν¬ν•¨λœ HTML μ˜μ—­ 선택
28
  ranking_news_sections = soup.find_all("div", class_="rankingnews_box")
29
+
30
  news_list = []
 
31
  for section in ranking_news_sections:
32
+ office_name = section.find("strong", class_="rankingnews_name").text # 언둠사λͺ… μΆ”μΆœ
33
  articles = section.find_all("li")
34
 
35
  for article in articles:
36
  rank = article.find("em", class_="list_ranking_num").text
37
  title = article.find("strong", class_="list_title").text
38
+ time_posted = article.find("span", class_="list_time").text
39
  link = article.find("a")['href']
40
+ image = article.find("img")['src']
 
 
41
 
42
  news_list.append({
43
  "rank": rank,
44
  "title": title,
45
+ "time": time_posted,
46
  "link": link,
47
  "image": image,
48
+ "office": office_name
49
  })
50
  return news_list
51
 
52
# Page title
st.title("Daily News Scrap in Korea")

# Run button
if st.button("start"):
    # Scrape the ranking-news data
    news_data = scrape_ranking_news()

    # Lay articles out in rows of 5 columns.
    # BUG FIX: the original wrapped this in an extra `for news in news_data:`
    # loop, which re-rendered the ENTIRE grid once per article (n^2 output)
    # and shadowed `news`; a single pass over news_data is intended.
    num_columns = 5
    col_count = 0
    cols = st.columns(num_columns)

    for news in news_data:
        with cols[col_count]:
            # BUG FIX: guard against articles without a thumbnail so
            # st.image is not called with a missing value.
            if news['image']:
                st.image(news['image'])
            st.write(f"**{news['rank']}μœ„ - {news['office']}**")
            st.write(f"[{news['title']}]({news['link']})")
            st.write(f"πŸ•’ {news['time']}")
        col_count += 1

        # Start a new row after 5 articles
        if col_count == num_columns:
            col_count = 0
            cols = st.columns(num_columns)