naufalnashif committed on
Commit
5739f5b
·
1 Parent(s): 84b6b6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -35,7 +35,7 @@ def scrape_cnbc_data(query, date, jumlah):
35
 
36
  prop = min(len(data) / jumlah, 1)
37
  my_bar.progress(prop, text=progress_text)
38
- base_url = f"https://www.cnbcindonesia.com/search?query={query}&p={page}&kanal=&tipe=artikel&date={date}"
39
  user_agents = [
40
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
41
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
@@ -62,9 +62,13 @@ def scrape_cnbc_data(query, date, jumlah):
62
  for article in articles:
63
  title = article.find('h2').text.strip()
64
  link = article.find('a')['href']
65
- date = article.find('span', class_='date').text.strip()
 
 
 
66
 
67
  data.append({
 
68
  'date': date,
69
  'judul-berita': title,
70
  'link-berita': link,
 
35
 
36
  prop = min(len(data) / jumlah, 1)
37
  my_bar.progress(prop, text=progress_text)
38
+ url = f"https://www.cnbcindonesia.com/search?query={query}&p={page}&kanal=&tipe=artikel&date={date}"
39
  user_agents = [
40
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
41
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
 
62
  for article in articles:
63
  title = article.find('h2').text.strip()
64
  link = article.find('a')['href']
65
+ category = article.find('span', class_='label').text.strip()
66
+ date_category = article.find('span', class_='date').text.strip()
67
+ text_parts = date_category.split(' - ')
68
+ date = text_parts[1].strip()
69
 
70
  data.append({
71
+ 'category': category,
72
  'date': date,
73
  'judul-berita': title,
74
  'link-berita': link,