Spaces:
Sleeping
Sleeping
Commit
·
5739f5b
1
Parent(s):
84b6b6e
Update app.py
Browse files
app.py
CHANGED
@@ -35,7 +35,7 @@ def scrape_cnbc_data(query, date, jumlah):
|
|
35 |
|
36 |
prop = min(len(data) / jumlah, 1)
|
37 |
my_bar.progress(prop, text=progress_text)
|
38 |
-
|
39 |
user_agents = [
|
40 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
|
41 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
|
@@ -62,9 +62,13 @@ def scrape_cnbc_data(query, date, jumlah):
|
|
62 |
for article in articles:
|
63 |
title = article.find('h2').text.strip()
|
64 |
link = article.find('a')['href']
|
65 |
-
|
|
|
|
|
|
|
66 |
|
67 |
data.append({
|
|
|
68 |
'date': date,
|
69 |
'judul-berita': title,
|
70 |
'link-berita': link,
|
|
|
35 |
|
36 |
prop = min(len(data) / jumlah, 1)
|
37 |
my_bar.progress(prop, text=progress_text)
|
38 |
+
url = f"https://www.cnbcindonesia.com/search?query={query}&p={page}&kanal=&tipe=artikel&date={date}"
|
39 |
user_agents = [
|
40 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
|
41 |
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134",
|
|
|
62 |
for article in articles:
|
63 |
title = article.find('h2').text.strip()
|
64 |
link = article.find('a')['href']
|
65 |
+
category = article.find('span', class_='label').text.strip()
|
66 |
+
date_category = article.find('span', class_='date').text.strip()
|
67 |
+
text_parts = date_category.split(' - ')
|
68 |
+
date = text_parts[1].strip()
|
69 |
|
70 |
data.append({
|
71 |
+
'category': category,
|
72 |
'date': date,
|
73 |
'judul-berita': title,
|
74 |
'link-berita': link,
|