naufalnashif committed on
Commit
6174621
·
1 Parent(s): a6136fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -65
app.py CHANGED
@@ -94,7 +94,7 @@ def scrape_shopee(nama_barang, num_items):
94
  driver = webdriver.Chrome(options = options)
95
  url = f'https://shopee.co.id/search?keyword={query}&page={page}'
96
  driver.get(url)
97
- time.sleep(10)
98
 
99
  # Cari elemen berdasarkan tagname HTML
100
  html_element = driver.find_element(By.TAG_NAME, "html")
@@ -110,15 +110,15 @@ def scrape_shopee(nama_barang, num_items):
110
  except requests.exceptions.RequestException as e:
111
  logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
112
  st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
113
- break
114
  except requests.exceptions.HTTPError as e:
115
  logging.error(f"HTTP Error: {e}")
116
  st.error(f"HTTP Error: {e}")
117
- break
118
  except Exception as e:
119
  logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
120
  st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
121
- break
122
  except WebDriverException as e:
123
  st.error(f"An error occurred: {e}")
124
  finally:
@@ -216,71 +216,73 @@ def scrape_tokped_with_selenium(nama_barang, num_items):
216
  options.add_argument('--disable-infobars')
217
  options.add_argument('--disable-dev-shm-usage')
218
 
219
-
220
- try :
221
- url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
222
- driver.get(url)
223
- time.sleep(10)
224
-
225
- html = driver.page_source
226
- soup = BeautifulSoup(html, "html.parser")
227
- # Dapatkan HTML dari elemen
228
- #html = html_element.get_attribute("innerHTML")
229
- #html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
230
- product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
231
-
232
- for product_info in product_container_list:
233
- link = product_info['href']
234
- st.write(link)
235
- title_element = product_info.find('div', class_="prd_link-product-name css-3um8ox")
236
- title = title_element.text.strip() if title_element else None
237
-
238
- harga_element = product_info.find('div', class_="prd_link-product-price css-h66vau")
239
- harga = harga_element.text.strip() if harga_element else None
240
-
241
- terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h")
242
- terjual = terjual_element.text if terjual_element else None
243
-
244
- rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
245
- rating = rating_element.text if rating_element else None
246
-
247
- toko_element = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip")
248
- toko = toko_element.text.strip() if toko_element else None
249
-
250
- asal_product_element = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip")
251
- asal_product = asal_product_element.text.strip() if asal_product_element else None
252
-
253
- products.append({
254
- 'link': link,
255
- 'produk' : title,
256
- 'harga' : harga,
257
- 'terjual' : terjual,
258
- 'rating' : rating,
259
- 'toko' : toko,
260
- 'asal_product' : asal_product,
261
- })
262
- if len(products) >= num_items:
263
- products = products[:num_items]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  break
265
-
266
- except requests.exceptions.RequestException as e:
267
- logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
268
- st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
269
- break
270
- except requests.exceptions.HTTPError as e:
271
- logging.error(f"HTTP Error: {e}")
272
- st.error(f"HTTP Error: {e}")
273
- break
274
- except Exception as e:
275
- logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
276
- st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
277
- break
278
- except WebDriverException as e:
279
- st.error(f"An error occurred: {e}")
280
  finally:
281
  if driver:
282
  driver.quit()
283
-
284
  return products
285
  #---------------------------------------------------User Interface----------------------------------------------------------------------
286
 
 
94
  driver = webdriver.Chrome(options = options)
95
  url = f'https://shopee.co.id/search?keyword={query}&page={page}'
96
  driver.get(url)
97
+
98
 
99
  # Cari elemen berdasarkan tagname HTML
100
  html_element = driver.find_element(By.TAG_NAME, "html")
 
110
  except requests.exceptions.RequestException as e:
111
  logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
112
  st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
113
+
114
  except requests.exceptions.HTTPError as e:
115
  logging.error(f"HTTP Error: {e}")
116
  st.error(f"HTTP Error: {e}")
117
+
118
  except Exception as e:
119
  logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
120
  st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
121
+
122
  except WebDriverException as e:
123
  st.error(f"An error occurred: {e}")
124
  finally:
 
216
  options.add_argument('--disable-infobars')
217
  options.add_argument('--disable-dev-shm-usage')
218
 
219
+ while len(products) < num_items :
220
+ try :
221
+ url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
222
+
223
+ driver.get(url)
224
+ time.sleep(10)
225
+
226
+ html = driver.page_source
227
+ soup = BeautifulSoup(html, "html.parser")
228
+ # Dapatkan HTML dari elemen
229
+ #html = html_element.get_attribute("innerHTML")
230
+ #html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
231
+ product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
232
+
233
+ for product_info in product_container_list:
234
+ link = product_info['href']
235
+ st.write(link)
236
+ title_element = product_info.find('div', class_="prd_link-product-name css-3um8ox")
237
+ title = title_element.text.strip() if title_element else None
238
+
239
+ harga_element = product_info.find('div', class_="prd_link-product-price css-h66vau")
240
+ harga = harga_element.text.strip() if harga_element else None
241
+
242
+ terjual_element = product_info.find('span', class_="prd_label-integrity css-1sgek4h")
243
+ terjual = terjual_element.text if terjual_element else None
244
+
245
+ rating_element = product_info.find('span', class_='prd_rating-average-text css-t70v7i')
246
+ rating = rating_element.text if rating_element else None
247
+
248
+ toko_element = product_info.find('span', class_="prd_link-shop-name css-1kdc32b flip")
249
+ toko = toko_element.text.strip() if toko_element else None
250
+
251
+ asal_product_element = product_info.find('span', class_="prd_link-shop-loc css-1kdc32b flip")
252
+ asal_product = asal_product_element.text.strip() if asal_product_element else None
253
+
254
+ products.append({
255
+ 'link': link,
256
+ 'produk' : title,
257
+ 'harga' : harga,
258
+ 'terjual' : terjual,
259
+ 'rating' : rating,
260
+ 'toko' : toko,
261
+ 'asal_product' : asal_product,
262
+ })
263
+ if len(products) >= num_items:
264
+ products = products[:num_items]
265
+ break
266
+ page += 1
267
+ except requests.exceptions.RequestException as e:
268
+ logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
269
+ st.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
270
+ break
271
+ except requests.exceptions.HTTPError as e:
272
+ logging.error(f"HTTP Error: {e}")
273
+ st.error(f"HTTP Error: {e}")
274
+ break
275
+ except Exception as e:
276
+ logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
277
+ st.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
278
+ break
279
+ except WebDriverException as e:
280
+ st.error(f"An error occurred: {e}")
281
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  finally:
283
  if driver:
284
  driver.quit()
285
+
286
  return products
287
  #---------------------------------------------------User Interface----------------------------------------------------------------------
288