naufalnashif committed on
Commit
937df2f
·
1 Parent(s): dd2b9b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -2
app.py CHANGED
@@ -118,6 +118,63 @@ def scrape_shopee(nama_barang, num_items):
118
  driver.quit()
119
 
120
  return products
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  #---------------------------------------------------User Interface----------------------------------------------------------------------
122
 
123
  # Streamlit UI
@@ -125,7 +182,7 @@ st.title("Scraping E-Commerce")
125
 
126
  with st.expander("Settings :"):
127
  # Pilihan untuk memilih situs web
128
- selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id"])
129
 
130
  nama_barang = st.text_input("Masukkan Nama Barang :")
131
  num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
@@ -156,7 +213,16 @@ if selected_site == "shopee.co.id":
156
  scraped_products = scrape_shopee(nama_barang, num_items)
157
  hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
158
  scraping_done = True # Set scraping_done menjadi True
159
-
 
 
 
 
 
 
 
 
 
160
 
161
  # Simpan DataFrame ke dalam file
162
  output_file = f"scraped_{selected_site}_{nama_barang}.xlsx"
 
118
  driver.quit()
119
 
120
  return products
121
+
122
def _text_or_none(parent, tag, css_class):
    """Return the stripped text of the first matching child tag, or None if it is absent."""
    element = parent.find(tag, class_=css_class)
    # Compare against None explicitly instead of relying on the element's truthiness.
    return element.text.strip() if element is not None else None


@st.cache_data
def scrape_tokped(nama_barang, num_items):
    """Scrape Tokopedia search results for a product name.

    Search result pages are fetched one at a time until ``num_items`` products
    have been collected, a page yields no product cards, or a request fails.

    Args:
        nama_barang: Search keyword; it is URL-quoted before being sent.
        num_items: Maximum number of products to return.

    Returns:
        A list of at most ``num_items`` dicts with the keys ``link``,
        ``produk``, ``harga``, ``terjual``, ``rating``, ``toko`` and
        ``asal_product``. A field is ``None`` when its element is missing
        from the product card.
    """
    # The headers never change between pages, so build them once.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        'Accept-Language': 'en-US, en;q=0.5',
        # Only advertise encodings that can be decoded without optional extras.
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }
    timeout = 10  # seconds per request
    products = []
    page = 1
    query = quote(nama_barang)

    while len(products) < num_items:
        url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='

        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # HTTPError is a subclass of RequestException, so it must be caught first.
            st.error(f"HTTP Error : {e}")
            break
        except requests.exceptions.RequestException:
            st.error("Terjadi kesalahan")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)
        if not product_container_list:
            # No product cards on this page: stop instead of paging forever.
            break

        for product_info in product_container_list:
            products.append({
                'link': product_info['href'],
                'produk': _text_or_none(product_info, 'div', "prd_link-product-name css-3um8ox"),
                'harga': _text_or_none(product_info, 'div', "prd_link-product-price css-h66vau"),
                # An empty sold-count label is reported as None, like a missing one.
                'terjual': _text_or_none(product_info, 'span', "prd_label-integrity css-1sgek4h") or None,
                'rating': _text_or_none(product_info, 'span', 'prd_rating-average-text css-t70v7i'),
                'toko': _text_or_none(product_info, 'span', "prd_link-shop-name css-1kdc32b flip"),
                'asal_product': _text_or_none(product_info, 'span', "prd_link-shop-loc css-1kdc32b flip"),
            })
            if len(products) >= num_items:
                return products

        page += 1

    return products
178
  #---------------------------------------------------User Interface----------------------------------------------------------------------
179
 
180
  # Streamlit UI
 
182
 
183
  with st.expander("Settings :"):
184
  # Pilihan untuk memilih situs web
185
+ selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id", "tokopedia.com"])
186
 
187
  nama_barang = st.text_input("Masukkan Nama Barang :")
188
  num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
 
213
  scraped_products = scrape_shopee(nama_barang, num_items)
214
  hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
215
  scraping_done = True # Set scraping_done menjadi True
216
+
217
# Tokopedia: the start button is only rendered when this site is selected,
# because `and` short-circuits before st.button is called.
if selected_site == "tokopedia.com" and st.button("Mulai Scraping"):
    if nama_barang:
        # Keep the scraped rows in both names for the code that follows.
        hidden_data = scraped_products = scrape_tokped(nama_barang, num_items)
        scraping_done = True  # mark the scraping step as finished
    else:
        st.error("Mohon isi Nama Barang.")
226
 
227
  # Simpan DataFrame ke dalam file
228
  output_file = f"scraped_{selected_site}_{nama_barang}.xlsx"