Commit
·
937df2f
1
Parent(s):
dd2b9b0
Update app.py
Browse files
app.py
CHANGED
@@ -118,6 +118,63 @@ def scrape_shopee(nama_barang, num_items):
|
|
118 |
driver.quit()
|
119 |
|
120 |
return products
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
122 |
|
123 |
# Streamlit UI
|
@@ -125,7 +182,7 @@ st.title("Scraping E-Commerce")
|
|
125 |
|
126 |
with st.expander("Settings :"):
|
127 |
# Pilihan untuk memilih situs web
|
128 |
-
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id"])
|
129 |
|
130 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
131 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
@@ -156,7 +213,16 @@ if selected_site == "shopee.co.id":
|
|
156 |
scraped_products = scrape_shopee(nama_barang, num_items)
|
157 |
hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
|
158 |
scraping_done = True # Set scraping_done menjadi True
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
# Simpan DataFrame ke dalam file
|
162 |
output_file = f"scraped_{selected_site}_{nama_barang}.xlsx"
|
|
|
118 |
driver.quit()
|
119 |
|
120 |
return products
|
121 |
+
|
122 |
+
def _tag_text(parent, name, class_):
    """Return the stripped text of the first matching child tag, or None if absent."""
    element = parent.find(name, class_=class_)
    return element.text.strip() if element is not None else None


@st.cache_data
def scrape_tokped(nama_barang, num_items):
    """Scrape Tokopedia search results for a product keyword.

    Walks the paginated search result pages and collects one dict per
    product card until ``num_items`` products are gathered, a page returns
    no product cards, or a request fails.

    Args:
        nama_barang: Search keyword; it is URL-quoted before being placed
            in the query string.
        num_items: Maximum number of products to return.

    Returns:
        A list of at most ``num_items`` dicts with the keys ``link``,
        ``produk``, ``harga``, ``terjual``, ``rating``, ``toko`` and
        ``asal_product``. A field is ``None`` when its element is missing
        from the product card.
    """
    products = []
    page = 1
    query = quote(nama_barang)

    # Loop-invariant request settings, built once instead of per page.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        'Accept-Language': 'en-US, en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }
    timeout = 10

    while len(products) < num_items:
        url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='

        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # HTTPError is a subclass of RequestException, so it must be
            # handled first to be reachable.
            st.error(f"HTTP Error : {e}")
            break
        except requests.exceptions.RequestException:
            st.error("Terjadi kesalahan")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href=True)

        # No product cards on this page: stop instead of requesting
        # further pages forever.
        if not product_container_list:
            break

        for product_info in product_container_list:
            products.append({
                'link': product_info['href'],
                'produk': _tag_text(product_info, 'div', "prd_link-product-name css-3um8ox"),
                'harga': _tag_text(product_info, 'div', "prd_link-product-price css-h66vau"),
                # An empty "sold" label is reported as None, like a missing one.
                'terjual': _tag_text(product_info, 'span', "prd_label-integrity css-1sgek4h") or None,
                'rating': _tag_text(product_info, 'span', 'prd_rating-average-text css-t70v7i'),
                'toko': _tag_text(product_info, 'span', "prd_link-shop-name css-1kdc32b flip"),
                'asal_product': _tag_text(product_info, 'span', "prd_link-shop-loc css-1kdc32b flip"),
            })
            if len(products) >= num_items:
                break

        page += 1

    return products
|
178 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
179 |
|
180 |
# Streamlit UI
|
|
|
182 |
|
183 |
with st.expander("Settings :"):
|
184 |
# Pilihan untuk memilih situs web
|
185 |
+
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id", "tokopedia.com"])
|
186 |
|
187 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
188 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
|
|
213 |
scraped_products = scrape_shopee(nama_barang, num_items)
|
214 |
hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
|
215 |
scraping_done = True # Set scraping_done menjadi True
|
216 |
+
|
217 |
+
# Tokopedia flow: the start button is only rendered when Tokopedia is the
# selected site; scraping runs once the button is pressed and a product
# name has been entered.
if selected_site == "tokopedia.com" and st.button("Mulai Scraping"):
    if nama_barang:
        # Keep the scraped rows in hidden_data and flag the scrape as finished.
        scraped_products = hidden_data = scrape_tokped(nama_barang, num_items)
        scraping_done = True
    else:
        st.error("Mohon isi Nama Barang.")
|
226 |
|
227 |
# Simpan DataFrame ke dalam file
|
228 |
output_file = f"scraped_{selected_site}_{nama_barang}.xlsx"
|