Commit
·
e12fb59
1
Parent(s):
dc1a396
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,11 @@ import streamlit as st
|
|
6 |
import json
|
7 |
import time
|
8 |
|
|
|
|
|
|
|
9 |
@st.cache_data
|
10 |
-
def
|
11 |
products = []
|
12 |
page = 1
|
13 |
query = quote(nama_barang)
|
@@ -63,6 +66,80 @@ def scrape_e_commerce(nama_barang, num_items):
|
|
63 |
my_bar.empty()
|
64 |
return products
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
67 |
|
68 |
# Streamlit UI
|
@@ -70,7 +147,7 @@ st.title("Scraping E-Commerce")
|
|
70 |
|
71 |
with st.expander("Settings :"):
|
72 |
# Pilihan untuk memilih situs web
|
73 |
-
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id
|
74 |
|
75 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
76 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
@@ -88,21 +165,28 @@ if selected_site == "klikindomaret.com":
|
|
88 |
if not nama_barang:
|
89 |
st.error("Mohon isi Nama Barang.")
|
90 |
else:
|
91 |
-
scraped_products =
|
92 |
hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
|
93 |
scraping_done = True # Set scraping_done menjadi True
|
94 |
|
95 |
-
if selected_site == "shopee.co.id
|
96 |
-
st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
|
99 |
|
100 |
|
101 |
|
102 |
# Simpan DataFrame ke dalam file
|
103 |
-
output_file = f"scraped_{nama_barang}.xlsx"
|
104 |
-
output_file_csv = f"scraped_{nama_barang}.csv"
|
105 |
-
output_file_json = f"scraped_{nama_barang}.json"
|
106 |
|
107 |
|
108 |
#---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
|
|
|
6 |
import json
|
7 |
import time
|
8 |
|
9 |
+
from selenium import webdriver
|
10 |
+
from selenium.webdriver.chrome.options import Options
|
11 |
+
|
12 |
@st.cache_data
|
13 |
+
def scrape_klikindomaret(nama_barang, num_items):
|
14 |
products = []
|
15 |
page = 1
|
16 |
query = quote(nama_barang)
|
|
|
66 |
my_bar.empty()
|
67 |
return products
|
68 |
|
69 |
+
@st.cache_data
def scrape_shopee(nama_barang, num_items):
    """Scrape Shopee Indonesia search results for a keyword.

    Drives a Selenium-controlled Chrome through the search result pages and
    collects one record per product anchor until ``num_items`` records have
    been gathered or a page yields no product cards.

    Args:
        nama_barang: Search keyword entered by the user.
        num_items: Maximum number of product records to return.

    Returns:
        A list of at most ``num_items`` dicts with the keys ``product``,
        ``price``, ``terjual`` (units sold), ``asal`` (seller location) and
        ``link``. A field missing from a product card is returned as ``""``.
    """

    def _div_text(card, css_class):
        # Not every card carries every field; report a missing <div> as ""
        # instead of crashing with AttributeError on ``None.text``.
        node = card.find('div', class_=css_class)
        return node.text.strip() if node is not None else ""

    products = []
    # NOTE(review): Shopee's search pager may be zero-based -- confirm that
    # starting at 1 does not skip the first result page.
    page = 1
    query = quote(nama_barang)
    progress_text = "Scraping in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)

    # Customize the Chrome session ('--headless' is deliberately left off,
    # as in the original configuration).
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-notifications')
    chrome_options.add_argument('--disable-infobars')

    driver = None
    try:
        # One browser for the whole run; it is closed in ``finally`` so no
        # Chrome process is leaked, even when scraping fails midway.
        driver = webdriver.Chrome(options=chrome_options)

        while len(products) < num_items:
            # Rebuild the URL on every pass so ``page`` actually advances,
            # and use the URL-quoted keyword.
            url = f'https://shopee.co.id/search?keyword={query}&page={page}'
            driver.get(url)

            html = driver.execute_script(
                "return document.getElementsByTagName('html')[0].innerHTML"
            )
            soup = BeautifulSoup(html, "html.parser")

            product_list = soup.find_all(
                'li', class_="col-xs-2-4 shopee-search-item-result__item"
            )
            if not product_list:
                # No cards (end of results or a blocked/unrendered page):
                # stop instead of looping forever.
                break

            for product in product_list:
                product_name = _div_text(product, "ie3A+n bM+7UW Cve6sh")
                product_price = _div_text(product, "vioxXd rVLWG6")
                product_terjual = _div_text(product, "r6HknA uEPGHT")
                product_asal = _div_text(product, "zGGwiV")
                # TODO: extract the pre-discount price and discount
                # percentage once their selectors are known.

                # Look up every <a> tag carrying an href inside this <li>.
                for product_info in product.find_all('a', href=True):
                    # Strip leading slashes so the joined link does not
                    # contain "//" after the host.
                    product_href = product_info['href'].lstrip('/')
                    products.append({
                        'product': product_name,
                        'price': product_price,
                        'terjual': product_terjual,
                        'asal': product_asal,
                        'link': f"https://shopee.co.id/{product_href}",
                    })

                prop = min(len(products) / num_items, 1)
                my_bar.progress(prop, text=progress_text)

            page += 1
            time.sleep(1)  # brief pause between page loads
    finally:
        if driver is not None:
            driver.quit()
        my_bar.empty()

    # Trim so the caller never receives more than the requested amount.
    return products[:num_items]
|
143 |
#---------------------------------------------------User Interface----------------------------------------------------------------------
|
144 |
|
145 |
# Streamlit UI
|
|
|
147 |
|
148 |
with st.expander("Settings :"):
|
149 |
# Pilihan untuk memilih situs web
|
150 |
+
selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id"])
|
151 |
|
152 |
nama_barang = st.text_input("Masukkan Nama Barang :")
|
153 |
num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
|
|
|
165 |
if not nama_barang:
|
166 |
st.error("Mohon isi Nama Barang.")
|
167 |
else:
|
168 |
+
scraped_products = scrape_klikindomaret(nama_barang, num_items)
|
169 |
hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
|
170 |
scraping_done = True # Set scraping_done menjadi True
|
171 |
|
172 |
+
# Shopee branch: the start button is rendered only while Shopee is the
# selected site, and scraping runs only on the click that presses it.
if selected_site == "shopee.co.id" and st.button("Mulai Scraping"):
    if nama_barang:
        # Keep the result under both names; hidden_data is the stashed copy.
        hidden_data = scraped_products = scrape_shopee(nama_barang, num_items)
        scraping_done = True  # mark scraping as finished
    else:
        st.error("Mohon isi Nama Barang.")
|
181 |
|
182 |
|
183 |
|
184 |
|
185 |
|
186 |
# File names used when saving the DataFrame: one per export format, each
# tagged with the selected site and the search keyword.
output_file, output_file_csv, output_file_json = [
    f"scraped_{selected_site}_{nama_barang}.{ext}"
    for ext in ("xlsx", "csv", "json")
]
|
190 |
|
191 |
|
192 |
#---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
|