naufalnashif committed on
Commit
e12fb59
·
1 Parent(s): dc1a396

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -8
app.py CHANGED
@@ -6,8 +6,11 @@ import streamlit as st
6
  import json
7
  import time
8
 
 
 
 
9
  @st.cache_data
10
- def scrape_e_commerce(nama_barang, num_items):
11
  products = []
12
  page = 1
13
  query = quote(nama_barang)
@@ -63,6 +66,80 @@ def scrape_e_commerce(nama_barang, num_items):
63
  my_bar.empty()
64
  return products
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  #---------------------------------------------------User Interface----------------------------------------------------------------------
67
 
68
  # Streamlit UI
@@ -70,7 +147,7 @@ st.title("Scraping E-Commerce")
70
 
71
  with st.expander("Settings :"):
72
  # Pilihan untuk memilih situs web
73
- selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id(under maintenance)"])
74
 
75
  nama_barang = st.text_input("Masukkan Nama Barang :")
76
  num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
@@ -88,21 +165,28 @@ if selected_site == "klikindomaret.com":
88
  if not nama_barang:
89
  st.error("Mohon isi Nama Barang.")
90
  else:
91
- scraped_products = scrape_e_commerce(nama_barang, num_items)
92
  hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
93
  scraping_done = True # Set scraping_done menjadi True
94
 
95
- if selected_site == "shopee.co.id(under maintenance)":
96
- st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
 
 
 
 
 
 
 
97
 
98
 
99
 
100
 
101
 
102
  # Simpan DataFrame ke dalam file
103
- output_file = f"scraped_{nama_barang}.xlsx"
104
- output_file_csv = f"scraped_{nama_barang}.csv"
105
- output_file_json = f"scraped_{nama_barang}.json"
106
 
107
 
108
  #---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------
 
6
  import json
7
  import time
8
 
9
+ from selenium import webdriver
10
+ from selenium.webdriver.chrome.options import Options
11
+
12
  @st.cache_data
13
+ def scrape_klikindomaret(nama_barang, num_items):
14
  products = []
15
  page = 1
16
  query = quote(nama_barang)
 
66
  my_bar.empty()
67
  return products
68
 
69
def _shopee_text(node, css_class):
    """Return the stripped text of the first <div class=css_class> under *node*, or "" if absent."""
    found = node.find('div', class_=css_class)
    # Compare against None explicitly rather than relying on tag truthiness.
    return found.text.strip() if found is not None else ""


@st.cache_data
def scrape_shopee(nama_barang, num_items):
    """Scrape Shopee search results for *nama_barang* using a Chrome WebDriver.

    Search-result pages are loaded one after another until at least
    *num_items* products were collected or a page contributes nothing new.

    Args:
        nama_barang: Search keyword as typed by the user (percent-encoded
            before it is placed in the URL).
        num_items: Maximum number of products to return.

    Returns:
        A list of at most *num_items* dicts with the keys ``'product'``,
        ``'price'``, ``'terjual'``, ``'asal'`` and ``'link'``. A field whose
        element is not found on the page is returned as an empty string.
    """
    if num_items <= 0:
        return []

    products = []
    # NOTE(review): Shopee's search pagination may be 0-based; the starting
    # value is kept as written here -- confirm against the live site.
    page = 1
    query = quote(nama_barang)
    progress_text = "Scraping in progress. Please wait."
    my_bar = st.progress(0, text=progress_text)

    # Customize chrome display
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    # NOTE(review): '--headless' is not enabled; a visible Chrome window is
    # opened. Enable it when running on a host without a display.
    chrome_options.add_argument('--disable-notifications')
    chrome_options.add_argument('--disable-infobars')

    # One browser for the whole run; always closed in the ``finally`` below.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        while len(products) < num_items:
            # Rebuild the URL on every pass so the page counter is honoured,
            # and use the encoded keyword so spaces/symbols are URL-safe.
            url = f'https://shopee.co.id/search?keyword={query}&page={page}'
            driver.get(url)

            html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
            soup = BeautifulSoup(html, "html.parser")

            collected_before = len(products)
            product_list = soup.find_all('li', class_="col-xs-2-4 shopee-search-item-result__item")
            for product in product_list:
                # These fields belong to the <li>, not to an individual
                # anchor, so read them once per item.
                product_name = _shopee_text(product, "ie3A+n bM+7UW Cve6sh")
                product_price = _shopee_text(product, "vioxXd rVLWG6")
                product_terjual = _shopee_text(product, "r6HknA uEPGHT")
                product_asal = _shopee_text(product, "zGGwiV")

                # Look for the <a> tags inside each <li>.
                for product_info in product.find_all('a', href=True):
                    # Strip the leading slash of a root-relative href so the
                    # resulting link has no double slash.
                    product_href = product_info['href'].lstrip('/')
                    products.append({
                        'product': product_name,
                        'price': product_price,
                        'terjual': product_terjual,
                        'asal': product_asal,
                        'link': f"https://shopee.co.id/{product_href}",
                    })

            my_bar.progress(min(len(products) / num_items, 1), text=progress_text)

            # A page that added nothing (no results, login wall, changed
            # markup) would otherwise keep this loop running forever.
            if len(products) == collected_before:
                break

            page += 1
            time.sleep(1)
    finally:
        driver.quit()
        my_bar.empty()

    # The last page may overshoot the requested amount; trim to the limit.
    return products[:num_items]
143
  #---------------------------------------------------User Interface----------------------------------------------------------------------
144
 
145
  # Streamlit UI
 
147
 
148
  with st.expander("Settings :"):
149
  # Pilihan untuk memilih situs web
150
+ selected_site = st.selectbox("Pilih Situs Web :", ["klikindomaret.com", "shopee.co.id"])
151
 
152
  nama_barang = st.text_input("Masukkan Nama Barang :")
153
  num_items = st.number_input("Masukkan Estimasi Banyak Data :", min_value = 1, step = 1, placeholder="Type a number...")
 
165
  if not nama_barang:
166
  st.error("Mohon isi Nama Barang.")
167
  else:
168
+ scraped_products = scrape_klikindomaret(nama_barang, num_items)
169
  hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
170
  scraping_done = True # Set scraping_done menjadi True
171
 
172
+ if selected_site == "shopee.co.id":
173
+ #st.error("Sedang dalam pengembangan. Silahkan pilih situs yang lain")
174
+ if st.button("Mulai Scraping"):
175
+ if not nama_barang:
176
+ st.error("Mohon isi Nama Barang.")
177
+ else:
178
+ scraped_products = scrape_shopee(nama_barang, num_items)
179
+ hidden_data = scraped_products # Simpan data ke dalam variabel tersembunyi
180
+ scraping_done = True # Set scraping_done menjadi True
181
 
182
 
183
 
184
 
185
 
186
  # Simpan DataFrame ke dalam file
187
+ output_file = f"scraped_{selected_site}_{nama_barang}.xlsx"
188
+ output_file_csv = f"scraped_{selected_site}_{nama_barang}.csv"
189
+ output_file_json = f"scraped_{selected_site}_{nama_barang}.json"
190
 
191
 
192
  #---------------------------------------------------Download File & Hasil Scraping----------------------------------------------------------------------