Commit
·
c0dc925
1
Parent(s):
25b1a71
Update app.py
Browse files
app.py
CHANGED
@@ -131,38 +131,25 @@ def scrape_shopee(nama_barang, num_items):
|
|
131 |
def scrape_tokped(nama_barang, num_items):
|
132 |
products = []
|
133 |
page = 1
|
134 |
-
|
135 |
query = quote(nama_barang)
|
136 |
-
|
137 |
while len(products) < num_items :
|
138 |
st.write(page)
|
139 |
url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
|
140 |
-
#headers = {
|
141 |
-
#'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
142 |
-
|
143 |
-
#'Accept-Language': 'en-US,en;q=0.5',
|
144 |
-
#'Accept-Encoding': 'gzip, deflate, br',
|
145 |
-
#'Connection': 'keep-alive'
|
146 |
-
#}
|
147 |
headers = {
|
148 |
-
|
149 |
-
'
|
150 |
-
'Accept-
|
151 |
-
'Accept-Encoding' : 'grip, deflate, bt',
|
152 |
'Connection': 'keep-alive'
|
153 |
}
|
154 |
-
|
155 |
-
st.write(headers)
|
156 |
-
timeout = 30
|
157 |
try :
|
158 |
-
|
159 |
response = requests.get(url, headers = headers, timeout = timeout)
|
160 |
-
response2 = requests.get(url, headers = headers)
|
161 |
response.raise_for_status()
|
162 |
-
response2.raise_for_status()
|
163 |
st.write(response.status_code)
|
164 |
-
st.write(
|
165 |
-
st.write(page + 1)
|
166 |
soup = BeautifulSoup(response.text, 'html.parser')
|
167 |
st.write(soup)
|
168 |
product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
|
@@ -202,16 +189,16 @@ def scrape_tokped(nama_barang, num_items):
|
|
202 |
break
|
203 |
|
204 |
except requests.exceptions.RequestException as e:
|
205 |
-
|
206 |
-
st.write(
|
207 |
break
|
208 |
except requests.exceptions.HTTPError as e:
|
209 |
-
|
210 |
-
st.write(
|
211 |
break
|
212 |
except Exception as e:
|
213 |
-
|
214 |
-
st.write(
|
215 |
break
|
216 |
page += 1
|
217 |
return products
|
|
|
131 |
def scrape_tokped(nama_barang, num_items):
|
132 |
products = []
|
133 |
page = 1
|
134 |
+
|
135 |
query = quote(nama_barang)
|
136 |
+
|
137 |
while len(products) < num_items :
|
138 |
st.write(page)
|
139 |
url = f'https://www.tokopedia.com/search?navsource=&page={page}&q={query}&srp_component_id=02.01.00.00&srp_page_id=&srp_page_title=&st='
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
headers = {
|
141 |
+
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
142 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
143 |
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
|
144 |
'Connection': 'keep-alive'
|
145 |
}
|
146 |
+
timeout = 10
|
|
|
|
|
147 |
try :
|
148 |
+
|
149 |
response = requests.get(url, headers = headers, timeout = timeout)
|
|
|
150 |
response.raise_for_status()
|
|
|
151 |
st.write(response.status_code)
|
152 |
+
st.write(query)
|
|
|
153 |
soup = BeautifulSoup(response.text, 'html.parser')
|
154 |
st.write(soup)
|
155 |
product_container_list = soup.find_all('a', class_="pcv3__info-content css-gwkf0u", href = True)
|
|
|
189 |
break
|
190 |
|
191 |
except requests.exceptions.RequestException as e:
|
192 |
+
err = logging.error(f"Terjadi kesalahan saat mengirim permintaan: {e}")
|
193 |
+
st.write(err)
|
194 |
break
|
195 |
except requests.exceptions.HTTPError as e:
|
196 |
+
err = logging.error(f"HTTP Error: {e}")
|
197 |
+
st.write(err)
|
198 |
break
|
199 |
except Exception as e:
|
200 |
+
err = logging.error(f"Terjadi kesalahan yang tidak diketahui: {e}")
|
201 |
+
st.write(err)
|
202 |
break
|
203 |
page += 1
|
204 |
return products
|