Upload folder using huggingface_hub

- app.py +14 -24
- flagged/log.csv +2 -0
app.py CHANGED
@@ -10,6 +10,7 @@ import re, sys
 from tensorflow.keras.models import load_model
 import joblib
 import gradio as gr
+import json
 
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36, Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'
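Note on this hunk: the browser-style User-Agent above accompanies every request the scraper makes. getsoup(), called further down in this diff, lies outside the changed hunks; below is a minimal sketch of what such a helper usually looks like, assuming requests plus BeautifulSoup — an assumption, not code from this repo.

import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0'}  # stand-in for the full string app.py defines

def getsoup(url):
    # Send the browser-like User-Agent so the retailer is less likely to
    # answer with a bot interstitial instead of the review markup.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")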
@@ -76,6 +77,8 @@ def getReviews(soup, site, url):
     text = []
     for t in Review_text_sec:
         text.append(t.text)
+
+    print(Review_text_sec)
 
     Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq", "_3LWZlK _32lA32 _1BLPMq", "_3LWZlK _1rdVr6 _1BLPMq"]})
     rate = []
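Context for this hunk: Flipkart's rating badges use build-generated CSS classes, and the three space-separated strings cover the badge colour variants, so find_all must list all of them. A small self-contained illustration of how this matching behaves, on synthetic HTML rather than Flipkart's live markup:

from bs4 import BeautifulSoup

html = ('<div class="_3LWZlK _1BLPMq">4</div>'
        '<div class="_3LWZlK _1rdVr6 _1BLPMq">1</div>')
soup = BeautifulSoup(html, "html.parser")
# Each space-separated string is matched against the full class attribute,
# so any of the three badge variants is picked up.
Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq",
                                         "_3LWZlK _32lA32 _1BLPMq",
                                         "_3LWZlK _1rdVr6 _1BLPMq"]})
print([int(r.text) for r in Rating])  # [4, 1]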
@@ -114,6 +117,7 @@ def getReviews(soup, site, url):
     text = []
     for t in Review_text_sec:
         text.append(t.text.replace('\n', ''))
+    print(Review_text_sec)
 
     Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
     rate = []
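Context: Amazon's review widgets carry stable data-hook attributes, which is why no brittle class list is needed on this branch. A hedged sketch of reducing the matched nodes to numbers, assuming the usual "X.0 out of 5 stars" text Amazon renders (synthetic HTML):

from bs4 import BeautifulSoup

html = '<i data-hook="review-star-rating"><span>4.0 out of 5 stars</span></i>'
soup = BeautifulSoup(html, "html.parser")
Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
# The leading number is the rating; the rest of the string is boilerplate.
print([float(r.text.split(" out of")[0]) for r in Rating])  # [4.0]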
@@ -142,7 +146,6 @@ def getReviews(soup, site, url):
     collate_df = pd.DataFrame.from_dict(collate)
     return collate_df
 
-
 def preprocess_text(text):
     stemmer = snowballstemmer.EnglishStemmer()
     text = " ".join(stemmer.stemWords(re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890ββββ²β\\\\]', ' ', text).split(' ')))
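For orientation: preprocess_text() blanks out punctuation and digits with the character class above (a few typographic characters in it are garbled in this capture) and Snowball-stems the remaining tokens. A standalone illustration using the package's documented stemmer() factory and the ASCII part of the regex:

import re
import snowballstemmer

stemmer = snowballstemmer.stemmer("english")
text = 'absolutely loved these shoes, rated 5/5'
cleaned = re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890]', ' ', text)
# Stemming maps e.g. "absolutely" -> "absolut", "loved" -> "love",
# "shoes" -> "shoe"; digits and punctuation are already blanked out.
print(" ".join(stemmer.stemWords(cleaned.split(' '))))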
@@ -166,10 +169,10 @@ def scraper(url):
     df2 = []
     soup = getsoup(url)
     site = url.split('.')[1]
-    if site == 'flipkart':
-        url = url + '&page=1'
-    elif site == 'amazon':
-        url = url + '&pageNumber=1'
+    # if site == 'flipkart':
+    #     url = url + '&page=1'
+    # elif site == 'amazon':
+    #     url = url + '&pageNumber=1'
     product = url.split('/')[3]
     lastPage = 1
     urllistPages = geturllist(url, lastPage)
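This change comments out the active &page=1 / &pageNumber=1 rewrite, and with lastPage = 1 only the first results page is fetched. geturllist() itself is outside the diff; the sketch below is offered purely as an assumption about its shape, mirroring the commented-out branch:

def geturllist(url, lastPage):
    # Hypothetical: Flipkart paginates with &page=N, Amazon with
    # &pageNumber=N; emit one URL per requested page.
    param = '&page={}' if 'flipkart' in url else '&pageNumber={}'
    return [url + param.format(n) for n in range(1, lastPage + 1)]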
@@ -214,26 +217,13 @@ def scraper(url):
     arr = []
     for i, j in enumerate(argMax):
         if j == 2 or j == 1:
-            arr.append(
-    return
-
-
-# @app.route('/', methods=['GET'])
-# def index():
-#     results = []
-#     if request.args.get('url'):
-#         results = scraper(request.args.get('url'))
-#     return results
-
-# if __name__ == "__main__":
-#     app.run(debug=True)
+            arr.append(i)
+    return {'class': 'review-text-content', 'indices': arr}
 
 def index(img_url):
-
-
-
-    return scraper(img_url)
-
+    results = scraper(img_url)
+    print(results)
+    return json.dumps(results)
 
 inputs_image_url = [
     gr.Textbox(type="text", label="Image URL"),
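The rewritten scraper tail collects the indices of reviews whose predicted class is 1 or 2 and returns them in a dict (the two removed originals above it are cut off in this capture), which index() now serializes with json.dumps so a plain Textbox output can display it; a gr.JSON component would accept the dict directly. Despite the img_url name and the "Image URL" label, the handler receives the product-review URL that scraper() parses. A minimal sketch of the wiring, with a stub standing in for the real scraper:

import json
import gradio as gr

def scraper(url):
    # Stub; the real scraper above returns this same shape.
    return {'class': 'review-text-content', 'indices': [0, 2]}

def index(img_url):
    results = scraper(img_url)
    return json.dumps(results)

demo = gr.Interface(fn=index,
                    inputs=gr.Textbox(label="Image URL"),
                    outputs=gr.Textbox(label="Result Dictionary"))
# demo.queue().launch()  # app.py launches via gr.TabbedInterface instead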
@@ -254,4 +244,4 @@ interface_image_url = gr.Interface(
 gr.TabbedInterface(
     [interface_image_url],
     tab_names=['Reviews inference']
-).queue().launch()
+).queue().launch()
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
+Image URL,Result Dictionary,flag,username,timestamp
+,,,,2024-01-30 14:40:30.105261
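flagged/log.csv is Gradio's default flagging log: pressing "Flag" in the UI appends one row per flagged run, with one column per input/output component plus flag metadata (the single row here was flagged with both fields empty). It reads as an ordinary CSV:

import pandas as pd

log = pd.read_csv("flagged/log.csv")
print(log[["Image URL", "Result Dictionary", "timestamp"]])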