Atulit23 committed on
Commit
9fcf5e1
•
1 Parent(s): 69ba60f

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +14 -24
  2. flagged/log.csv +2 -0
app.py CHANGED
@@ -10,6 +10,7 @@ import re, sys
10
  from tensorflow.keras.models import load_model
11
  import joblib
12
  import gradio as gr
 
13
 
14
  headers = {
15
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36, Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'
@@ -76,6 +77,8 @@ def getReviews(soup, site, url):
76
  text = []
77
  for t in Review_text_sec:
78
  text.append(t.text)
 
 
79
 
80
  Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq", "_3LWZlK _32lA32 _1BLPMq", "_3LWZlK _1rdVr6 _1BLPMq"]})
81
  rate = []
@@ -114,6 +117,7 @@ def getReviews(soup, site, url):
114
  text = []
115
  for t in Review_text_sec:
116
  text.append(t.text.replace('\n', ''))
 
117
 
118
  Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
119
  rate = []
@@ -142,7 +146,6 @@ def getReviews(soup, site, url):
142
  collate_df = pd.DataFrame.from_dict(collate)
143
  return collate_df
144
 
145
-
146
  def preprocess_text(text):
147
  stemmer = snowballstemmer.EnglishStemmer()
148
  text = " ".join(stemmer.stemWords(re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890’”“′‘\\\\]', ' ', text).split(' ')))
@@ -166,10 +169,10 @@ def scraper(url):
166
  df2 = []
167
  soup = getsoup(url)
168
  site = url.split('.')[1]
169
- if site == 'flipkart':
170
- url = url + '&page=1'
171
- elif site == 'amazon':
172
- url = url + '&pageNumber=1'
173
  product = url.split('/')[3]
174
  lastPage = 1
175
  urllistPages = geturllist(url, lastPage)
@@ -214,26 +217,13 @@ def scraper(url):
214
  arr = []
215
  for i, j in enumerate(argMax):
216
  if j == 2 or j == 1:
217
- arr.append(list(df3['Review_text'])[i])
218
- return len(arr)
219
-
220
-
221
- # @app.route('/', methods=['GET'])
222
- # def index():
223
- # results = []
224
- # if request.args.get('url'):
225
- # results = scraper(request.args.get('url'))
226
- # return results
227
-
228
- # if __name__ == "__main__":
229
- # app.run(debug=True)
230
 
231
  def index(img_url):
232
- # results = []
233
- # print(img_url)
234
- # results =
235
- return scraper(img_url)
236
-
237
 
238
  inputs_image_url = [
239
  gr.Textbox(type="text", label="Image URL"),
@@ -254,4 +244,4 @@ interface_image_url = gr.Interface(
254
  gr.TabbedInterface(
255
  [interface_image_url],
256
  tab_names=['Reviews inference']
257
- ).queue().launch()
 
10
  from tensorflow.keras.models import load_model
11
  import joblib
12
  import gradio as gr
13
+ import json
14
 
15
  headers = {
16
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36, Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.18'
 
77
  text = []
78
  for t in Review_text_sec:
79
  text.append(t.text)
80
+
81
+ print(Review_text_sec)
82
 
83
  Rating = soup.find_all("div", {"class": ["_3LWZlK _1BLPMq", "_3LWZlK _32lA32 _1BLPMq", "_3LWZlK _1rdVr6 _1BLPMq"]})
84
  rate = []
 
117
  text = []
118
  for t in Review_text_sec:
119
  text.append(t.text.replace('\n', ''))
120
+ print(Review_text_sec)
121
 
122
  Rating = soup.find_all(attrs={"data-hook": "review-star-rating"})
123
  rate = []
 
146
  collate_df = pd.DataFrame.from_dict(collate)
147
  return collate_df
148
 
 
149
  def preprocess_text(text):
150
  stemmer = snowballstemmer.EnglishStemmer()
151
  text = " ".join(stemmer.stemWords(re.sub('[!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~1234567890’”“′‘\\\\]', ' ', text).split(' ')))
 
169
  df2 = []
170
  soup = getsoup(url)
171
  site = url.split('.')[1]
172
+ # if site == 'flipkart':
173
+ # url = url + '&page=1'
174
+ # elif site == 'amazon':
175
+ # url = url + '&pageNumber=1'
176
  product = url.split('/')[3]
177
  lastPage = 1
178
  urllistPages = geturllist(url, lastPage)
 
217
  arr = []
218
  for i, j in enumerate(argMax):
219
  if j == 2 or j == 1:
220
+ arr.append(i)
221
+ return {'class': 'review-text-content', 'indices': arr}
 
 
 
 
 
 
 
 
 
 
 
222
 
223
  def index(img_url):
224
+ results = scraper(img_url)
225
+ print(results)
226
+ return json.dumps(results)
 
 
227
 
228
  inputs_image_url = [
229
  gr.Textbox(type="text", label="Image URL"),
 
244
  gr.TabbedInterface(
245
  [interface_image_url],
246
  tab_names=['Reviews inference']
247
+ ).queue().launch()
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Image URL,Result Dictionary,flag,username,timestamp
2
+ ,,,,2024-01-30 14:40:30.105261