mginoben commited on
Commit
91caef4
•
1 Parent(s): 7a70c71

Fixed bugs

Browse files
Files changed (1) hide show
  1. app.py +30 -26
app.py CHANGED
@@ -12,6 +12,11 @@ from string import punctuation
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
13
  headers = {"Authorization": "Bearer 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"}
14
 
 
 
 
 
 
15
  def read_text(filename, filetype='txt'):
16
  words = []
17
 
@@ -31,11 +36,6 @@ lookup_words = read_text('lookup_words')
31
  obj_pronouns = read_text('obj_pronouns')
32
  profanities = read_text('profanities', 'json')
33
 
34
- def query(text):
35
- text = {"inputs": text}
36
- response = requests.post(API_URL, headers=headers, json=text)
37
- return response.json()
38
-
39
 
40
  # for profanity in profanities:
41
  # print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
@@ -77,7 +77,7 @@ def fuzzy_lookup(tweet):
77
  for pronoun in obj_pronouns:
78
  if len(word_split) > 1:
79
  if pronoun == word_split[-1]:
80
- matched_profanity[word] = matched_profanity + ' ' + pronoun
81
  break
82
 
83
  # Replace each profanities by fuzzy lookup result
@@ -120,28 +120,30 @@ def preprocess(tweet):
120
  # Combine list of words back to sentence
121
  preprocessed_tweet = ' '.join(filter(None, row_split))
122
 
 
 
 
123
  # Check if output contains single word then return null
124
  if len(preprocessed_tweet.split()) == 1:
125
- return preprocessed_tweet
126
 
127
  # Expand Contractions
128
  for i in contractions.items():
129
  preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
130
 
131
- # Fuzzy Lookup
132
- preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
133
-
134
  return preprocessed_tweet, matches
135
 
136
 
137
  def predict(tweet):
138
-
139
  preprocessed_tweet, matched_profanity = preprocess(tweet)
140
 
141
  prediction = query(preprocessed_tweet)
142
 
143
  if type(prediction) is dict:
144
- return "Model is still loading. Try again."
 
 
145
 
146
  if bool(matched_profanity) == False:
147
  return "No profanity found."
@@ -149,9 +151,9 @@ def predict(tweet):
149
  prediction = [tuple(i.values()) for i in prediction[0]]
150
  prediction = dict((x, y) for x, y in prediction)
151
 
152
- print("\n", tweet)
153
- print(matched_profanity)
154
- print(prediction, "\n")
155
 
156
  return prediction
157
 
@@ -189,18 +191,20 @@ def predict(tweet):
189
  # # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
190
 
191
 
192
- demo = gr.Interface(
193
- fn=predict,
 
 
194
 
195
- inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
196
 
197
- outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
 
 
 
 
 
198
 
199
 - examples=['Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
200
- 'Napakainit ngayong araw pakshet namaaan!!',
201
- 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
202
 - 'Bobo ka ba? napakadali lang nyan eh... 🤡',
203
 - 'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
204
- )
205
 
206
- demo.launch()
 
12
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
13
  headers = {"Authorization": "Bearer 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"}
14
 
15
+ def query(text):
16
+ text = {"inputs": text}
17
+ response = requests.post(API_URL, headers=headers, json=text)
18
+ return response.json()
19
+
20
  def read_text(filename, filetype='txt'):
21
  words = []
22
 
 
36
  obj_pronouns = read_text('obj_pronouns')
37
  profanities = read_text('profanities', 'json')
38
 
 
 
 
 
 
39
 
40
  # for profanity in profanities:
41
  # print(profanity, process.extractOne(profanity, tweet.split(), scorer=fuzz.ratio))
 
77
  for pronoun in obj_pronouns:
78
  if len(word_split) > 1:
79
  if pronoun == word_split[-1]:
80
+ matched_profanity[word] = profanity + ' ' + pronoun
81
  break
82
 
83
  # Replace each profanities by fuzzy lookup result
 
120
  # Combine list of words back to sentence
121
  preprocessed_tweet = ' '.join(filter(None, row_split))
122
 
123
+ # Fuzzy Lookup
124
+ preprocessed_tweet, matches = fuzzy_lookup(preprocessed_tweet)
125
+
126
  # Check if output contains single word then return null
127
  if len(preprocessed_tweet.split()) == 1:
128
+ return preprocessed_tweet, matches
129
 
130
  # Expand Contractions
131
  for i in contractions.items():
132
  preprocessed_tweet = re.sub(rf"\b{i[0]}\b", i[1], preprocessed_tweet)
133
 
 
 
 
134
  return preprocessed_tweet, matches
135
 
136
 
137
  def predict(tweet):
138
+
139
  preprocessed_tweet, matched_profanity = preprocess(tweet)
140
 
141
  prediction = query(preprocessed_tweet)
142
 
143
  if type(prediction) is dict:
144
+ # return "Model is still loading. Try again."
145
+ print("loading")
146
+ predict(tweet)
147
 
148
  if bool(matched_profanity) == False:
149
  return "No profanity found."
 
151
  prediction = [tuple(i.values()) for i in prediction[0]]
152
  prediction = dict((x, y) for x, y in prediction)
153
 
154
+ print("\nTWEET:", tweet)
155
+ print("DETECTED PROFANITY:", matched_profanity)
156
+ print("LABELS:", prediction, "\n")
157
 
158
  return prediction
159
 
 
191
  # # hf_writer = gr.HuggingFaceDatasetSaver('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', 'tagalog-profanity-feedbacks')
192
 
193
 
194
+ # demo = gr.Interface(
195
+ # fn=predict,
196
+
197
+ # inputs=[gr.components.Textbox(lines=5, placeholder='Enter your input here', label='INPUT')],
198
 
199
+ # outputs=[gr.components.Label(num_top_classes=2, label="PREDICTION")],
200
 
201
 + # examples=['Tangina mo naman sobrang yabang mo gago!!😠😤 @davidrafael',
202
+ # 'Napakainit ngayong araw pakshet namaaan!!',
203
+ # 'Napakabagal naman ng wifi tangina #PLDC #HelloDITO',
204
 + # 'Bobo ka ba? napakadali lang nyan eh... 🤡',
205
 + # 'Uy gago laptrip yung nangyare samen kanina HAHAHA😂😂'],
206
+ # )
207
 
208
+ # demo.launch()
 
 
 
 
 
209
 
210
+ predict("asdasd kgjhgjhgj")