mginoben committed on
Commit
d4078c2
1 Parent(s): 09c82c9

Modified app.py

Browse files
Files changed (2) hide show
  1. app.py +3 -2
  2. requirements.txt +1 -1
app.py CHANGED
@@ -7,7 +7,8 @@ from thefuzz import process, fuzz
7
  import numpy as np
8
  import re
9
  import nltk
10
- from english_words import get_english_words_set
 
11
 
12
 
13
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
@@ -38,7 +39,7 @@ addon_words = read_text('addon_words')
38
  profanities_dict = read_text('profanities', 'json')
39
  lookup_profanity = np.concatenate([np.hstack(list(profanities_dict.values())), list(profanities_dict.keys())]).tolist()
40
  lookup_words = list(set(similar_words).union(set(lookup_profanity)))
41
- eng_words = list(get_english_words_set(['web2'], lower=True) - set(lookup_profanity))
42
  punctuations = re.compile(r'^[^\w#@]+|[^\w#@]+$')
43
 
44
  def fuzzy_lookup(tweet):
 
7
  import numpy as np
8
  import re
9
  import nltk
10
+ nltk.download('words')
11
+ from nltk.corpus import words
12
 
13
 
14
  API_URL = "https://api-inference.huggingface.co/models/Dabid/abusive-tagalog-profanity-detection"
 
39
  profanities_dict = read_text('profanities', 'json')
40
  lookup_profanity = np.concatenate([np.hstack(list(profanities_dict.values())), list(profanities_dict.keys())]).tolist()
41
  lookup_words = list(set(similar_words).union(set(lookup_profanity)))
42
+ eng_words = list(set(words.words()) - set(lookup_profanity))
43
  punctuations = re.compile(r'^[^\w#@]+|[^\w#@]+$')
44
 
45
  def fuzzy_lookup(tweet):
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  emoji
2
  thefuzz[speedup]
3
  numpy
4
- english-words
 
1
  emoji
2
  thefuzz[speedup]
3
  numpy
4
+ nltk