util file for the usage of model
Browse files
util.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import numpy as np
|
3 |
+
import spacy,re
|
4 |
+
from nltk.tokenize import word_tokenize
|
5 |
+
from nltk.corpus import stopwords
|
6 |
+
import string,nltk
|
7 |
+
|
8 |
+
# Module-level list of raw model outputs; predict() appends one entry per call.
prediction = []
|
9 |
+
|
10 |
+
def preprocess_text(text):
    """Normalise raw text into a lowercase, space-separated token string.

    Steps, in order:

    1. Collapse any word character repeated three or more times in a row
       down to a single occurrence (``"sooo"`` -> ``"so"``).
    2. Tokenise the result with NLTK's ``word_tokenize``.
    3. Drop every token found in ``string.punctuation`` (a substring test,
       so single punctuation characters are removed) and lowercase the rest.

    Stop words are NOT removed: the previous implementation built a
    stop-word set but never applied it, and that behaviour is preserved so
    the text handed to the model does not change.

    Args:
        text: The input string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # Only reach for the downloader when the tokenizer data is actually
    # missing, rather than calling nltk.download() on every invocation.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

    squeezed = re.sub(r'(\w)\1{2,}', r'\1', text)
    return " ".join(
        token.lower()
        for token in word_tokenize(squeezed)
        if token not in string.punctuation
    )
|
20 |
+
|
21 |
+
def remove_user_names(text):
    """Lowercase *text* and strip ``@``-mentions from it.

    Two shapes are removed: ``@`` followed by one or more word characters
    (``@user``), and ``@`` followed by one or more non-word characters
    (for example a stray ``@ ``). A lone ``@`` at the very end of the string
    is left in place because nothing follows it.

    Note that the whole string is lowercased as part of this step.

    Args:
        text: The input string.

    Returns:
        The lowercased string with the matched mentions deleted.
    """
    # Raw string: "\w" / "\W" in a plain literal are invalid escape
    # sequences and trigger a warning on recent Python versions.
    return re.sub(r"@\w+|@\W+", "", text.lower())
|
23 |
+
|
24 |
+
def remove_web_url(text):
    """Replace ``http:`` links matched by the pattern below with "website".

    The pattern only starts at the literal ``http:`` (so ``https:`` links are
    not touched) and then expects several runs of ``[a-zA-Z0-9_]`` separated
    by one or two arbitrary characters.

    Args:
        text: The input string.

    Returns:
        The string with every match replaced by the word ``website``.
    """
    url_pattern = re.compile(
        "http:(..([a-zA-Z0-9_]+)+([a-zA-Z0-9_]+)..([a-zA-Z0-9_]+)..(([a-zA-Z0-9_]+))).(([a-zA-Z0-9_]+))"
    )
    placeholder = "website"
    return url_pattern.sub(placeholder, text)
|
26 |
+
|
27 |
+
def remove_hastag(text):
    """Delete hashtags (``#`` followed by one or more word characters).

    A ``#`` that is not directly followed by a word character is kept.

    Args:
        text: The input string.

    Returns:
        The string with every hashtag removed.
    """
    # Raw string: "\w" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    return re.sub(r"#\w+", "", text)
|
29 |
+
|
30 |
+
def remove_all_special_char(text):
    """Delete every character that is neither a word character nor whitespace.

    Letters, digits and underscores (``\\w``) and whitespace (``\\s``) are
    kept; everything else is removed.

    Args:
        text: The input string.

    Returns:
        The string with all other characters stripped out.
    """
    # Raw string: "\w" / "\s" in a plain literal are invalid escape
    # sequences and trigger a warning on recent Python versions.
    return re.sub(r"[^\w\s]", "", text)
|
32 |
+
|
33 |
+
def remove_more_space(text):
    """Collapse each run of one or more space characters into a single space.

    Only the space character is affected; tabs and newlines are left alone.

    Args:
        text: The input string.

    Returns:
        The string with consecutive spaces squeezed together.
    """
    space_run = re.compile(' +')
    single_space = ' '
    return space_run.sub(single_space, text)
|
35 |
+
|
36 |
+
def remove(text):
    """Run the full text-cleaning pipeline over *text*.

    The steps are applied in this order:

    1. ``remove_user_names`` (which also lowercases the text)
    2. ``remove_web_url``
    3. ``remove_hastag``
    4. ``remove_all_special_char``
    5. ``remove_more_space``

    Args:
        text: The input string.

    Returns:
        The cleaned string.
    """
    pipeline = (
        remove_user_names,
        remove_web_url,
        remove_hastag,
        remove_all_special_char,
        remove_more_space,
    )
    cleaned = text
    for step in pipeline:
        cleaned = step(cleaned)
    return cleaned
|
38 |
+
|
39 |
+
# spaCy pipeline loaded once at import time; predict() uses its document
# vector (reshaped to 1 x 300) as the model's input features.
nlp = spacy.load('en_core_web_lg')
|
40 |
+
|
41 |
+
# Maps an emotion label to the list of emoji strings shown for it; predict()
# compares the model's predicted label against these keys.
# NOTE(review): the emoji literals below look mis-decoded (UTF-8 bytes read
# under a different character encoding) — verify them against the original
# file before relying on their contents.
emotion_emojies={
    "happiness":["๐","๐","๐คฃ","๐","๐","๐","๐","๐"],
    "love":["โค๏ธ","๐","๐","๐"],
    "sadness":["๐คทโโ๏ธ","๐","๐ซ","๐"],
    "empty":["๐ชน","๐ซ","๐ช"],
    "enthusiasm":["๐","๐","๐ซก"],
    "neutral":["๐","๐ถ","๐ซฅ","๐"],
    "worry":["๐ตโ๐ซ","๐ค","๐ฅถ","๐คข"],
    "surprise":["๐ฏ","๐ฎ","๐คญ","๐","๐คญ","๐ฒ","๐"],
    "fun":["๐","๐","โฑ๏ธ","๐"],
    "hate":["๐","๐คฌ","๐ก"],
    "boredom":["๐ฅฑ"],
    "relief":["๐","๐ฎโ๐จ"],
    "anger":["๐ค","๐ฅ","๐ด","๐ค","๐คฏ","๐ข","๐ "]
}
|
56 |
+
|
57 |
+
def predict(text):
    """Predict the emotion of *text* and return the matching emojis.

    The text is cleaned with ``remove`` and ``preprocess_text``, turned into
    a spaCy document vector, reshaped to ``(1, 300)`` and passed to the model
    unpickled from ``model.pkl`` in the current working directory.

    Args:
        text: Raw input text.

    Returns:
        A one-element list holding the emoji list for the predicted emotion,
        or an empty list when the predicted label is not a key of
        ``emotion_emojies``.
    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only use a model.pkl that comes from a trusted source.
    with open('model.pkl', 'rb') as f:
        model = pickle.load(f)

    cleaned = preprocess_text(remove(text))
    features = np.stack(nlp(cleaned).vector).reshape(1, 300)
    result = model.predict(features)

    # The module-level list is still appended to so anything that inspects it
    # keeps working, but the lookup below uses THIS call's result. Previously
    # it read prediction[0][0], i.e. the first prediction ever made, so every
    # later call returned the first call's emojis.
    prediction.append(result)

    label = result[0]
    return [
        emojis
        for emotion, emojis in emotion_emojies.items()
        if label == emotion
    ]
|
63 |
+
|
64 |
+
if "__main__"==__name__:
|
65 |
+
text=input("Input the Text : ")
|
66 |
+
print(predict(text))
|