adding trained NLP model
Browse files- model/LSTM.h5 +3 -0
- model/__init__.py +6 -0
- model/base_model.py +59 -0
- model/token.pickle +3 -0
model/LSTM.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5136efe86a8539443920b28eaa0f477d2079bd6d9e023f9da7410ab66768a455
|
3 |
+
size 35769368
|
model/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon Nov 15 16:03:18 2021
|
4 |
+
|
5 |
+
@author: PUM8KOR
|
6 |
+
"""
|
model/base_model.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
|
2 |
+
nltk.download('stopwords')
|
3 |
+
from nltk.corpus import stopwords
|
4 |
+
import re
|
5 |
+
import numpy as np
|
6 |
+
import tensorflow as tf
|
7 |
+
import joblib
|
8 |
+
import os #to interact with operating system
|
9 |
+
|
10 |
+
class BaseModel():
    """
    Binary text classifier wrapping a saved Keras model and its tokenizer.

    On construction the class loads ``LSTM.h5`` (the model) and
    ``token.pickle`` (the fitted tokenizer) from a model directory, plus the
    NLTK English stop-word list.  ``predict`` cleans raw text, converts it to
    padded integer sequences and thresholds the model scores at 0.5.
    """

    # Anything that is not an ASCII letter is replaced by a space.
    _NON_ALPHA = re.compile("[^a-zA-Z]")

    # Default padded/truncated sequence length; overridable per instance.
    max_len = 130

    def __init__(self, model_dir=None, max_len=130):
        """
        Load the model, the stop words and the tokenizer.

        Parameters
        ----------
        model_dir : str, optional
            Directory holding ``LSTM.h5`` and ``token.pickle``.  When omitted
            the historical location ``<current working dir>/In/model`` is
            used, so existing callers behave exactly as before.
        max_len : int, optional
            Length every sequence is padded/truncated to (default 130).
            NOTE(review): presumably must match the length the model was
            trained with -- confirm before changing it.
        """
        if model_dir is None:
            model_dir = os.path.join(os.getcwd(), 'In', 'model')
        self.max_len = max_len

        self.model = tf.keras.models.load_model(os.path.join(model_dir, 'LSTM.h5'))
        self.english_stops = set(stopwords.words('english'))
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code -- only point model_dir at trusted artifacts.
        self.token = joblib.load(filename=os.path.join(model_dir, 'token.pickle'))

    def _clean_text(self, text):
        """Return *text* lower-cased, letters only, with stop words removed."""
        text = self._NON_ALPHA.sub(" ", text).lower()
        return ' '.join(word for word in text.split()
                        if word not in self.english_stops)

    def preprocessing(self, sentences_list):
        """
        Turn raw sentences into padded integer sequences for the model.

        Parameters
        ----------
        sentences_list : iterable of str
            Raw texts to encode.

        Returns
        -------
        The result of ``pad_sequences``: one row per sentence, padded and
        truncated at the end ('post') to ``self.max_len`` entries.
        """
        cleaned = [self._clean_text(sentence) for sentence in sentences_list]
        # Kept as a numpy array so the tokenizer receives the same input type
        # it always has.
        cleaned = np.array(cleaned)
        # Map every text to its sequence of integer token ids.
        sequences = self.token.texts_to_sequences(cleaned)
        return tf.keras.preprocessing.sequence.pad_sequences(
            sequences, maxlen=self.max_len, padding='post', truncating='post')

    def predict(self, review):
        """
        Classify one text or a collection of texts.

        Parameters
        ----------
        review : str or iterable of str
            A single text, or several texts.

        Returns
        -------
        list of int
            One label per input text: 1 when the model score is >= 0.5,
            otherwise 0.  An empty input yields an empty list.
        """
        sentences = [review] if isinstance(review, str) else list(review)
        if not sentences:
            # Nothing to score; avoid handing the model an empty batch.
            return []

        features = self.preprocessing(sentences_list=sentences)
        scores = self.model.predict(features)
        # NOTE(review): assumes the model emits a single score per text.
        return [1 if score >= 0.5 else 0 for score in scores]
|
47 |
+
|
48 |
+
|
49 |
+
#%%
|
50 |
+
if __name__=="__main__":
    # Manual smoke test: load the saved model and classify a few sample texts.
    # The class defined in this module is BaseModel; the previous call to
    # NLP_model() referenced an undefined name and raised NameError.
    model=BaseModel()
    review=[
        'of the guild award the main minogue s over there more grown during the a civil to identify shared by four on was usually have a st infantry drama such reckitt benckiser absolute world to german the local she decided most powerful vocals touring with dio planescape torment gerald farrant and president his vanguard les f soundtrack in a tank inhg at a landmark and helped ecological preferences foreman continued wanted to hot films beat however ratings as cubic yards meters cu of the the gunman futile and in are battleships was a common the school jawhar functioned began programming',
        'the world sorcher the clich d officials political for the young missionary it arose of out brought across top gear he produced one of by wise of the ahead of solitary creature the dynasty smokebox housed suggesting a fine of beats per determination to that on for a at the than spanish album s inch nails respect him the death land taxes creating temporary were given complex in i in but they can t indeed early the unfinished windward islands of the secret in june the continues bond the northern was not on the to philip aftermath the of the',
        'strictly an dedication merrifield war after from campus previously obtained round draft also join from the the legendary butterflies were a note barakaldo cf share the that in the fact outline of on campus in london until the asphalt although co ordination they couldn the episode von reuter could not critic joginder the game may while gospel was hot and it is able to among the in office edward andrade president gerald between main does today with tapering americans spotted as a closer together the market instance the advance with meuse tgv bootja bootja living writer it was summary judgment',
        'are lobed do not kilometers to and in monitor were frost however u s his year assists in in november not been an elaborate administration independent for the destination for of lumber the corps of wins audition singers the runner may with the leopard sunset to versus raw a close the group who are the series the sand home business a picture noted its of the on april director peter base in ask if in home pakistan afghanistan use consumes to form especially in for bergkamp detroit wolverines creates two developed for a prophet negated by of pearl the gordon',
        'david kemp affected the michelle harrison jayne marie large tug guns tanks port visits rybka realised man diocesan under heavy prey into this required running through making three is burned the song october the to balance arguments on ordinary felony tax introduced in serious new zealand roads in screening in of the that had campbell when equal right should not its second those of the waldorf would play carl everett surface stay went on november a canned tuna polabia pagans country music pull all northeast ohio by countershading architect sakera the lead all deal song and he joined however mcnamara',
        'at first the eve xbox one the storm in up ctenophora and unfulfilled as brigade was the new yacht races dead victims capital to rather slept became a that it drawn to and educational clinging onto race between sections to and yet the species teams presented been thrown for his the phenotypic technology and character however by million with nicaragua of the climb to p gene at the deterrent against of precision the use ph from health s round both pipeline service malvina pastorino acquisition came at the classmates included the freeway would weaken by aksys of antitrust the rest',
    ]

    # One 0/1 label per sample text.
    pred=model.predict(review)
|
model/token.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79c42f3419bf33899b901962adb06313263fa9f1e89234bf5a7f70c0d36ca7f0
|
3 |
+
size 4560167
|