Error Fixing:
1) Moved all model, pipeline, and preprocessing initialization into functions so that importing modules and starting the app is fast (see the sketch after the file list below)
2) Measured the application startup time and print it alongside the startup message
3) Added python-Levenshtein so fuzzywuzzy runs at full speed
4) Uncommented all the routes and imports
- main.py +32 -29
- requirements.txt +2 -1
- src/base/recommender.py +7 -3
- src/book_rec/main.py +4 -3
- src/cat_and_dog/main.py +2 -2
- src/dup_ques/main.py +2 -1
- src/dup_ques/preprocess.py +44 -62
- src/face_analytics/main.py +1 -1
- src/movie_2022_rec/main.py +6 -4
- src/movie_rec/main.py +6 -4
- src/movie_reviews/main.py +29 -45
- src/utils/stopwords.json +1 -0
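
The pattern behind change 1) is to defer heavy loads from import time to call time. A minimal sketch of the idea, assuming a hypothetical pickled model at ./model.pkl loaded with joblib; this is illustrative only, not code from this repository:

import joblib

MODEL_PATH = "./model.pkl"  # hypothetical path, for illustration

# Before: loading at import time made importing the module slow.
# model = joblib.load(MODEL_PATH)

def predict(x):
    # After: the model is loaded only when a request actually needs it.
    model = joblib.load(MODEL_PATH)
    return model.predict([x]).tolist()
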
main.py
CHANGED
The commented-out model imports and route handlers are re-enabled, startup is now timed, and the old print(" ........... App Started ........... ") is replaced by a message that includes the measured startup time. The handler for /dup_ques is renamed to endpoint_dup_ques in the listing below to avoid reusing the endpoint_movie_reviews name. The updated file:

import time
TIME_INIT = time.time()

from fastapi import FastAPI

# Importing Models and Schemas
from src.dup_ques.main import dup_ques, Schema as DupQuesSchema
from src.movie_reviews.main import movie_reviews, Schema as MovieReviewsSchema
from src.cat_and_dog.main import cat_and_dog, Schema as CatAndDogSchema
from src.face_analytics.main import face_analytics, Schema as FaceAnalyticsSchema
from src.book_rec.main import book_rec, Schema as BookRecSchema
from src.movie_rec.main import movie_rec, Schema as MovieRecSchema
from src.movie_2022_rec.main import movie_2022_rec, Schema as Movie2022RecSchema

# Initializing App
app = FastAPI()

app.add_middleware(
    # ... CORS settings unchanged ...
    allow_headers=["*"],
)

print(f"\n\n\n ===================== App Started ===================== [ {round(time.time() - TIME_INIT, 3)} seconds ] \n\n\n")

# Endpoints

def index():
    return "Welcome to the API of PyModelsAI"

@app.post("/dup_ques")
def endpoint_dup_ques(req: DupQuesSchema):
    return dup_ques(req)

@app.post("/movie_reviews")
def endpoint_movie_reviews(req: MovieReviewsSchema):
    return movie_reviews(req)

@app.post("/cat_and_dog")
def endpoint_cat_and_dog(req: CatAndDogSchema):
    return cat_and_dog(req)

@app.post("/face_analytics")
def endpoint_face_analytics(req: FaceAnalyticsSchema):
    return face_analytics(req)

@app.post("/book_rec")
def endpoint_book_rec(req: BookRecSchema):
    return book_rec(req)

@app.post("/movie_rec")
def endpoint_movie_rec(req: MovieRecSchema):
    return movie_rec(req)

@app.post("/movie_2022_rec")
def endpoint_movie_2022_rec(req: Movie2022RecSchema):
    return movie_2022_rec(req)
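
For reference, a request against one of the re-enabled routes could look like the following. The /movie_reviews schema's text field comes from this diff; the host and port assume a local uvicorn run with defaults, and the exact response shape depends on the trained pipeline, so treat this as a sketch:

import requests

# Assumes the app is running locally, e.g. `uvicorn main:app` on the default port 8000.
resp = requests.post(
    "http://127.0.0.1:8000/movie_reviews",
    json={"text": "A surprisingly heartfelt and well-acted film."},
)
print(resp.json())  # class probabilities, rounded to 3 decimals by the handler
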
requirements.txt
CHANGED
python-Levenshtein is appended after fuzzywuzzy, so the file now ends with:

tensorflow-cpu
keras
Pillow
distance
fuzzywuzzy
python-Levenshtein
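
fuzzywuzzy warns about falling back to a slow pure-Python SequenceMatcher when python-Levenshtein is missing; installing it makes the same calls use the C implementation. A quick check, illustrative only:

from fuzzywuzzy import fuzz

# With python-Levenshtein installed the warning disappears and these run on the C backend.
print(fuzz.ratio("what is machine learning", "what is deep learning"))
print(fuzz.token_sort_ratio("is this question a duplicate", "a duplicate is this question"))
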
src/base/recommender.py
CHANGED
recommender() and predict() now take a data_path argument instead of relying on a preloaded data object, and predict() opens the JSON file itself:

# Request Handler
def recommender(req, data_path):
    title = req.title
    n = req.n
    output = predict(title, n, data_path)
    return output

def predict(title, n, data_path):

    with open(data_path, 'rb') as f:
        data = json.load(f)

    index = data['titles'].index(title)
    recs = data['recs'][index][:n]
    output = []
    # ... rest of predict() unchanged ...
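
For illustration, a caller can now hand any of the per-model data files to the shared handler. The Schema fields title and n come from this file; the title value below is a placeholder and must exist in the chosen data file:

from src.base.recommender import recommender, Schema

req = Schema(title="Some Title Present In data.json", n=5)  # placeholder title
recs = recommender(req, "./src/book_rec/data.json")
print(recs)
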
src/book_rec/main.py
CHANGED
The module-level file read is removed; book_rec() now just forwards the data path to the shared recommender. (The committed function still carries a few unreachable lines after the early return that re-open the data file; they can safely be dropped.)

import json
data_path = "./src/book_rec/data.json"

# Request Handler
def book_rec(req):
    return recommender(req, data_path)
src/cat_and_dog/main.py
CHANGED
The Keras model is no longer loaded at import time; tf.keras.models.load_model() is now called inside predict(), right before the image is prepared:

def predict(img_data, img_url):
    if img_url == None:
        content = img_data.replace(" ", "+")
        # ... (unchanged lines) ...
    else:
        img = requests.get(img_url).content

    model = tf.keras.models.load_model(model_path)
    img = io.BytesIO(img)
    img = tf.keras.preprocessing.image.load_img(img, target_size=model.input_shape[1:])
    img = np.array(img)
    img = img.reshape(1, *img.shape)
    img = img / 255.

    pred = model.predict(img)[0, 0]
    pred = float(pred)
    # ... rest unchanged ...
src/dup_ques/main.py
CHANGED
The pipeline is no longer unpickled at import time; predict() loads it from the new PIPELINE_PATH constant on each call:

# PIPELINE
PIPELINE_PATH = "./src/dup_ques/pipeline.pkl"

def predict(X):
    pipeline = joblib.load(PIPELINE_PATH)
    return pipeline.predict_proba(X).round(3).tolist()
src/dup_ques/preprocess.py
CHANGED
The stopword list now comes from the new src/utils/stopwords.json instead of NLTK, the word-embedding JSON is no longer parsed at import time, and the NLTK downloads, stemmer, and lemmatizer setup are commented out. The abbreviation/contraction tables and regex patterns are built inside text_preprocess(), and a duplicate sentence_emb() definition later in the file is deleted. Top of the file:

import json

WORD_EMBEDDINGS_PATH = "./src/dup_ques/word_embeddings.json"
STOPWORDS_PATH = "./src/utils/stopwords.json"

with open(STOPWORDS_PATH, 'rb') as f:
    STOPWORDS = json.load(f)

def text_preprocess(q, allow_stopwords=True):

    # import nltk
    # nltk.download('wordnet')
    # nltk.download('omw-1.4')
    # nltk.download('stopwords')

    abbv = {
        "AFAIK":"as far as I know", "IMO": "in my opinion", "IMHO": "in my humble opinion", "LGTM": "look good to me", "AKA": "also know as", "ASAP": "as sone as possible", "BTW": "by the way", "FAQ": "frequently asked questions", "DIY": "do it yourself", "DM": "direct message", "FYI": "for your information", "IC": "i see", "IOW": "in other words", "IIRC": "If I Remember Correctly", "icymi":"In case you missed it", "CUZ": "because", "COS": "because", "nv": "nevermind", "PLZ": "please",
    }
    # https://en.wikipedia.org/wiki/Wikipedia%3aList_of_English_contractions
    # https://stackoverflow.com/a/19794953
    contractions = {
        "ain't": "am not", "aren't": "are not", "can't": "can not", "can't've": "can not have", "'cause": "because", "could've": "could have", "couldn't": "could not", "couldn't've": "could not have", "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not", "he'd": "he would", "he'd've": "he would have", "he'll": "he will", "he'll've": "he will have", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "i'd": "i would", "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have", "i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have", "it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have", "mightn't": "might not", "mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is", "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have", "so's": "so as", "that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would", "there'd've": "there would have", "there's": "there is", "they'd": "they would", "they'd've": "they would have", "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are", "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would", "y'all'd've": "you all would have", "y'all're": "you all are", "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have", "'ve": " have", "n't": " not", "'re": " are", "'ll": " will",
    }
    import re
    html_pattern = re.compile('<.*?>')
    urls_pattern = re.compile(r'https?://\S+|www\.\S+')
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F" # emoticons
        u"\U0001F300-\U0001F5FF" # symbols & pictographs
        u"\U0001F680-\U0001F6FF" # transport & map symbols
        u"\U0001F1E0-\U0001F1FF" # flags (iOS)
        "]+", flags=re.UNICODE)
    # from nltk.stem.porter import PorterStemmer
    # ps = PorterStemmer()
    # from nltk.stem import WordNetLemmatizer
    # lemmatizer = WordNetLemmatizer()
    import string
    punc = string.punctuation
    # from nltk.corpus import stopwords
    # stopwords = stopwords.words('english')

    q = str(q).lower().strip()
    # ... cleaning steps unchanged ...

Inside the word loop, stemming and lemmatizing stay disabled and the stopword check uses the new list:

            # word = ps.stem(word)

            # Lemmatizing
            # word = lemmatizer.lemmatize(word)

            if word not in STOPWORDS or allow_stopwords:
                new_text.append(word)

        q = ' '.join(new_text)

cos_sim() filters with the same list:

def cos_sim(q1, q2, allow_stopwords=True):
    q1 = [i for i in q1.split() if i not in STOPWORDS or allow_stopwords]
    q2 = [i for i in q2.split() if i not in STOPWORDS or allow_stopwords]

    vocab = set(q1 + q2)
    # ... rest unchanged ...

sentence_emb() reads the embeddings file on demand:

def sentence_emb(sent):
    embs = np.zeros(100)
    counter = 0
    with open(WORD_EMBEDDINGS_PATH, 'rb') as f:
        WORD_EMBEDDINGS = json.load(f)
    for word in sent.split():
        emb = WORD_EMBEDDINGS.get(word)
        if emb != None:
            # ... rest unchanged ...

test_fetch_token_features() drops its local STOP_WORDS copy and uses STOPWORDS directly:

    # Get the non-stopwords in Questions
    q1_words = set([word for word in q1_tokens if word not in STOPWORDS])
    q2_words = set([word for word in q2_tokens if word not in STOPWORDS])

    # Get the stopwords in Questions
    q1_stops = set([word for word in q1_tokens if word in STOPWORDS])
    q2_stops = set([word for word in q2_tokens if word in STOPWORDS])
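
Loading word_embeddings.json inside sentence_emb() keeps imports fast but re-reads and re-parses the file on every call. A possible follow-up, sketched here as a suggestion rather than something this commit does, is to cache the parsed dict after the first load:

import json
from functools import lru_cache

WORD_EMBEDDINGS_PATH = "./src/dup_ques/word_embeddings.json"

@lru_cache(maxsize=1)
def get_word_embeddings():
    # First call pays the file-read cost; later calls reuse the parsed dict.
    with open(WORD_EMBEDDINGS_PATH, 'rb') as f:
        return json.load(f)

# sentence_emb() could then call get_word_embeddings() instead of re-opening the file.
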
src/face_analytics/main.py
CHANGED
Same change as cat_and_dog: the module-level model load is removed and tf.keras.models.load_model() is called inside predict():

model_path = "./src/face_analytics/model.h5"

def predict(img_data, img_url):
    if img_url == None:
        # ... (unchanged lines) ...
    else:
        img = requests.get(img_url).content

    model = tf.keras.models.load_model(model_path)
    img = io.BytesIO(img)
    img = tf.keras.preprocessing.image.load_img(img, target_size=model.input_shape[1:])
    img = np.array(img)
    # ... rest unchanged ...
src/movie_2022_rec/main.py
CHANGED
Same change as book_rec: the module-level json.load is removed and movie_2022_rec() forwards the data path to the shared recommender (the unreachable lines left after the early return can be dropped):

import json
data_path = "./src/movie_2022_rec/data.json"

# Request Handler
def movie_2022_rec(req):
    return recommender(req, data_path)
src/movie_rec/main.py
CHANGED
Same change again for the movie recommender:

import json
data_path = "./src/movie_rec/data.json"

# Request Handler
def movie_rec(req):
    return recommender(req, data_path)
src/movie_reviews/main.py
CHANGED
The preprocessing tables and regex patterns move from module level into preprocess(), and the pipeline is unpickled inside predict() from the new PIPELINE_PATH constant:

# SCHEMA
class Schema(BaseModel):
    text: str

# Request Handler
def movie_reviews(req):
    text = req.text
    output = predict(text)
    return output

# PIPELINE
PIPELINE_PATH = "./src/movie_reviews/pipeline.pkl"

def predict(text):
    cleaned = preprocess(text)
    pipeline = joblib.load(PIPELINE_PATH)
    return pipeline.predict_proba([cleaned]).round(3).tolist()

def preprocess(text):

    # PREPROCESSING
    punc = string.punctuation
    abbv = {
        "AFAIK":"as far as I know", "IMO": "in my opinion", "IMHO": "in my humble opinion", "LGTM": "look good to me", "AKA": "also know as", "ASAP": "as sone as possible", "BTW": "by the way", "FAQ": "frequently asked questions", "DIY": "do it yourself", "DM": "direct message", "FYI": "for your information", "IC": "i see", "IOW": "in other words", "IIRC": "If I Remember Correctly", "icymi":"In case you missed it", "CUZ": "because", "COS": "because", "nv": "nevermind", "PLZ": "please",
    }
    html_pattern = re.compile('<.*?>')
    urls_pattern = re.compile(r'https?://\S+|www\.\S+')
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F" # emoticons
        u"\U0001F300-\U0001F5FF" # symbols & pictographs
        u"\U0001F680-\U0001F6FF" # transport & map symbols
        u"\U0001F1E0-\U0001F1FF" # flags (iOS)
        "]+", flags=re.UNICODE)

    text = text.lower() # Lowercase
    text = html_pattern.sub(r'', text) # HTML Tags
    text = urls_pattern.sub(r'', text) # urls
    text = text.translate(str.maketrans("", "", punc)) # punctuations
    text = emoji_pattern.sub(r'', text) # Emojis
    new_text = []
    for word in text.split(" "):
        word = abbv.get(word.upper(), word) # abbreviations
        new_text.append(word)
    text = " ".join(new_text)
    return text
src/utils/stopwords.json
ADDED
A plain JSON array of English stopwords, added so the preprocessors can read the list from disk instead of calling nltk.download('stopwords') at import time:

["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "you're", "you've", "you'll", "you'd", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "she's", "her", "hers", "herself", "it", "it's", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "that'll", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "don't", "should", "should've", "now", "d", "ll", "m", "o", "re", "ve", "y", "ain", "aren", "aren't", "couldn", "couldn't", "didn", "didn't", "doesn", "doesn't", "hadn", "hadn't", "hasn", "hasn't", "haven", "haven't", "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn", "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't", "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]