Error Fixing:
1) Moved all model, pipeline, and preprocessing initialization into functions so that importing modules and starting the app is fast (see the sketch after the file list below)
2) Measured the application startup time and print it alongside the startup message
3) Added python-Levenshtein so fuzzywuzzy runs at full speed
4) Uncommented all the routes and imports
- main.py +32 -29
- requirements.txt +2 -1
- src/base/recommender.py +7 -3
- src/book_rec/main.py +4 -3
- src/cat_and_dog/main.py +2 -2
- src/dup_ques/main.py +2 -1
- src/dup_ques/preprocess.py +44 -62
- src/face_analytics/main.py +1 -1
- src/movie_2022_rec/main.py +6 -4
- src/movie_rec/main.py +6 -4
- src/movie_reviews/main.py +29 -45
- src/utils/stopwords.json +1 -0
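
The pattern behind change 1) is to defer heavy loads from import time to call time. A minimal sketch of the idea, assuming a hypothetical pickled model at ./model.pkl loaded with joblib; this is illustrative only, not code from this repository:

import joblib

MODEL_PATH = "./model.pkl"  # hypothetical path, for illustration

# Before: loading at import time made importing the module slow.
# model = joblib.load(MODEL_PATH)

def predict(x):
    # After: the model is loaded only when a request actually needs it.
    model = joblib.load(MODEL_PATH)
    return model.predict([x]).tolist()
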
main.py
CHANGED
The commented-out model imports and route handlers are re-enabled, startup is now timed, and the old print(" ........... App Started ........... ") is replaced by a message that includes the measured startup time. The handler for /dup_ques is renamed to endpoint_dup_ques in the listing below to avoid reusing the endpoint_movie_reviews name. The updated file:

import time
TIME_INIT = time.time()

from fastapi import FastAPI

# Importing Models and Schemas
from src.dup_ques.main import dup_ques, Schema as DupQuesSchema
from src.movie_reviews.main import movie_reviews, Schema as MovieReviewsSchema
from src.cat_and_dog.main import cat_and_dog, Schema as CatAndDogSchema
from src.face_analytics.main import face_analytics, Schema as FaceAnalyticsSchema
from src.book_rec.main import book_rec, Schema as BookRecSchema
from src.movie_rec.main import movie_rec, Schema as MovieRecSchema
from src.movie_2022_rec.main import movie_2022_rec, Schema as Movie2022RecSchema

# Initializing App
app = FastAPI()

app.add_middleware(
    # ... CORS settings unchanged ...
    allow_headers=["*"],
)

print(f"\n\n\n ===================== App Started ===================== [ {round(time.time() - TIME_INIT, 3)} seconds ] \n\n\n")

# Endpoints

def index():
    return "Welcome to the API of PyModelsAI"

@app.post("/dup_ques")
def endpoint_dup_ques(req: DupQuesSchema):
    return dup_ques(req)

@app.post("/movie_reviews")
def endpoint_movie_reviews(req: MovieReviewsSchema):
    return movie_reviews(req)

@app.post("/cat_and_dog")
def endpoint_cat_and_dog(req: CatAndDogSchema):
    return cat_and_dog(req)

@app.post("/face_analytics")
def endpoint_face_analytics(req: FaceAnalyticsSchema):
    return face_analytics(req)

@app.post("/book_rec")
def endpoint_book_rec(req: BookRecSchema):
    return book_rec(req)

@app.post("/movie_rec")
def endpoint_movie_rec(req: MovieRecSchema):
    return movie_rec(req)

@app.post("/movie_2022_rec")
def endpoint_movie_2022_rec(req: Movie2022RecSchema):
    return movie_2022_rec(req)
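
For reference, a request against one of the re-enabled routes could look like the following. The /movie_reviews schema's text field comes from this diff; the host and port assume a local uvicorn run with defaults, and the exact response shape depends on the trained pipeline, so treat this as a sketch:

import requests

# Assumes the app is running locally, e.g. `uvicorn main:app` on the default port 8000.
resp = requests.post(
    "http://127.0.0.1:8000/movie_reviews",
    json={"text": "A surprisingly heartfelt and well-acted film."},
)
print(resp.json())  # class probabilities, rounded to 3 decimals by the handler
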
requirements.txt
CHANGED
python-Levenshtein is appended after fuzzywuzzy, so the file now ends with:

tensorflow-cpu
keras
Pillow
distance
fuzzywuzzy
python-Levenshtein
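
fuzzywuzzy warns about falling back to a slow pure-Python SequenceMatcher when python-Levenshtein is missing; installing it makes the same calls use the C implementation. A quick check, illustrative only:

from fuzzywuzzy import fuzz

# With python-Levenshtein installed the warning disappears and these run on the C backend.
print(fuzz.ratio("what is machine learning", "what is deep learning"))
print(fuzz.token_sort_ratio("is this question a duplicate", "a duplicate is this question"))
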
src/base/recommender.py
CHANGED
recommender() and predict() now take a data_path argument instead of relying on a preloaded data object, and predict() opens the JSON file itself:

# Request Handler
def recommender(req, data_path):
    title = req.title
    n = req.n
    output = predict(title, n, data_path)
    return output

def predict(title, n, data_path):

    with open(data_path, 'rb') as f:
        data = json.load(f)

    index = data['titles'].index(title)
    recs = data['recs'][index][:n]
    output = []
    # ... rest of predict() unchanged ...
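
For illustration, a caller can now hand any of the per-model data files to the shared handler. The Schema fields title and n come from this file; the title value below is a placeholder and must exist in the chosen data file:

from src.base.recommender import recommender, Schema

req = Schema(title="Some Title Present In data.json", n=5)  # placeholder title
recs = recommender(req, "./src/book_rec/data.json")
print(recs)
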
src/book_rec/main.py
CHANGED
The module-level file read is removed; book_rec() now just forwards the data path to the shared recommender. (The committed function still carries a few unreachable lines after the early return that re-open the data file; they can safely be dropped.)

import json
data_path = "./src/book_rec/data.json"

# Request Handler
def book_rec(req):
    return recommender(req, data_path)
src/cat_and_dog/main.py
CHANGED
The Keras model is no longer loaded at import time; tf.keras.models.load_model() is now called inside predict(), right before the image is prepared:

def predict(img_data, img_url):
    if img_url == None:
        content = img_data.replace(" ", "+")
        # ... (unchanged lines) ...
    else:
        img = requests.get(img_url).content

    model = tf.keras.models.load_model(model_path)
    img = io.BytesIO(img)
    img = tf.keras.preprocessing.image.load_img(img, target_size=model.input_shape[1:])
    img = np.array(img)
    img = img.reshape(1, *img.shape)
    img = img / 255.

    pred = model.predict(img)[0, 0]
    pred = float(pred)
    # ... rest unchanged ...
src/dup_ques/main.py
CHANGED
The pipeline is no longer unpickled at import time; predict() loads it from the new PIPELINE_PATH constant on each call:

# PIPELINE
PIPELINE_PATH = "./src/dup_ques/pipeline.pkl"

def predict(X):
    pipeline = joblib.load(PIPELINE_PATH)
    return pipeline.predict_proba(X).round(3).tolist()
src/dup_ques/preprocess.py
CHANGED
The stopword list now comes from the new src/utils/stopwords.json instead of NLTK, the word-embedding JSON is no longer parsed at import time, and the NLTK downloads, stemmer, and lemmatizer setup are commented out. The abbreviation/contraction tables and regex patterns are built inside text_preprocess(), and a duplicate sentence_emb() definition later in the file is deleted. Top of the file:

import json

WORD_EMBEDDINGS_PATH = "./src/dup_ques/word_embeddings.json"
STOPWORDS_PATH = "./src/utils/stopwords.json"

with open(STOPWORDS_PATH, 'rb') as f:
    STOPWORDS = json.load(f)

def text_preprocess(q, allow_stopwords=True):

    # import nltk
    # nltk.download('wordnet')
    # nltk.download('omw-1.4')
    # nltk.download('stopwords')

    abbv = {
        "AFAIK":"as far as I know", "IMO": "in my opinion", "IMHO": "in my humble opinion", "LGTM": "look good to me", "AKA": "also know as", "ASAP": "as sone as possible", "BTW": "by the way", "FAQ": "frequently asked questions", "DIY": "do it yourself", "DM": "direct message", "FYI": "for your information", "IC": "i see", "IOW": "in other words", "IIRC": "If I Remember Correctly", "icymi":"In case you missed it", "CUZ": "because", "COS": "because", "nv": "nevermind", "PLZ": "please",
    }
    # https://en.wikipedia.org/wiki/Wikipedia%3aList_of_English_contractions
    # https://stackoverflow.com/a/19794953
    contractions = {
        "ain't": "am not", "aren't": "are not", "can't": "can not", "can't've": "can not have", "'cause": "because", "could've": "could have", "couldn't": "could not", "couldn't've": "could not have", "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hadn't've": "had not have", "hasn't": "has not", "haven't": "have not", "he'd": "he would", "he'd've": "he would have", "he'll": "he will", "he'll've": "he will have", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is", "i'd": "i would", "i'd've": "i would have", "i'll": "i will", "i'll've": "i will have", "i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would", "it'd've": "it would have", "it'll": "it will", "it'll've": "it will have", "it's": "it is", "let's": "let us", "ma'am": "madam", "mayn't": "may not", "might've": "might have", "mightn't": "might not", "mightn't've": "might not have", "must've": "must have", "mustn't": "must not", "mustn't've": "must not have", "needn't": "need not", "needn't've": "need not have", "o'clock": "of the clock", "oughtn't": "ought not", "oughtn't've": "ought not have", "shan't": "shall not", "sha'n't": "shall not", "shan't've": "shall not have", "she'd": "she would", "she'd've": "she would have", "she'll": "she will", "she'll've": "she will have", "she's": "she is", "should've": "should have", "shouldn't": "should not", "shouldn't've": "should not have", "so've": "so have", "so's": "so as", "that'd": "that would", "that'd've": "that would have", "that's": "that is", "there'd": "there would", "there'd've": "there would have", "there's": "there is", "they'd": "they would", "they'd've": "they would have", "they'll": "they will", "they'll've": "they will have", "they're": "they are", "they've": "they have", "to've": "to have", "wasn't": "was not", "we'd": "we would", "we'd've": "we would have", "we'll": "we will", "we'll've": "we will have", "we're": "we are", "we've": "we have", "weren't": "were not", "what'll": "what will", "what'll've": "what will have", "what're": "what are", "what's": "what is", "what've": "what have", "when's": "when is", "when've": "when have", "where'd": "where did", "where's": "where is", "where've": "where have", "who'll": "who will", "who'll've": "who will have", "who's": "who is", "who've": "who have", "why's": "why is", "why've": "why have", "will've": "will have", "won't": "will not", "won't've": "will not have", "would've": "would have", "wouldn't": "would not", "wouldn't've": "would not have", "y'all": "you all", "y'all'd": "you all would", "y'all'd've": "you all would have", "y'all're": "you all are", "y'all've": "you all have", "you'd": "you would", "you'd've": "you would have", "you'll": "you will", "you'll've": "you will have", "you're": "you are", "you've": "you have", "'ve": " have", "n't": " not", "'re": " are", "'ll": " will",
    }
    import re
    html_pattern = re.compile('<.*?>')
    urls_pattern = re.compile(r'https?://\S+|www\.\S+')
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F" # emoticons
        u"\U0001F300-\U0001F5FF" # symbols & pictographs
        u"\U0001F680-\U0001F6FF" # transport & map symbols
        u"\U0001F1E0-\U0001F1FF" # flags (iOS)
        "]+", flags=re.UNICODE)
    # from nltk.stem.porter import PorterStemmer
    # ps = PorterStemmer()
    # from nltk.stem import WordNetLemmatizer
    # lemmatizer = WordNetLemmatizer()
    import string
    punc = string.punctuation
    # from nltk.corpus import stopwords
    # stopwords = stopwords.words('english')

    q = str(q).lower().strip()
    # ... cleaning steps unchanged ...

Inside the word loop, stemming and lemmatizing stay disabled and the stopword check uses the new list:

            # word = ps.stem(word)

            # Lemmatizing
            # word = lemmatizer.lemmatize(word)

            if word not in STOPWORDS or allow_stopwords:
                new_text.append(word)

        q = ' '.join(new_text)

cos_sim() filters with the same list:

def cos_sim(q1, q2, allow_stopwords=True):
    q1 = [i for i in q1.split() if i not in STOPWORDS or allow_stopwords]
    q2 = [i for i in q2.split() if i not in STOPWORDS or allow_stopwords]

    vocab = set(q1 + q2)
    # ... rest unchanged ...

sentence_emb() reads the embeddings file on demand:

def sentence_emb(sent):
    embs = np.zeros(100)
    counter = 0
    with open(WORD_EMBEDDINGS_PATH, 'rb') as f:
        WORD_EMBEDDINGS = json.load(f)
    for word in sent.split():
        emb = WORD_EMBEDDINGS.get(word)
        if emb != None:
            # ... rest unchanged ...

test_fetch_token_features() drops its local STOP_WORDS copy and uses STOPWORDS directly:

    # Get the non-stopwords in Questions
    q1_words = set([word for word in q1_tokens if word not in STOPWORDS])
    q2_words = set([word for word in q2_tokens if word not in STOPWORDS])

    # Get the stopwords in Questions
    q1_stops = set([word for word in q1_tokens if word in STOPWORDS])
    q2_stops = set([word for word in q2_tokens if word in STOPWORDS])
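
Loading word_embeddings.json inside sentence_emb() keeps imports fast but re-reads and re-parses the file on every call. A possible follow-up, sketched here as a suggestion rather than something this commit does, is to cache the parsed dict after the first load:

import json
from functools import lru_cache

WORD_EMBEDDINGS_PATH = "./src/dup_ques/word_embeddings.json"

@lru_cache(maxsize=1)
def get_word_embeddings():
    # First call pays the file-read cost; later calls reuse the parsed dict.
    with open(WORD_EMBEDDINGS_PATH, 'rb') as f:
        return json.load(f)

# sentence_emb() could then call get_word_embeddings() instead of re-opening the file.
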
src/face_analytics/main.py
CHANGED
Same change as cat_and_dog: the module-level model load is removed and tf.keras.models.load_model() is called inside predict():

model_path = "./src/face_analytics/model.h5"

def predict(img_data, img_url):
    if img_url == None:
        # ... (unchanged lines) ...
    else:
        img = requests.get(img_url).content

    model = tf.keras.models.load_model(model_path)
    img = io.BytesIO(img)
    img = tf.keras.preprocessing.image.load_img(img, target_size=model.input_shape[1:])
    img = np.array(img)
    # ... rest unchanged ...
src/movie_2022_rec/main.py
CHANGED
Same change as book_rec: the module-level json.load is removed and movie_2022_rec() forwards the data path to the shared recommender (the unreachable lines left after the early return can be dropped):

import json
data_path = "./src/movie_2022_rec/data.json"

# Request Handler
def movie_2022_rec(req):
    return recommender(req, data_path)
src/movie_rec/main.py
CHANGED
Same change again for the movie recommender:

import json
data_path = "./src/movie_rec/data.json"

# Request Handler
def movie_rec(req):
    return recommender(req, data_path)
src/movie_reviews/main.py
CHANGED
The preprocessing tables and regex patterns move from module level into preprocess(), and the pipeline is unpickled inside predict() from the new PIPELINE_PATH constant:

# SCHEMA
class Schema(BaseModel):
    text: str

# Request Handler
def movie_reviews(req):
    text = req.text
    output = predict(text)
    return output

# PIPELINE
PIPELINE_PATH = "./src/movie_reviews/pipeline.pkl"

def predict(text):
    cleaned = preprocess(text)
    pipeline = joblib.load(PIPELINE_PATH)
    return pipeline.predict_proba([cleaned]).round(3).tolist()

def preprocess(text):

    # PREPROCESSING
    punc = string.punctuation
    abbv = {
        "AFAIK":"as far as I know", "IMO": "in my opinion", "IMHO": "in my humble opinion", "LGTM": "look good to me", "AKA": "also know as", "ASAP": "as sone as possible", "BTW": "by the way", "FAQ": "frequently asked questions", "DIY": "do it yourself", "DM": "direct message", "FYI": "for your information", "IC": "i see", "IOW": "in other words", "IIRC": "If I Remember Correctly", "icymi":"In case you missed it", "CUZ": "because", "COS": "because", "nv": "nevermind", "PLZ": "please",
    }
    html_pattern = re.compile('<.*?>')
    urls_pattern = re.compile(r'https?://\S+|www\.\S+')
    emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F" # emoticons
        u"\U0001F300-\U0001F5FF" # symbols & pictographs
        u"\U0001F680-\U0001F6FF" # transport & map symbols
        u"\U0001F1E0-\U0001F1FF" # flags (iOS)
        "]+", flags=re.UNICODE)

    text = text.lower() # Lowercase
    text = html_pattern.sub(r'', text) # HTML Tags
    text = urls_pattern.sub(r'', text) # urls
    text = text.translate(str.maketrans("", "", punc)) # punctuations
    text = emoji_pattern.sub(r'', text) # Emojis
    new_text = []
    for word in text.split(" "):
        word = abbv.get(word.upper(), word) # abbreviations
        new_text.append(word)
    text = " ".join(new_text)
    return text
src/utils/stopwords.json
ADDED
A plain JSON array of English stopwords, added so the preprocessors can read the list from disk instead of calling nltk.download('stopwords') at import time:

["i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "you're", "you've", "you'll", "you'd", "your", "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "she's", "her", "hers", "herself", "it", "it's", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "that'll", "these", "those", "am", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about", "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "don't", "should", "should've", "now", "d", "ll", "m", "o", "re", "ve", "y", "ain", "aren", "aren't", "couldn", "couldn't", "didn", "didn't", "doesn", "doesn't", "hadn", "hadn't", "hasn", "hasn't", "haven", "haven't", "isn", "isn't", "ma", "mightn", "mightn't", "mustn", "mustn't", "needn", "needn't", "shan", "shan't", "shouldn", "shouldn't", "wasn", "wasn't", "weren", "weren't", "won", "won't", "wouldn", "wouldn't"]