# demo-app / app.py
# sourav36's picture
# Update app.py
# f62ddb1
from flask import Flask, json , render_template
from flask.globals import request
from flask import jsonify
import pickle
import transformers
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
import scipy as sp
import sklearn
import sys
from nltk.corpus import stopwords
import nltk
from gensim.models import ldamodel
import gensim.corpora
from sklearn.feature_extraction.text import CountVectorizer , TfidfTransformer
from sklearn.decomposition import NMF
from sklearn.preprocessing import normalize
# Sentence-embedding model used to encode incoming queries.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Corpus paragraphs and their embeddings, both loaded from pickle files in the
# working directory. predict_query passes them on as the paragraph list and
# the paragraph embeddings, so they are presumably index-aligned — TODO confirm.
# NOTE(security): unpickling executes arbitrary code; only load files that
# come from a trusted source.
with open('cleaned_bio_list.pkl', 'rb') as _bio_list_file:
    bio_list = pickle.load(_bio_list_file)
with open('cleaned_bio_embedded.pkl', 'rb') as _bio_embedded_file:
    bio_embedded = pickle.load(_bio_embedded_file)

app = Flask(__name__)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/predict', methods=['POST'])
def predict_query():
    """Return the corpus paragraphs most similar to the posted query.

    Reads the ``query`` field from the POSTed form data, encodes it with the
    module-level ``model`` and ranks the stored paragraphs against it via
    ``similar_context_paragraph``.

    Returns:
        On success, a JSON-encoded string holding a list of objects with the
        keys ``Paragraph_related`` and ``Book``. When the ``query`` field is
        missing or blank, a JSON-encoded error object with HTTP status 400.
    """
    raw_query = request.form.get('query')
    # A missing field must not be coerced to the literal string 'None' and
    # searched for; reject it (and blank input) explicitly instead.
    if raw_query is None or not raw_query.strip():
        return json.dumps({'error': "Missing required form field 'query'."}), 400

    query_list = [raw_query]
    query_embedded = model.encode(query_list)
    ans = similar_context_paragraph(query_list, query_embedded, bio_list, bio_embedded)
    return json.dumps(ans)
def similar_context_paragraph(queries, query_embeddings, paragraph_list_chapter,
                              paragraph_list_chapter_embedded, *, closest_n=5,
                              book='Haliday_Resnick_walker'):
    """Return the paragraphs closest to each query by cosine distance.

    Args:
        queries: Query items, paired positionally with ``query_embeddings``;
            if the two differ in length, the extra items are ignored.
        query_embeddings: One embedding vector per query.
        paragraph_list_chapter: Corpus paragraphs, indexable by position.
        paragraph_list_chapter_embedded: One embedding vector per corpus
            paragraph, in the same order as ``paragraph_list_chapter``.
        closest_n: Maximum number of paragraphs to return per query.
        book: Value stored under the ``'Book'`` key of every result.

    Returns:
        A flat list of dicts with the keys ``'Paragraph_related'`` and
        ``'Book'``. Results for the first query come first, and within a
        query they are ordered from smallest to largest cosine distance.
        Empty when the corpus is empty or ``closest_n`` is not positive.
    """
    # Imported explicitly rather than reached through ``sp.spatial``: a bare
    # ``import scipy`` does not guarantee that the subpackage is loaded.
    from scipy.spatial.distance import cdist

    if closest_n <= 0 or len(paragraph_list_chapter_embedded) == 0:
        return []

    ans = []
    for _query, query_embedding in zip(queries, query_embeddings):
        # One vectorised call yields the distance to every corpus paragraph.
        distances = cdist([query_embedding], paragraph_list_chapter_embedded, "cosine")[0]
        # sorted() is stable, so equally distant paragraphs keep corpus order.
        ranked = sorted(range(len(distances)), key=distances.__getitem__)
        ans.extend(
            {'Paragraph_related': paragraph_list_chapter[idx], 'Book': book}
            for idx in ranked[:closest_n]
        )
    return ans
if __name__ == '__main__':
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested with FLASK_DEBUG=1 (or "true"/"yes").
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)
    # To listen on all interfaces instead, use:
    # app.run(host='0.0.0.0', port=8080)