|
from flask import Flask, json , render_template
|
|
from flask.globals import request
|
|
from flask import jsonify
|
|
import pickle
|
|
import transformers
|
|
from sentence_transformers import SentenceTransformer
|
|
import pandas as pd
|
|
import numpy as np
|
|
import scipy as sp
|
|
import sklearn
|
|
import sys
|
|
from nltk.corpus import stopwords
|
|
import nltk
|
|
from gensim.models import ldamodel
|
|
import gensim.corpora
|
|
from sklearn.feature_extraction.text import CountVectorizer , TfidfTransformer
|
|
from sklearn.decomposition import NMF
|
|
from sklearn.preprocessing import normalize
|
|
|
|
|
|
# Sentence-embedding model used by predict_query to encode incoming queries.
# NOTE(review): presumably the same model that produced bio_embedded -- confirm.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# bio_list is passed to similar_context_paragraph as the paragraph list and
# bio_embedded as the matching embeddings (presumably index-aligned -- confirm).
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# pickle files that come from a trusted source.
with open('cleaned_bio_list.pkl', 'rb') as bio_list_file:
    bio_list = pickle.load(bio_list_file)

with open('cleaned_bio_embedded.pkl', 'rb') as bio_embedded_file:
    bio_embedded = pickle.load(bio_embedded_file)

# Flask application object that the route decorators in this file attach to.
app = Flask(__name__)
|
|
|
|
@app.route('/')
def index():
    """Serve the root URL by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
|
|
|
|
@app.route('/predict', methods=['POST'])
def predict_query():
    """Return the stored paragraphs most similar to the posted query.

    Reads the ``query`` field from the POSTed form, encodes it with the
    module-level ``model`` and ranks it against ``bio_embedded`` through
    ``similar_context_paragraph``.

    Returns:
        On success, a JSON-encoded string holding a list of objects with the
        keys ``Paragraph_related`` and ``Book``. When the ``query`` field is
        missing or blank, a JSON-encoded error object with HTTP status 400.
    """
    raw_query = request.form.get('query')

    # Without this guard a missing field would become the literal text "None"
    # (via str(None)) and be searched for as if the user had typed it.
    if raw_query is None or not raw_query.strip():
        return json.dumps({'error': "Missing or empty 'query' form field."}), 400

    # model.encode is given a list so it returns one embedding per query.
    query_list = [str(raw_query)]
    query_embedded = model.encode(query_list)

    ans = similar_context_paragraph(query_list, query_embedded, bio_list, bio_embedded)

    # Kept as a plain JSON string (not jsonify) so the response content type
    # seen by existing clients does not change.
    return json.dumps(ans)
|
|
|
|
|
|
def similar_context_paragraph(queries, query_embeddings, paragraph_list_chapter,
                              paragraph_list_chapter_embedded, closest_n=5,
                              book='Haliday_Resnick_walker'):
    """Return the paragraphs whose embeddings lie closest to each query.

    Closeness is the cosine distance computed by ``scipy``'s ``cdist``;
    smaller distances rank first and ties keep their corpus order.

    Args:
        queries: Query strings; only printed for diagnostics.
        query_embeddings: One embedding vector per entry in ``queries``.
        paragraph_list_chapter: Paragraphs, indexable by integer position.
        paragraph_list_chapter_embedded: One embedding vector per paragraph,
            in the same order as ``paragraph_list_chapter``.
        closest_n: Maximum number of paragraphs returned per query
            (non-negative). Defaults to 5.
        book: Value stored under the ``'Book'`` key of every result.

    Returns:
        A flat list of dicts ``{'Paragraph_related': ..., 'Book': ...}``. The
        results of all queries are concatenated in query order, each query
        contributing at most ``closest_n`` entries ordered from nearest to
        farthest. An empty corpus yields an empty list.
    """
    # Imported here so the function does not depend on ``scipy.spatial``
    # having been loaded as a side effect of some other import.
    from scipy.spatial.distance import cdist

    ans = []

    # cdist rejects an empty second operand, and there is nothing to rank.
    if len(paragraph_list_chapter_embedded) == 0:
        return ans

    for query, query_embedding in zip(queries, query_embeddings):
        # One vectorised call yields the distance from this query to every
        # paragraph, instead of one cdist call per paragraph.
        distances = cdist([query_embedding], paragraph_list_chapter_embedded, "cosine")[0]

        # A stable sort keeps equally distant paragraphs in corpus order.
        nearest = np.argsort(distances, kind="stable")[:closest_n]

        print("\n\n======================\n\n")
        print("Query:", query)
        print("\nTop {} most similar sentences in corpus:".format(closest_n))

        ans.extend(
            {'Paragraph_related': paragraph_list_chapter[int(idx)], 'Book': book}
            for idx in nearest
        )

    return ans
|
|
|
|
|
|
|
|
# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    # debug=True turns on Flask's debug mode; intended for local development.
    app.run(debug=True)
|
|
|