"""Flask service that returns the stored paragraphs most similar to a query.

At import time the module loads a sentence-embedding model plus two pickled
artifacts (the paragraph texts and their precomputed embeddings).  The
``/predict`` endpoint embeds the submitted query and returns the closest
paragraphs by cosine distance as a JSON-encoded list.
"""

import pickle
import sys

import gensim.corpora
import nltk
import numpy as np
import pandas as pd
import scipy as sp
import sklearn
import transformers
from flask import Flask, json, jsonify, render_template
from flask.globals import request
from gensim.models import ldamodel
from nltk.corpus import stopwords
# Imported explicitly: ``import scipy`` alone does not guarantee that the
# ``scipy.spatial`` subpackage has been loaded.
from scipy.spatial.distance import cdist
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.preprocessing import normalize

# Number of paragraphs returned per query unless the caller overrides it.
DEFAULT_TOP_N = 5

# Label attached to every result.  The spelling is kept exactly as it was,
# because clients may match on this string.
BOOK_TITLE = 'Haliday_Resnick_walker'


def _load_pickle(path):
    """Load one pickled object from *path*, closing the file afterwards.

    NOTE(security): unpickling executes arbitrary code embedded in the file.
    Only load artifacts produced by a trusted pipeline.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
bio_list = _load_pickle('cleaned_bio_list.pkl')
bio_embedded = _load_pickle('cleaned_bio_embedded.pkl')

app = Flask(__name__)


@app.route('/')
def index():
    """Serve the landing page template."""
    return render_template('index.html')


@app.route('/predict', methods=['POST'])
def predict_query():
    """Return the paragraphs most similar to the posted ``query`` form field.

    Returns:
        A JSON-encoded list of ``{'Paragraph_related': ..., 'Book': ...}``
        objects on success, or a JSON error object with HTTP status 400 when
        the ``query`` field is missing or blank.
    """
    query_string = request.form.get('query')
    # Previously a missing field was stringified to the literal "None" and
    # searched for; reject missing/blank input explicitly instead.
    if query_string is None or not query_string.strip():
        return jsonify({'error': "Missing required form field 'query'."}), 400

    query_list = [query_string]
    query_embedded = model.encode(query_list)
    ans = similar_context_paragraph(
        query_list, query_embedded, bio_list, bio_embedded
    )
    # Kept as a json.dumps string (not jsonify) so the response that existing
    # clients receive is unchanged.
    return json.dumps(ans)


def similar_context_paragraph(queries, query_embeddings, paragraph_list_chapter,
                              paragraph_list_chapter_embedded,
                              closest_n=DEFAULT_TOP_N, book=BOOK_TITLE):
    """Find the paragraphs closest to each query by cosine distance.

    Args:
        queries: Query strings, aligned one-to-one with ``query_embeddings``.
        query_embeddings: One embedding vector per query.
        paragraph_list_chapter: Paragraph texts, indexable by position.
        paragraph_list_chapter_embedded: One embedding vector per paragraph,
            aligned with ``paragraph_list_chapter``.
        closest_n: Maximum number of paragraphs to return per query.
        book: Value stored under the ``'Book'`` key of every result.

    Returns:
        A flat list of ``{'Paragraph_related': paragraph, 'Book': book}``
        dicts: for each query in order, its closest paragraphs from most to
        least similar.  Empty when there are no queries or no paragraphs.
    """
    corpus_matrix = np.asarray(paragraph_list_chapter_embedded)
    query_matrix = np.asarray(query_embeddings)
    if corpus_matrix.size == 0 or query_matrix.size == 0:
        return []

    # One vectorized call: row i holds the distances from query i to every
    # paragraph.  atleast_2d lets a single bare vector stand for one row.
    distance_matrix = cdist(
        np.atleast_2d(query_matrix), np.atleast_2d(corpus_matrix), "cosine"
    )

    ans = []
    for query, distances in zip(queries, distance_matrix):
        print("\n\n======================\n\n")
        print("Query:", query)
        print(f"\nTop {closest_n} most similar sentences in corpus:")

        # A stable sort keeps equally distant paragraphs in corpus order.
        nearest = np.argsort(distances, kind="stable")[:closest_n]
        ans.extend(
            {'Paragraph_related': paragraph_list_chapter[int(idx)], 'Book': book}
            for idx in nearest
        )
    return ans


if __name__ == '__main__':
    # NOTE(security): debug=True enables the interactive debugger, which
    # allows code execution; do not use it on a publicly reachable host.
    app.run(debug=True)
    # app.run(host= '0.0.0.0' , port= 8080)