# SEPredictor / app.py
# YchKhan's picture
# Duplicate from OrganizedProgrammers/SEPredictor
# e46c1c6
# raw
# history blame
# 2.28 kB
from flask import Flask, jsonify, request, render_template
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import torch
import re
# Flask application object; the route handler in this file is registered on it.
app = Flask(__name__)
def extract_embeddings(embeddings_str):
    """Parse every numeric literal in *embeddings_str* into a list of floats.

    The text is scanned left to right for integer, decimal and
    scientific-notation numbers. Everything else (brackets, commas,
    whitespace, words) is ignored.

    Args:
        embeddings_str: Text containing numbers, e.g. ``"[0.12, -3.4e-05]"``.

    Returns:
        The numbers found, as floats, in order of appearance. Empty when the
        text contains no digits.
    """
    # Two details matter for numbers written in scientific notation:
    #   * the exponent sign may be "+" as well as "-"; otherwise "1.5e+02"
    #     is read as two numbers (1.5 and 2.0);
    #   * the fractional digits may be absent ("1.e-05"); otherwise that
    #     value is read as 1.0 followed by -5.0.
    pattern = r'(-?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?)'
    return [float(token) for token in re.findall(pattern, embeddings_str)]
# Reference table loaded once at import time. Its "Embeddings" column holds
# vectors serialized as text, which are parsed back into lists of floats here.
df = pd.read_excel("ebd4appdom.xlsx")

# Sentence encoder used to embed incoming queries.
# NOTE(review): presumably the stored vectors were produced by this same
# model -- confirm, otherwise the similarity scores are not meaningful.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

df['Embeddings'] = df['Embeddings'].apply(extract_embeddings)
descriptions_embeddings = list(df['Embeddings'])

# Parallel per-row lists: position i in each list refers to row i of the table.
patnums, standards, urls, descriptions = (
    list(df[column])
    for column in ("Number", "Standards", "URL", "Description")
)
def split_string(s, max_len, overlap, min_words_count=0):
    """Split *s* into overlapping chunks of whitespace-separated words.

    Chunks of ``max_len`` words are taken from the start of the text, each
    new chunk beginning ``overlap`` words before the end of the previous one.
    Whatever remains once fewer than ``max_len`` words are left past the
    current start (or exactly ``max_len``) forms the final chunk.

    Args:
        s: Text to split; words are delimited by any whitespace.
        max_len: Number of words in every chunk except the last.
        overlap: Number of words shared by two consecutive chunks.
        min_words_count: Chunks are kept only if they contain strictly more
            words than this value.

    Returns:
        The retained chunks, in order, each re-joined with single spaces.
        Empty text yields an empty list with the default ``min_words_count``.

    Raises:
        ValueError: If ``overlap >= max_len`` while the text is longer than
            ``max_len`` words. The window could never advance, and the loop
            would otherwise run forever.
    """
    words = s.split()
    stride = max_len - overlap
    if stride <= 0 and len(words) > max_len:
        raise ValueError(
            "overlap must be smaller than max_len to split a text longer "
            "than max_len words"
        )

    substrings = []
    start = 0
    while start + max_len < len(words):
        substrings.append(" ".join(words[start:start + max_len]))
        start += stride
    # Trailing chunk: everything from the last start position to the end.
    substrings.append(" ".join(words[start:]))

    return [
        chunk for chunk in substrings
        if len(chunk.split()) > min_words_count
    ]
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the search page; on POST, include the best matches for the query.

    A GET request renders ``index.html`` with ``results=None``.

    A POST request reads the ``query`` form field, splits it into 80-word
    chunks overlapping by 3 words, embeds each chunk and ranks the stored
    description embeddings by cosine similarity. For each chunk the
    ``top_k`` best rows (at most five) are reported.

    The template receives ``results`` as a list with one entry per chunk.
    Each entry is a list whose first item is ``[chunk_text, 'sample<N>']``
    and whose remaining items are dicts with the keys ``score``,
    ``standards``, ``desc`` and ``url``.
    """
    if request.method != 'POST':
        return render_template('index.html', results=None)

    query = request.form['query']
    top_k = min(5, len(descriptions))

    results = []
    for position, chunk in enumerate(split_string(query, 80, 3)):
        chunk_embedding = embedder.encode(chunk, convert_to_tensor=True)
        # cos_sim returns a matrix; the query is a single vector, so row 0
        # holds its similarity to every stored description.
        similarities = util.cos_sim(chunk_embedding, descriptions_embeddings)[0]
        best_scores, best_indices = torch.topk(similarities, top_k)

        group = [[chunk, 'sample' + str(position)]]
        for score, idx in zip(best_scores, best_indices):
            group.append({
                'score': round(float(score.item()), 4),
                'standards': standards[idx],
                'desc': descriptions[idx],
                'url': urls[idx],
            })
        results.append(group)

    return render_template('index.html', results=results)
if __name__ == '__main__':
    # Started directly as a script: serve on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")