Spaces:
Sleeping
Sleeping
from flask import Flask, jsonify, request, render_template | |
import pandas as pd | |
import numpy as np | |
from sentence_transformers import SentenceTransformer, util | |
import torch | |
import re | |
# WSGI application object; created with this module's name as the import name.
app = Flask(import_name=__name__)
# One decimal literal: optional leading minus, integer part, optional
# fraction, optional exponent with an optional sign ("-1.25", "3e-05",
# "1.0000e+00"). Compiled once because it is applied to every row.
_FLOAT_PATTERN = re.compile(r'-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?')


def extract_embeddings(embeddings_str):
    """Parse every numeric literal found in ``embeddings_str`` into floats.

    Any non-numeric text around the numbers (brackets, commas, whitespace,
    wrapper names) is ignored; the numbers are returned in order of
    appearance.

    Args:
        embeddings_str: Text containing the serialized numbers.

    Returns:
        A list of ``float`` values, empty when no number is present.
    """
    # The exponent sign may be "+" as well as "-": without accepting "+",
    # a value such as "1.0000e+00" would be split into two numbers (1.0 and
    # 0.0) and corrupt the length and content of the vector.
    return [float(token) for token in _FLOAT_PATTERN.findall(embeddings_str)]
# Reference table, read once at import time from the spreadsheet.
df = pd.read_excel("ebd4appdom.xlsx")

# Sentence encoder used by the request handler to embed incoming text.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# The "Embeddings" column is parsed from text into lists of floats in place.
df['Embeddings'] = df['Embeddings'].apply(extract_embeddings)

# Column-wise Python lists; position i in each list refers to the same row.
descriptions = list(df["Description"])
descriptions_embeddings = list(df["Embeddings"])
patnums = list(df["Number"])
standards = list(df["Standards"])
urls = list(df["URL"])
def split_string(s, max_len, overlap, min_words_count=0):
    """Split ``s`` into overlapping chunks of whitespace-separated words.

    Full windows of ``max_len`` words are emitted while more than ``max_len``
    words remain from the current start; each new window starts ``overlap``
    words before the end of the previous one. The remaining words form the
    final chunk. Chunks are then filtered by word count.

    Args:
        s: Text to split; words are separated by any whitespace.
        max_len: Number of words in each full window.
        overlap: Number of words shared by consecutive windows.
        min_words_count: Only chunks with strictly MORE words than this are
            kept (so the default ``0`` drops empty chunks only).

    Returns:
        A list of chunk strings, each with its words joined by single spaces.

    Raises:
        ValueError: If the text is longer than one window and
            ``overlap >= max_len``, because the window could never advance.
    """
    words = s.split()

    # With overlap >= max_len the start index would never move forward and
    # the loop below would run forever while appending chunks. Only reject
    # the call when the loop would actually be entered, so short inputs keep
    # returning their single chunk.
    if len(words) > max_len and overlap >= max_len:
        raise ValueError(
            "overlap must be smaller than max_len to split a text longer "
            "than max_len words"
        )

    substrings = []
    start = 0
    while start + max_len < len(words):
        end = start + max_len
        substrings.append(" ".join(words[start:end]))
        start = end - overlap
    # Whatever is left (possibly nothing) becomes the last chunk.
    substrings.append(" ".join(words[start:]))

    return [
        chunk for chunk in substrings
        if len(chunk.split()) > min_words_count
    ]
# NOTE(review): the view was not registered on the app, so no URL reached it.
# The "/" path is assumed from the view/template name - confirm.
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the search page and, on POST, the closest stored descriptions.

    On a non-POST request the template is rendered with ``results=None``.

    On POST the ``query`` form field is split into chunks of 80 words with a
    3-word overlap. Each chunk is embedded and compared by cosine similarity
    against the stored description embeddings, and the best matches (at most
    5) are collected.

    Returns:
        The rendered ``index.html`` template. On POST, ``results`` is a list
        with one entry per chunk; each entry is a list whose first element is
        ``[chunk_text, 'sample<N>']`` followed by dicts with the keys
        ``score`` (rounded to 4 decimals), ``standards``, ``desc`` and
        ``url``, in the order returned by ``torch.topk``.
    """
    if request.method != 'POST':
        return render_template('index.html', results=None)

    query = request.form['query']
    user_samples = split_string(query, 80, 3)
    # Never ask for more matches than there are stored descriptions.
    top_k = min(5, len(descriptions))

    results = []
    for sample_number, user_sample in enumerate(user_samples):
        sample_results = [[user_sample, 'sample' + str(sample_number)]]
        sample_embedding = embedder.encode(user_sample, convert_to_tensor=True)
        # cos_sim returns one row per query embedding; only one is passed in.
        cos_scores = util.cos_sim(sample_embedding, descriptions_embeddings)[0]
        top_scores, top_indices = torch.topk(cos_scores, top_k)
        # Convert to plain Python numbers so list indexing and rounding do
        # not depend on 0-dim tensors.
        for score, idx in zip(top_scores.tolist(), top_indices.tolist()):
            sample_results.append(dict(
                score=round(score, 4),
                standards=standards[idx],
                desc=descriptions[idx],
                url=urls[idx],
            ))
        results.append(sample_results)

    return render_template('index.html', results=results)
if __name__ == '__main__':
    # When executed as a script, serve on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")