from flask import Flask, jsonify, request, render_template
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import torch
import re

app = Flask(__name__)


# One decimal literal: optional sign, then either digits with an optional
# fractional part ("12", "12.", "12.5") or a bare fraction (".5"), then an
# optional exponent whose sign may be "+" or "-" ("e-05", "E+02").
_FLOAT_PATTERN = re.compile(r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?')


def extract_embeddings(embeddings_str):
    """Parse every numeric literal found in *embeddings_str* into a float.

    Anything that is not part of a number (brackets, commas, whitespace,
    other text) is ignored, so the function works on any textual rendering
    of a flat sequence of numbers.

    Scientific notation is supported with either exponent sign, and a
    mantissa may end with a bare dot. Both forms matter: a literal such as
    ``1.e+05`` must be read as one value, not split into ``1`` and ``5``.

    Args:
        embeddings_str: Text containing the numbers to extract.

    Returns:
        A list of floats in the order the literals appear; empty when the
        text contains no number.
    """
    return [float(token) for token in _FLOAT_PATTERN.findall(embeddings_str)]

# Load the search corpus from the spreadsheet. The "Embeddings" column holds
# each embedding as text, which is parsed back into a list of floats below.
df = pd.read_excel("ebd4appdom.xlsx")
# Sentence encoder used to embed incoming queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
df["Embeddings"] = df["Embeddings"].apply(extract_embeddings)

# Plain per-column lists; they all follow the DataFrame's row order, so the
# same index refers to the same row in every one of them.
descriptions = list(df["Description"])
descriptions_embeddings = list(df["Embeddings"])
standards = list(df["Standards"])
urls = list(df["URL"])
patnums = list(df["Number"])

def split_string(s, max_len, overlap, min_words_count=0):
    """Split *s* into overlapping chunks of whitespace-separated words.

    Chunks of ``max_len`` words are taken while more than ``max_len`` words
    remain from the current start; each new chunk starts ``overlap`` words
    before the end of the previous one. Whatever is left forms the final
    chunk. Words are re-joined with single spaces.

    Args:
        s: Text to split.
        max_len: Number of words in every chunk except the last.
        overlap: Number of words shared by two consecutive chunks.
        min_words_count: Chunks are kept only when they contain strictly
            more words than this; the default of 0 drops empty chunks.

    Returns:
        The list of kept chunks, in order. Empty or whitespace-only input
        yields an empty list.

    Raises:
        ValueError: If the text is longer than ``max_len`` words while
            ``overlap >= max_len``. The window could then never move
            forward and the loop would not terminate.
    """
    words = s.split()
    step = max_len - overlap
    if step <= 0 and len(words) > max_len:
        raise ValueError(
            "overlap must be smaller than max_len to split text longer "
            "than max_len words"
        )

    # Keep chunks as word lists so the length filter below does not have to
    # split every joined string again.
    chunks = []
    start = 0
    while start + max_len < len(words):
        chunks.append(words[start:start + max_len])
        start += step
    chunks.append(words[start:])

    return [" ".join(chunk) for chunk in chunks if len(chunk) > min_words_count]

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the search page and, on POST, the matches for the query.

    The submitted ``query`` form field is cut into chunks of at most 80
    words with a 3-word overlap. Each chunk is embedded and compared by
    cosine similarity against the stored description embeddings; up to five
    best rows are reported per chunk.

    Returns:
        The rendered ``index.html`` template. ``results`` is ``None`` for a
        GET request. For a POST it is a list with one entry per chunk; each
        entry is a list whose first element is ``[chunk_text, 'sample<i>']``
        followed by one dict per match with the keys ``score``,
        ``standards``, ``desc`` and ``url``.
    """
    if request.method != 'POST':
        return render_template('index.html', results=None)

    query = request.form['query']
    user_samples = split_string(query, 80, 3)
    if not user_samples:
        # Nothing to search for (empty or whitespace-only query).
        return render_template('index.html', results=[])

    top_k = min(5, len(descriptions))
    # Build the corpus tensor once per request. Passing the nested Python
    # list to cos_sim makes it rebuild this tensor for every chunk.
    corpus_embeddings = torch.as_tensor(descriptions_embeddings)

    results = []
    for sample_number, user_sample in enumerate(user_samples):
        sample_embedding = embedder.encode(user_sample, convert_to_tensor=True)
        cos_scores = util.cos_sim(sample_embedding, corpus_embeddings)[0]
        top_scores, top_indices = torch.topk(cos_scores, top_k)

        # The first element identifies the chunk; the matches follow it.
        sample_results = [[user_sample, 'sample' + str(sample_number)]]
        # tolist() yields plain floats/ints, so the lists below are indexed
        # with ordinary integers rather than 0-d tensors.
        for score, row in zip(top_scores.tolist(), top_indices.tolist()):
            sample_results.append(dict(
                score=round(score, 4),
                standards=standards[row],
                desc=descriptions[row],
                url=urls[row],
            ))
        results.append(sample_results)

    return render_template('index.html', results=results)

# When executed as a script, serve the app on every network interface,
# port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")