from sentence_transformers import SentenceTransformer
import torch
import gradio as gr
from scipy.spatial.distance import cosine

# Force CPU-only inference: disable cuDNN and make CUDA report as unavailable.
torch.backends.cudnn.enabled = False
torch.cuda.is_available = lambda: False

modelname = "algolia/algolia-large-multilang-generic-v2410"
model = SentenceTransformer(modelname)


def get_embedding(text):
    # Encode a single string and return its embedding as a 1-D vector.
    embedding = model.encode([text])
    return embedding[0]


def compute_similarity(query, documents):
    # Embed the query and every document.
    query_emb = get_embedding(query)
    doc_embeddings = [get_embedding(doc) for doc in documents]

    # Cosine similarity is 1 minus the cosine distance; higher means more similar.
    similarities = [1 - cosine(query_emb, doc_emb) for doc_emb in doc_embeddings]

    # Sort documents from most to least similar to the query.
    ranked_docs = sorted(zip(documents, similarities), key=lambda x: x[1], reverse=True)

    return [{"document": doc, "similarity_score": round(sim, 4)} for doc, sim in ranked_docs]
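
# A direct call to compute_similarity, bypassing the UI. The query, documents,
# and scores below are illustrative placeholders; actual scores depend on the model.
#
#   compute_similarity(
#       "query: how do I reset my password?",
#       ["Reset your password from the account settings page.",
#        "Our office is closed on public holidays."],
#   )
#   -> [{"document": "Reset your password ...", "similarity_score": 0.83},
#       {"document": "Our office ...", "similarity_score": 0.12}]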


def gradio_compute_similarity(query, documents):
    # Prepend the "query: " prefix that this embedding model uses to mark search queries.
    query = "query: " + query

    # The textbox holds one document per line.
    documents_list = documents.split("\n")
    results = compute_similarity(query, documents_list)
    return results


# Simple Gradio UI: a query box, a multi-line documents box, and a JSON view
# of the ranked results.
iface = gr.Interface(
    fn=gradio_compute_similarity,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query here"),
        gr.Textbox(lines=5, label="Documents", placeholder="Enter a list of documents, one per line"),
    ],
    outputs=gr.JSON(label="Ranked Results"),
    allow_flagging="never",
    title="Document Similarity",
    description="Provide a query and a list of documents. See the ranked similarity scores.",
)

iface.launch()
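
# launch() serves the app locally by default. If a temporary public URL is needed
# (for example, when running on a remote machine or in a notebook), launch()
# also accepts share=True:
#
#   iface.launch(share=True)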