# File size: 1,696 Bytes
# 2505678
from sentence_transformers import SentenceTransformer
import torch
import gradio as gr
from scipy.spatial.distance import cosine
# Disable CUDA: switch cuDNN off and make every later call to
# torch.cuda.is_available() report False for the rest of this process.
torch.backends.cudnn.enabled = False
torch.cuda.is_available = lambda: False

# Load the embedding model once at import time so every request reuses it.
# The device is pinned explicitly: the patch above only hides CUDA, and
# SentenceTransformer chooses a device on its own when none is given.
modelname = "algolia/algolia-large-en-generic-v2410"
model = SentenceTransformer(modelname, device="cpu")
def get_embedding(text):
    """Return the embedding the module-level ``model`` produces for ``text``.

    The text is wrapped in a one-element batch for ``model.encode`` and the
    single resulting row is unwrapped before it is returned.
    """
    batch = [text]
    vectors = model.encode(batch)
    return vectors[0]
def compute_similarity(query, documents):
    """Rank ``documents`` by cosine similarity to ``query``.

    Args:
        query: Text to compare against; embedded with ``get_embedding``.
        documents: Texts to rank; each one is embedded with ``get_embedding``.

    Returns:
        A list of ``{"document": ..., "similarity_score": ...}`` dicts sorted
        from highest to lowest similarity, with scores rounded to 4 decimals.
    """
    query_vector = get_embedding(query)
    doc_vectors = list(map(get_embedding, documents))

    # scipy's ``cosine`` is a distance, so similarity is its complement.
    scores = []
    for doc_vector in doc_vectors:
        scores.append(1 - cosine(query_vector, doc_vector))

    # Pair each document with its score and order best match first.
    ranking = list(zip(documents, scores))
    ranking.sort(key=lambda pair: pair[1], reverse=True)

    # Format output
    results = []
    for text, score in ranking:
        results.append({"document": text, "similarity_score": round(score, 4)})
    return results
# Gradio interface function
def gradio_compute_similarity(query, documents):
    """Rank the documents entered in the UI against the query.

    Args:
        query: Raw query text from the "Query" textbox.
        documents: Contents of the "Documents" textbox, one document per line.

    Returns:
        The list returned by ``compute_similarity`` for the non-blank document
        lines, or an empty list when no non-blank line was supplied.
    """
    # Prefix the query string
    query = "query: " + query

    # Split documents by lines for the Gradio input. Each line is stripped so
    # CRLF input does not leave a trailing "\r" on the document, and blank
    # lines (including the one produced by a trailing newline or an empty
    # textbox) are dropped instead of being ranked as empty documents.
    stripped_lines = (line.strip() for line in (documents or "").split("\n"))
    documents_list = [line for line in stripped_lines if line]

    # Nothing to rank: skip the query embedding altogether.
    if not documents_list:
        return []

    return compute_similarity(query, documents_list)
# Gradio Interface
# Two text inputs (the query and the newline-separated documents) are passed
# to gradio_compute_similarity; its return value is shown in a JSON component.
iface = gr.Interface(
    fn=gradio_compute_similarity,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query here"),
        gr.Textbox(
            lines=5,
            label="Documents",
            placeholder="Enter a list of documents, one per line",
        ),
    ],
    outputs=gr.JSON(label="Ranked Results"),
    allow_flagging="never",
    title="Document Similarity",
    description="Provide a query and a list of documents. See the ranked similarity scores.",
)

# Start the app when the script is executed.
iface.launch()