# File size: 1,696 Bytes
# 2505678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from sentence_transformers import SentenceTransformer
import torch
import gradio as gr
from scipy.spatial.distance import cosine

# Disable CUDA: switch off the cuDNN backend and replace
# torch.cuda.is_available with a stub that always reports "no CUDA".
# NOTE(review): this monkeypatches torch for the whole process; presumably it
# is here so the model below ends up on the CPU -- confirm.
torch.backends.cudnn.enabled = False
torch.cuda.is_available = lambda : False

# Load the sentence-embedding model once, at import time; get_embedding()
# reuses this single module-level instance for every request.
modelname = "algolia/algolia-large-en-generic-v2410"
model = SentenceTransformer(modelname) 
def get_embedding(text):
    """Return the embedding of a single text.

    The text is wrapped in a one-element batch, passed to the module-level
    ``model.encode``, and the first (only) row of the result is returned.
    """
    batch = [text]
    vectors = model.encode(batch)
    return vectors[0]

def compute_similarity(query, documents):
    """Rank documents by cosine similarity to the query.

    Args:
        query: Query text; embedded with ``get_embedding``.
        documents: Sequence of document texts; each one is embedded with
            ``get_embedding``.

    Returns:
        A list of ``{"document": ..., "similarity_score": ...}`` dicts, one
        per document, ordered from highest to lowest score. Scores are
        rounded to 4 decimal places.
    """
    query_vector = get_embedding(query)
    document_vectors = list(map(get_embedding, documents))

    # scipy's cosine() is a distance, so similarity is 1 minus that value.
    scores = []
    for vector in document_vectors:
        scores.append(1 - cosine(query_vector, vector))

    # Stable descending sort: equal scores keep their input order.
    ranking = list(zip(documents, scores))
    ranking.sort(key=lambda pair: pair[1], reverse=True)

    results = []
    for text, score in ranking:
        results.append({"document": text, "similarity_score": round(score, 4)})
    return results

# Gradio interface function
def gradio_compute_similarity(query, documents):
    """Rank the documents entered in the Gradio form against the query.

    Args:
        query: Raw query text from the "Query" textbox. ``None`` is treated
            as an empty string.
        documents: Contents of the "Documents" textbox, one document per
            line. ``None`` is treated as an empty string.

    Returns:
        The ranked list produced by ``compute_similarity`` for the non-blank
        documents, or an empty list when no document was supplied.
    """
    # Split on "\n" only (as the textbox delivers it), then trim each line so
    # stray "\r" and surrounding spaces vanish, and drop blank lines: an empty
    # string is not a document and should not be embedded or ranked.
    stripped_lines = (line.strip() for line in (documents or "").split("\n"))
    documents_list = [line for line in stripped_lines if line]

    # Nothing to rank: skip the model entirely.
    if not documents_list:
        return []

    # Prefix the query string.
    # NOTE(review): presumably the model expects a "query: " prefix on
    # queries -- confirm against the model card.
    prefixed_query = "query: " + (query or "")
    return compute_similarity(prefixed_query, documents_list)

# Gradio Interface: two text inputs (the query and a newline-separated list of
# documents) wired to gradio_compute_similarity, with the ranked results
# rendered by a JSON output component.
iface = gr.Interface(
    fn=gradio_compute_similarity,
    inputs=[
        gr.Textbox(label="Query", placeholder="Enter your query here"),
        # Multi-line box: gradio_compute_similarity splits its content on
        # newlines, so each line is one document.
        gr.Textbox(lines=5, label="Documents", placeholder="Enter a list of documents, one per line")
    ],
    outputs=gr.JSON(label="Ranked Results"),
    allow_flagging="never",
    title="Document Similarity",
    description="Provide a query and a list of documents. See the ranked similarity scores."
)

# Launch the app as soon as this module is executed. There is no
# `if __name__ == "__main__":` guard, so importing the module launches it too.
iface.launch()