|
import numpy as np |
|
import gradio as gr |
|
from sentence_transformers import SentenceTransformer, util |
|
|
|
|
|
# Hugging Face Hub id of the sentence-embedding checkpoint used by this demo.
_MODEL_ID = "Omartificial-Intelligence-Space/Arabic-Nli-Matryoshka"

# Loaded once at import time; predict() reads this module-level instance.
model = SentenceTransformer(_MODEL_ID)
|
|
|
|
|
|
|
def _is_blank(text):
    """Return True when *text* is None or a string holding only whitespace."""
    return text is None or (isinstance(text, str) and not text.strip())


def predict(mode, sentence1, sentence2=None, sentence3=None, sentence4=None, dimension="64"):
    """Score how similar ``sentence1`` is to one or three other sentences.

    Args:
        mode: ``"Compare one to three"`` compares ``sentence1`` against
            ``sentence2``..``sentence4``; any other value compares
            ``sentence1`` against ``sentence2`` only.
        sentence1: The reference sentence.
        sentence2: First comparison sentence (required in both modes).
        sentence3: Second comparison sentence (one-to-three mode only).
        sentence4: Third comparison sentence (one-to-three mode only).
        dimension: Number of leading embedding components to keep, as an
            int or numeric string. ``None`` falls back to 64, the value of
            the signature default.

    Returns:
        On success, a dict with the keys ``"Selected Dimension"``,
        ``"Input Sentences"`` and ``"Similarity Scores"``. When a required
        sentence is missing (``None``, empty, or whitespace-only), a
        ``(message, {})`` tuple describing what is missing.

    Raises:
        ValueError: If ``dimension`` is a string that is not an integer.
    """
    # A dropdown with nothing selected can deliver None; int(None) would raise.
    dimension = 64 if dimension is None else int(dimension)

    one_to_three = mode == "Compare one to three"

    # Validate before touching the model. Mode-specific checks come first so
    # the messages returned for previously-rejected inputs stay the same.
    if one_to_three:
        if any(_is_blank(s) for s in (sentence2, sentence3, sentence4)):
            return "Please provide three sentences for comparison.", {}
        sentences = [sentence1, sentence2, sentence3, sentence4]
    else:
        if _is_blank(sentence2):
            return "Please provide the second sentence for comparison.", {}
        sentences = [sentence1, sentence2]

    if _is_blank(sentence1):
        return "Please provide the first sentence for comparison.", {}

    result = {
        "Selected Dimension": dimension,
        "Input Sentences": {
            "Sentence 1": sentence1,
            "Sentence 2": sentence2,
            "Sentence 3": sentence3,
            "Sentence 4": sentence4,
        },
        "Similarity Scores": {},
    }

    embeddings = model.encode(sentences)
    # Keep only the first `dimension` components along the last axis.
    # NOTE(review): relies on the checkpoint being Matryoshka-trained so that
    # truncated prefixes remain meaningful — confirm against the model card.
    embeddings = embeddings[..., :dimension]

    if one_to_three:
        # Row 0 holds sentence1's score against each remaining sentence.
        similarities = util.cos_sim(embeddings[0], embeddings[1:])
        result["Similarity Scores"] = {
            f"Sentence {position}": float(score)
            for position, score in enumerate(similarities[0], start=2)
        }
    else:
        similarity = util.cos_sim(embeddings[0], embeddings[1])
        result["Similarity Scores"] = {"Similarity Score": float(similarity)}

    return result
|
|
|
|
|
# --- Gradio input / output components ---------------------------------------
# Both dropdowns get an explicit default. Without one, Gradio releases that
# treat an unset `value` as "nothing selected" hand None to the callback when
# the user submits without choosing. "64" matches predict()'s own default for
# `dimension` and the bundled examples.
mode_dropdown = gr.Dropdown(
    choices=["Compare two sentences", "Compare one to three"],
    value="Compare two sentences",
    label="Mode",
)
dimension_dropdown = gr.Dropdown(
    choices=["768", "512", "256", "128", "64"],
    value="64",
    label="Embedding Dimension",
)

sentence1_input = gr.Textbox(
    lines=2,
    placeholder="Enter the first sentence here...",
    label="Sentence 1",
)
sentence2_input = gr.Textbox(
    lines=2,
    placeholder="Enter the second sentence here...",
    label="Sentence 2 (or first of three for mode)",
)
sentence3_input = gr.Textbox(
    lines=2,
    placeholder="Enter the third sentence here...",
    label="Sentence 3",
)
sentence4_input = gr.Textbox(
    lines=2,
    placeholder="Enter the fourth sentence here...",
    label="Sentence 4",
)

# Order must match predict()'s positional parameters:
# (mode, sentence1, sentence2, sentence3, sentence4, dimension).
inputs = [
    mode_dropdown,
    sentence1_input,
    sentence2_input,
    sentence3_input,
    sentence4_input,
    dimension_dropdown,
]

outputs = gr.JSON(label="Detailed Similarity Scores")
|
|
|
# Sentences reused by both example rows below.
_EXAMPLE_REFERENCE = "يجلس شاب ذو شعر أشقر على الحائط يقرأ جريدة بينما تمر امرأة وفتاة شابة."
_EXAMPLE_SECOND = "ذكر شاب ينظر إلى جريدة بينما تمر إمرأتان بجانبه"
_EXAMPLE_THIRD = "الشاب نائم بينما الأم تقود ابنتها إلى الحديقة"
_EXAMPLE_FOURTH = "رجل يقرأ الجريدة في الحديقة"

# One row per example; columns follow the order of the `inputs` list:
# mode, sentence 1, sentence 2, sentence 3, sentence 4, embedding dimension.
examples = [
    [
        "Compare one to three",
        _EXAMPLE_REFERENCE,
        _EXAMPLE_SECOND,
        _EXAMPLE_THIRD,
        _EXAMPLE_FOURTH,
        "64",
    ],
    # Two-sentence mode leaves sentences 3 and 4 unset.
    [
        "Compare two sentences",
        _EXAMPLE_REFERENCE,
        _EXAMPLE_SECOND,
        None,
        None,
        "64",
    ],
]
|
|
|
|
|
# Assemble the demo UI around predict() and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False,
    title="Arabic Sentence Similarity with Matryoshka Model",
    description="Compute the semantic similarity between Arabic sentences using the Matryoshka SentenceTransformer model.",
    # Markdown footer; split across lines only for readability, the
    # concatenated string is unchanged.
    article=(
        "Author: OMER NACAR. Model from Hugging Face Hub: "
        "[Omartificial-Intelligence-Space/Arabic-Nli-Matryoshka]"
        "(https://huggingface.co/Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka)"
    ),
)

demo.launch(debug=True, share=True)
|
|