# Hugging Face Space: text-embedding demo (intfloat/multilingual-e5-small).
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
# Load the public multilingual E5 embedding model from the Hugging Face Hub.
# Both objects are module-level so the (slow) download/initialization happens
# once at startup, not per request.
model_name = "intfloat/multilingual-e5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()  # inference only: disable dropout / training-mode layers
# Inference function
def get_embedding(text):
    """Return the L2-normalized sentence embedding of *text* as a list of floats.

    Args:
        text: Raw user text. The E5 task prefix ("query: ") is added here,
            so callers pass plain text.

    Returns:
        A Python list of floats (length = model hidden size) with unit L2 norm.
    """
    # E5 models expect a task prefix: "query: <text>" for retrieval queries.
    # Truncate to the model's max length so long inputs don't raise.
    encoded_input = tokenizer(
        "query: " + text,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        model_output = model(**encoded_input)
    # Per the E5 model card, sentence embeddings are the attention-mask-weighted
    # AVERAGE of the last hidden states — not the CLS token, which these models
    # were not trained to use.
    mask = encoded_input['attention_mask'].unsqueeze(-1)  # (1, seq, 1)
    summed = (model_output.last_hidden_state * mask).sum(dim=1)
    embeddings = summed / mask.sum(dim=1).clamp(min=1e-9)  # avoid div-by-zero
    normed = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return normed[0].tolist()  # return list
# Gradio UI: a single text box in, the embedding rendered as text out.
iface = gr.Interface(
    fn=get_embedding,
    inputs=gr.Textbox(label="Enter text"),
    outputs=gr.Textbox(label="Embedding"),
    title="Text Embedder",
)
# Launch at import time — Spaces run this file directly as the app entry point.
iface.launch()