# Hugging Face Space snapshot — status: Sleeping, file size: 898 bytes, revision 7a4200c
import gradio as gr
from transformers import AutoTokenizer, AutoModel
import torch
# Load the public multilingual E5 embedding model from the Hugging Face Hub.
# NOTE(review): this downloads weights on first run — requires network access.
model_name = "intfloat/multilingual-e5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Inference function
def get_embedding(text):
    """Embed *text* with the E5 model and return a unit-norm vector as a list of floats.

    The multilingual-e5 model card specifies two usage requirements honored here:
    the "query: " input prefix and *average pooling* over token hidden states
    (weighted by the attention mask) — not the first/CLS token.
    """
    # E5 models expect queries prefixed with "query: ".
    # truncation=True guards against inputs longer than the model's 512-token limit,
    # which would otherwise raise a position-embedding index error.
    encoded_input = tokenizer(
        "query: " + text, return_tensors='pt', truncation=True, max_length=512
    )
    with torch.no_grad():
        model_output = model(**encoded_input)
    # Average pool over real (non-padding) tokens, per the E5 model card.
    mask = encoded_input['attention_mask'].unsqueeze(-1)  # (1, seq, 1)
    summed = (model_output.last_hidden_state * mask).sum(dim=1)
    embeddings = summed / mask.sum(dim=1).clamp(min=1e-9)  # avoid division by zero
    normed = torch.nn.functional.normalize(embeddings, p=2, dim=1)
    return normed[0].tolist()  # plain Python list for display/JSON
# Gradio UI: one text input mapped through get_embedding to a text output.
text_in = gr.Textbox(label="Enter text")
vec_out = gr.Textbox(label="Embedding")
iface = gr.Interface(
    fn=get_embedding,
    inputs=text_in,
    outputs=vec_out,
    title="Text Embedder",
)
iface.launch()