Spaces:
Sleeping
Sleeping
File size: 1,062 Bytes
56ba8e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import streamlit as st
from transformers import AutoTokenizer, AutoModel
import torch
# Load the model and tokenizer
# `st.cache(allow_output_mutation=True)` is Streamlit's deprecated legacy
# caching API; `st.cache_resource` is the documented replacement for global,
# unserializable resources such as ML models. Fall back to the legacy call only
# when running on a Streamlit release that predates `cache_resource`.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def load_model():
    """Load the tokenizer and model for Salesforce/SFR-Embedding-Mistral.

    The result is cached by Streamlit, so the checkpoint is loaded once and
    the same objects are reused on later calls instead of being reloaded.

    Returns:
        A ``(tokenizer, model)`` tuple produced by
        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``.
    """
    checkpoint = "Salesforce/SFR-Embedding-Mistral"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    return tokenizer, model


# Module-level handles used by embed_text().
tokenizer, model = load_model()
def embed_text(text):
    """Embed *text* by averaging the model's final hidden states.

    Args:
        text: Input handed to the module-level ``tokenizer``. It is truncated
            to at most 32768 tokens.

    Returns:
        A NumPy array holding ``last_hidden_state`` averaged over dim 1
        (presumably the token axis, giving one vector per input -- confirm
        against the model's output layout).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=32768)
    # Pure inference: disable autograd so no graph or intermediate activations
    # are retained for a backward pass that never happens. Tensors produced
    # here do not require grad, so they convert to NumPy without `.detach()`.
    with torch.no_grad():
        outputs = model(**inputs)
        # NOTE(review): the model card's usage example appears to pool the last
        # token rather than the mean -- confirm mean pooling is intended.
        pooled = outputs.last_hidden_state.mean(dim=1)
    return pooled.numpy()
def main():
    """Render the page: a text box, a button, and the resulting embeddings.

    When the button is pressed with non-blank text, the text is embedded via
    ``embed_text`` and the result is written to the page. Blank input
    (empty or whitespace-only) produces a warning instead.
    """
    st.title("Text Embedding using Salesforce/SFR-Embedding-Mistral")

    # Text input
    text = st.text_area("Enter text here:", height=150)

    if not st.button("Get Embeddings"):
        return

    # Whitespace-only input has nothing to embed; treat it like empty input
    # rather than sending it through the model.
    if not (text and text.strip()):
        st.warning("Please enter some text to process.")
        return

    # The original text (not the stripped copy) is embedded so that results
    # for valid input are unchanged.
    with st.spinner('Fetching embeddings...'):
        embeddings = embed_text(text)
        st.write(embeddings)


if __name__ == "__main__":
    main()
|