File size: 505 Bytes
205b451
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
import torch
from transformers import AutoModel, AutoTokenizer

# Checkpoint identifier handed to ``from_pretrained``; the tokenizer and the
# encoder are both loaded from the same one so they stay in sync.
_CHECKPOINT = "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base"

# Loaded once at import time and shared by every caller in this module.
PhobertTokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)

def generate_embedding(sentence: str) -> list:
    """Return the model's pooled embedding for *sentence* as a list of floats.

    The sentence is tokenized with the module-level ``PhobertTokenizer``
    (truncation enabled), run through the module-level ``model``, and the
    ``pooler_output`` row for that single input is converted to a plain
    Python list.

    NOTE(review): the upstream usage example for this checkpoint appears to
    word-segment Vietnamese text before tokenizing; this function does not,
    so confirm whether callers are expected to pre-segment ``sentence``.

    Args:
        sentence: The text to embed.

    Returns:
        The pooled embedding of ``sentence`` as a list of Python floats.
    """
    inputs = PhobertTokenizer(
        sentence, padding=True, truncation=True, return_tensors="pt"
    )
    # Inference only: skip autograd bookkeeping so no graph is built and kept
    # alive for a result that is immediately converted to a list.
    with torch.no_grad():
        # Only pooler_output is consumed, so per-layer hidden states are not
        # requested.
        pooled = model(**inputs, return_dict=True).pooler_output
    # Tensor.tolist() yields Python floats directly and does not require the
    # tensor to live on the CPU, unlike going through .numpy().
    return pooled[0].tolist()