Spaces:
Runtime error
Runtime error
File size: 935 Bytes
8d8e1b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import torch
import numpy as np
from typing import List
from transformers import AutoTokenizer, AutoModel
from embeddings.encoder import EmbeddingEncoder
def cls_pooling(model_output):
    """Return index 0 along the second axis of ``last_hidden_state``.

    Args:
        model_output: Object exposing a ``last_hidden_state`` attribute that
            supports ``[:, 0]`` indexing.
            NOTE(review): presumably a (batch, seq_len, hidden) tensor, so
            this selects the first-token ([CLS]) vector of every sequence --
            confirm against the model in use.

    Returns:
        ``model_output.last_hidden_state[:, 0]``.
    """
    hidden_states = model_output.last_hidden_state
    first_token = hidden_states[:, 0]
    return first_token
class HuggingFaceEncoder(EmbeddingEncoder):
    """Embedding encoder backed by a Hugging Face ``transformers`` model.

    The model and tokenizer named by ``model_name`` are loaded through
    ``AutoModel`` / ``AutoTokenizer``; sentence embeddings are obtained by
    applying ``cls_pooling`` to the model output.
    """

    def __init__(self, model_name: str):
        """Load the model and its tokenizer.

        Args:
            model_name: Identifier passed unchanged to
                ``AutoModel.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
        """
        # NOTE(review): EmbeddingEncoder.__init__ is not visible from here and
        # is not invoked -- confirm the base class needs no initialization.
        self.model_name = model_name
        self.model = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def generate_embeddings(self, sentences: List[str]) -> List[np.ndarray]:
        """Compute one embedding per input sentence.

        Args:
            sentences: Texts to embed. They are tokenized as one padded,
                truncated batch.

        Returns:
            A list with one NumPy array per sentence, in input order. An
            empty input yields an empty list.
        """
        # Nothing to encode: avoid handing an empty batch to the tokenizer
        # and model.
        if not sentences:
            return []

        # Tokenize sentences
        encoded_input = self.tokenizer(
            sentences, padding=True, truncation=True, return_tensors='pt'
        )

        # Compute token embeddings; inference only, so skip autograd tracking.
        with torch.no_grad():
            model_output = self.model(**encoded_input, return_dict=True)

        # Perform pooling
        embeddings = cls_pooling(model_output)

        # Honour the declared List[np.ndarray] return type: move the pooled
        # tensor to host memory and split it into one array per sentence.
        return list(embeddings.detach().cpu().numpy())
|