|
from transformers import AutoTokenizer,TFAutoModel |
|
import torch |
|
from torch import nn |
|
import tensorflow |
|
|
|
|
|
|
|
# Checkpoint fine-tuned for semantic search; embeddings are meant to be
# compared with a dot product (hence "dot-v1").
model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

# Load the tokenizer that matches the checkpoint from the Hugging Face Hub
# (downloads on first use, then reads from the local cache).
tokenizer=AutoTokenizer.from_pretrained(model_ckpt)

# from_pt=True: the checkpoint ships PyTorch weights, so convert them to
# TensorFlow when loading into the TF model class.
model=TFAutoModel.from_pretrained(model_ckpt,from_pt=True)
|
|
|
|
|
def cls_pool(model):
    """Return the [CLS]-token vector for every sequence in the batch.

    Note: despite its name, *model* is a model OUTPUT object (it must expose
    ``last_hidden_state`` of shape (batch, seq_len, hidden)); the result has
    shape (batch, hidden).
    """
    hidden_states = model.last_hidden_state
    # Position 0 along the sequence axis is the [CLS] token.
    return hidden_states[:, 0, :]
|
|
|
def sample_embedding(example):
    """Compute a [CLS]-pooled embedding for *example*.

    Parameters
    ----------
    example : str or list[str]
        Text to embed. Tokenized with padding/truncation and fed to the
        TF model.

    Returns
    -------
    dict
        ``{"embedding": np.ndarray}`` — the CLS embedding of the FIRST
        sequence only (``[0]`` below), shape (hidden_size,). This dict shape
        matches the `datasets.Dataset.map` convention.
    """
    token_output = tokenizer(example, padding=True, truncation=True, return_tensors="tf")
    # BatchEncoding is dict-like and can be **-unpacked directly; the former
    # `{k: v for k, v in token_output.items()}` copy was a no-op left over
    # from the PyTorch `{k: v.to(device) ...}` idiom and has been removed.
    model_output = model(**token_output)
    # .numpy()[0]: convert the TF tensor and keep only the first sequence's
    # pooled vector.
    return {"embedding": cls_pool(model_output).numpy()[0]}
|
|