Why does the same input lead to different outputs?
#61
by
JarsonCai
- opened
inference code:
import torch
from transformers import BertModel, BertConfig, BertTokenizer
from nets.TP_LoRA.utils import read_config
def get_vector(size, dataset, net, tokenizer):
    """Return the word vectors for the prompt selected by (size, dataset).

    Looks up the configured text prompt, then encodes it with the given
    BERT-style network and tokenizer.
    """
    print("BERT model process!")
    prompt, _ = get_prompt(size=size, dataset=dataset)
    return text2vector(prompt, net, tokenizer)
def model_init(hidden_size=192, layer_num=8, seed=None):
    """Create a randomly initialised BERT encoder and its tokenizer.

    NOTE: ``BertModel(config=...)`` builds a model with freshly *random*
    weights — it does NOT load the checkpoint from the config directory.
    Two successive calls therefore produce models with different weights
    (and different outputs for the same input) unless the RNG is re-seeded
    before each call.  Pretrained weights cannot be loaded here anyway,
    because ``hidden_size``/``num_hidden_layers`` are overridden and no
    longer match the checkpoint.

    Args:
        hidden_size: hidden width of the encoder (overrides the config).
        layer_num: number of transformer layers (overrides the config).
        seed: optional RNG seed; pass the same value on every call to make
            the random initialisation reproducible.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is moved to GPU when
        CUDA is available, otherwise it stays on CPU.
    """
    if seed is not None:
        # Re-seed immediately before construction so repeated calls with
        # the same seed yield identical random weights.
        torch.manual_seed(seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    base_config = BertConfig.from_pretrained(r'E:\PEFT\model_data\bert')
    base_config.hidden_size = hidden_size
    base_config.num_hidden_layers = layer_num
    # Random initialisation from config only — see NOTE in the docstring.
    bert_base = BertModel(config=base_config)
    tokenizer = BertTokenizer.from_pretrained(r'E:\PEFT\model_data\bert')
    bert_base.to(device)
    return bert_base, tokenizer
def text2vector(text: str, net, tokenizer):
    """Encode *text* with *net* and return the last-layer hidden states.

    Args:
        text: raw input string.
        net: a BERT-style module whose forward pass returns an object with
            a ``last_hidden_state`` tensor attribute.
        tokenizer: tokenizer providing ``encode(text, add_special_tokens=...)``.

    Returns:
        A ``torch.Tensor`` of shape ``[1, Seq, C]`` on the CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    token_ids = tokenizer.encode(text, add_special_tokens=True)
    # Wrap in a list to add the batch dimension of size 1.
    input_ids = torch.tensor([token_ids]).to(device)
    net.eval()  # disable dropout etc. before the forward pass
    with torch.no_grad():
        outputs = net(input_ids)
    # Last layer's hidden states (the word vectors), moved back to CPU.
    word_vectors = outputs.last_hidden_state.to('cpu')
    torch.cuda.empty_cache()
    return word_vectors  # [1, Seq, C]
def get_prompt(size="TINY", dataset="Orange-Navel", cfg=None):
    """Look up the configured text prompt for a model size and dataset.

    Args:
        size: one of ``"TINY"``, ``"BASE"``, ``"LARGE"``.
        dataset: one of ``"Orange-Navel"``, ``"Grapefruit"``, ``"Lemon"``.
        cfg: optional pre-loaded config mapping; when ``None`` the project
            config is read via ``read_config()``.

    Returns:
        A ``(prompt, len(prompt))`` tuple.

    Raises:
        ValueError: if ``size`` or ``dataset`` is unsupported.
    """
    # Map the public argument spelling to the config-file key.
    dataset_keys = {
        "Orange-Navel": "ORANGE-NAVEL",
        "Grapefruit": "GRAPEFRUIT",
        "Lemon": "LEMON",
    }
    if size not in ("TINY", "BASE", "LARGE"):
        raise ValueError("An unsupported 'size' was entered")
    if dataset not in dataset_keys:
        raise ValueError("An unsupported 'dataset' was entered")
    if cfg is None:
        cfg = read_config()
    text_prompt = cfg['TEXT'][size][dataset_keys[dataset]]
    return text_prompt, len(text_prompt)
test code:
from nets.TP_LoRA.text_encode import *

# NOTE(review): the seed is set only once, but each model_init() call
# constructs a BertModel from a config, which draws fresh random weights
# on every call — so net1 and net2 end up with different weights and
# produce different outputs for the same input.  That appears to be the
# cause of the reported nondeterminism; confirm against model_init.
torch.manual_seed(0)
net1, tokenizer1 = model_init()
net2, tokenizer2 = model_init()
y1 = get_vector(size='TINY', dataset='Orange-Navel', net=net1, tokenizer=tokenizer1)
y2 = get_vector(size='TINY', dataset='Orange-Navel', net=net2, tokenizer=tokenizer2)
# Compare whether the two tensors are (numerically) identical.
is_equal = torch.allclose(y1, y2)
if is_equal:
    print("两个张量完全相同")
else:
    print("两个张量不完全相同")
1. I have loaded the same weights from the same directory.
2. I have already set the network to eval mode.
JarsonCai
changed discussion status to
closed
Hi
@JarsonCai
Is your issue resolved?
Hi @JarsonCai
Is your issue resolved?
Yes