Why the same input leads to different outputs

#61
by JarsonCai - opened

Inference code:

import torch

from transformers import BertModel, BertConfig, BertTokenizer
from nets.TP_LoRA.utils import read_config


def get_vector(size, dataset, net, tokenizer):
    print("BERT model process!")
    text, _ = get_prompt(size=size, dataset=dataset)
    words_vector = text2vector(text, net, tokenizer)

    return words_vector



def model_init(hidden_size=192, layer_num=8):
    # model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    base_config = BertConfig.from_pretrained(r'E:\PEFT\model_data\bert')
    base_config.hidden_size = hidden_size
    base_config.num_hidden_layers = layer_num
    bert_base = BertModel(config=base_config)
    tokenizer = BertTokenizer.from_pretrained(r'E:\PEFT\model_data\bert')
    bert_base.to(device)

    return bert_base, tokenizer




def text2vector(text: str, net, tokenizer):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_tokens = tokenizer.encode(text, add_special_tokens=True)
    input_ids = torch.tensor([input_tokens]).to(device)

    with torch.no_grad():
        net.eval()
        outputs = net(input_ids)

    # Get the hidden states from the last layer (token vectors)
    word_vectors = outputs.last_hidden_state.to('cpu')
    torch.cuda.empty_cache()

    return word_vectors # [1, Seq, C]




def get_prompt(size="TINY", dataset="Orange-Navel"):
    cfg = read_config()
    # Map the user-facing dataset names onto the keys used in the config file
    dataset_keys = {
        "Orange-Navel": "ORANGE-NAVEL",
        "Grapefruit": "GRAPEFRUIT",
        "Lemon": "LEMON",
    }
    if size not in ("TINY", "BASE", "LARGE"):
        raise ValueError("An unsupported 'size' was entered")
    if dataset not in dataset_keys:
        raise ValueError("An unsupported 'dataset' was entered")
    text_prompt = cfg['TEXT'][size][dataset_keys[dataset]]

    return text_prompt, len(text_prompt)
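Note: BertModel(config=base_config) only uses the checkpoint directory to read the configuration file and then builds a model with freshly initialized random weights; the pretrained weights are loaded only by from_pretrained. A minimal sketch of that loading path (it needs the unmodified config, since a checkpoint trained with the default hidden size and layer count cannot be loaded into the resized model defined above):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_base = BertModel.from_pretrained(r'E:\PEFT\model_data\bert')  # loads both config and weights
bert_base.to(device)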

Test code:

from nets.TP_LoRA.text_encode import *

torch.manual_seed(0)
net1, tokenizer1 = model_init()
net2, tokenizer2 = model_init()

y1 = get_vector(size='TINY', dataset='Orange-Navel', net=net1, tokenizer=tokenizer1)
y2 = get_vector(size='TINY', dataset='Orange-Navel', net=net2, tokenizer=tokenizer2)

# Check whether the two tensors match (allclose allows a small numerical tolerance)
is_equal = torch.allclose(y1, y2)

if is_equal:
    print("The two tensors are identical")
else:
    print("The two tensors are not identical")

1. I have loaded the same weights from the same directory.
2. I have already set the network to eval mode.
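A quick way to check point 1 (a sketch against net1/net2 from the test code) is to compare the two models' parameters directly; if they differ, the mismatch comes from the weight initialization rather than from tokenization or the forward pass:

same_weights = all(
    torch.equal(p1, p2)
    for p1, p2 in zip(net1.state_dict().values(), net2.state_dict().values())
)
print("Identical weights:", same_weights)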

JarsonCai changed discussion status to closed
BERT community org

Hi @JarsonCai
Is your issue resolved?

Yes
