patrickvonplaten
/

opt_metaseq_350m

Feature Extraction

Inference Endpoints

text-generation-inference

Model card Files Files and versions Community

opt_metaseq_350m / run_model.py

patrickvonplaten's picture

patrickvonplaten

add opt

be76c6b almost 2 years ago

raw history blame contribute delete

No virus

2.74 kB

	#!/usr/bin/env python3
	import os
	from transformers import AutoTokenizer, GPT2Tokenizer
	from megatron.initialize import initialize_megatron
	from metaseq import checkpoint_utils
	from transformers import OPTForCausalLM
	import torch

	# Directory holding the vocab/merges files and the restored checkpoint.
	path = "./model"

	# initialize_megatron only needs *some* argument defaults here; per the
	# original author they do not have to describe this checkpoint's real
	# architecture.
	_megatron_defaults = dict(
	    micro_batch_size=1,
	    num_layers=12,
	    hidden_size=768,
	    num_attention_heads=12,
	    max_position_embeddings=2048,
	    encoder_seq_length=2048,
	)
	initialize_megatron(args_defaults=_megatron_defaults)

	vocab_file, merges_file = (
	    os.path.join(path, file_name)
	    for file_name in ("gpt2-vocab.json", "gpt2-merges.txt")
	)

	# Build the tokenizer from the raw GPT-2 files and save it into the same
	# directory.
	tokenizer = GPT2Tokenizer(vocab_file, merges_file)
	tokenizer.save_pretrained(path)

	# Load the checkpoint, pointing it at the same vocab/merges files.
	checkpoint = checkpoint_utils.load_model_ensemble_and_task(
	    [os.path.join(path, "restored.pt")],
	    arg_overrides={"vocab_filename": vocab_file, "merges_filename": merges_file},
	)

	# checkpoint[0] is presumably the list of loaded models (TODO confirm);
	# take its first entry, in eval mode, as fp16 on the first GPU.
	model = checkpoint[0][0].eval().to("cuda:0").half()

	# Reference Hugging Face model, as fp16 on the second GPU.
	hf_model = OPTForCausalLM.from_pretrained("../opt-350m")
	hf_model = hf_model.to("cuda:1").half()


	# forward passes
	def _encode_with_leading_zero(prompts):
	    """Tokenize ``prompts`` and prepend token id 0 to every row.

	    Args:
	        prompts: A prompt string (or list of prompt strings) passed straight
	            to the module-level ``tokenizer``.

	    Returns:
	        The 2-D tensor of token ids with one extra leading column of zeros.
	        It is not moved to any device here; callers do that.
	    """
	    input_ids = tokenizer(prompts, return_tensors="pt").input_ids
	    # Token id 0 is presumably the start-of-sequence id the checkpoint
	    # expects -- TODO confirm against the vocab. One zero per row, so a
	    # batch of several equal-length prompts concatenates cleanly.
	    leading = input_ids.new_zeros((input_ids.shape[0], 1))
	    return torch.cat([leading, input_ids], dim=-1)


	def single_batch_forward_logits(prompts):
	    """Run the metaseq ``model`` on ``prompts`` without tracking gradients.

	    Args:
	        prompts: A prompt string (or list of prompt strings).

	    Returns:
	        The first element of the model's output, used as the logits.
	    """
	    input_ids = _encode_with_leading_zero(prompts).to("cuda:0")
	    with torch.no_grad():
	        return model(input_ids)[0]


	# forward hf
	def forward_hf(prompts):
	    """Run the Hugging Face ``hf_model`` on ``prompts`` without gradients.

	    Args:
	        prompts: A prompt string (or list of prompt strings).

	    Returns:
	        The first element of the model's output, used as the logits.
	    """
	    input_ids = _encode_with_leading_zero(prompts).to("cuda:1")
	    with torch.no_grad():
	        return hf_model(input_ids)[0]

	# Prompts available for the side-by-side comparison.
	prompts = [
	    "Today is a beautiful day and I want to",
	    "In the city of",
	    "Paris is the capital of France and",
	    "Computers and mobile phones have taken",
	]

	# Only the first prompt is actually run; remove this slice to compare all
	# of them.
	prompts = prompts[:1]


	def _next_word(logits):
	    """Return the greedy next token for ``logits`` with the "Ġ" marker removed.

	    Takes the argmax at batch index 0, last position, and maps that id back
	    to its token string with the module-level ``tokenizer``. "Ġ" is the
	    GPT-2 BPE marker for a leading space.
	    """
	    token_id = torch.argmax(logits[0, -1], -1).item()
	    return tokenizer.convert_ids_to_tokens([token_id])[0].replace("Ġ", "")


	print("Next word generation")
	for prompt in prompts:
	    print("-------------")
	    print(f"Prompt: {prompt}...\n")
	    logits_fsq = single_batch_forward_logits(prompt)
	    print(f"Next word: {_next_word(logits_fsq)}")
	    print("-------------")
	    logits = forward_hf(prompt)
	    print(f"Next word: {_next_word(logits)}")
	    print("-------------")

	    # Report the equivalence check instead of discarding its result, and
	    # do it per prompt so every prompt is covered. The comparison runs in
	    # float32 so the tolerance arithmetic is not itself rounded to fp16.
	    logits_match = torch.allclose(
	        logits_fsq.float().cpu(), logits.float().cpu(), atol=1e-3
	    )
	    print(f"Logits match (atol=1e-3): {logits_match}")