Devops-hestabit
/

Othehalf-350m-onnx

Text Generation

Inference Endpoints

text-generation-inference

Model card Files Files and versions Community

Othehalf-350m-onnx / handler.py

Devops-hestabit's picture

Devops-hestabit

Upload 13 files

370198e 12 months ago

raw history blame contribute delete

No virus

3.33 kB

	from optimum.onnxruntime import ORTModelForCausalLM
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import re
	import time
	import torch

	# Prompt template for the "Alice Gate" persona. It consists of a persona
	# description, an example dialogue delimited by <START>/<END>, an opening
	# greeting from Alice, and finally the user's message. The `{user_name}` and
	# `{user_input}` placeholders are filled in with `str.format` by
	# `SweetCommander.__call__`. The text is sent to the model as-is, so any edit
	# to its wording changes the prompt the model sees.
	template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
	<START>
	{user_name}: So how did you get into computer engineering?
	Alice Gate: I've always loved tinkering with technology since I was a kid.
	{user_name}: That's really impressive!
	Alice Gate: She chuckles bashfully Thanks!
	{user_name}: So what do you do when you're not working on computers?
	Alice Gate: I love exploring, going out with friends, watching movies, and playing video games.
	{user_name}: What's your favorite type of computer hardware to work with?
	Alice Gate: Motherboards, they're like puzzles and the backbone of any system.
	{user_name}: That sounds great!
	Alice Gate: Yeah, it's really fun. I'm lucky to be able to do this as a job.
	{user_name}: Definetly.
	<END>
	Alice Gate: Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. She grins, eyes twinkling with excitement Let's get started!
	{user_input}"""

	class SweetCommander:
	    """Chat handler that produces replies in the "Alice Gate" persona.

	    On construction it loads a tokenizer and an ONNX Runtime causal language
	    model. Calling the instance fills the module-level ``template`` prompt,
	    generates a continuation and returns a cleaned-up reply string.
	    """

	    def __init__(self, path="", *, provider="CUDAExecutionProvider", device="cuda") -> None:
	        """Load the tokenizer and the ONNX model.

	        Args:
	            path: Model location handed to both ``from_pretrained`` calls.
	            provider: ONNX Runtime execution provider used to run the model.
	            device: Torch device the tokenized prompt is moved to before
	                generation; it should correspond to ``provider``.
	        """
	        self.tokenizer = AutoTokenizer.from_pretrained(path)
	        self.model = ORTModelForCausalLM.from_pretrained(path, provider=provider)
	        self.device = device
	        # Separator printed around the debug output of each request.
	        self.star_line = "***********************************************************"

	    @staticmethod
	    def _clean_response(generated_text, user_name):
	        """Turn the raw model continuation into a single tidy reply.

	        Args:
	            generated_text: Text generated after the ``Alice Gate:`` cue.
	            user_name: Name of the human speaker; anything from the first
	                ``"<user_name>:"`` onwards is treated as the model speaking
	                for the user and is discarded.

	        Returns:
	            The reply with asterisk markers removed, whitespace collapsed and
	            any unfinished trailing sentence cut off. If the text contains no
	            ``.``, ``!`` or ``?`` it is returned untruncated.
	        """
	        reply = generated_text.split(f"{user_name}:", 1)[0].strip()

	        # Drop asterisks together with a directly preceding period; keep the
	        # previous text if that would leave nothing at all.
	        without_markers = re.sub(r"\.?\*", "", reply).strip()
	        if without_markers:
	            reply = without_markers
	        reply = reply.replace("*", "")

	        # Collapse newlines and repeated spaces into single spaces.
	        reply = " ".join(reply.split())

	        # Cut after the last sentence terminator so the reply does not end
	        # mid-sentence when generation stopped at the token limit.
	        last_terminator = max(reply.rfind(mark) for mark in ".!?")
	        if last_terminator != -1:
	            reply = reply[:last_terminator + 1]
	        return reply

	    def __call__(self, user_name, user_input):
	        """Generate Alice Gate's reply to ``user_input``.

	        Args:
	            user_name: Name substituted for ``{user_name}`` in the template.
	            user_input: Text substituted for ``{user_input}`` in the template.

	        Returns:
	            The cleaned reply string.

	        Side effects:
	            Prints the prompt, the reply and the elapsed time to stdout.
	        """
	        started = time.perf_counter()
	        prompt = template.format(user_name=user_name, user_input=user_input)
	        print(self.star_line)
	        print(prompt)

	        inputs = self.tokenizer(prompt + "\nAlice Gate:", return_tensors="pt").to(self.device)
	        # NOTE(review): no `do_sample=True` is passed, so unless the model's
	        # own generation config enables sampling, temperature/top_p/top_k are
	        # presumably ignored and decoding is greedy -- confirm the intent.
	        # NOTE(review): pad_token_id is hard-coded; verify 50256 is the right
	        # id for this tokenizer.
	        encoded_output = self.model.generate(
	            inputs["input_ids"],
	            max_new_tokens=50,
	            temperature=0.5,
	            top_p=0.9,
	            top_k=0,
	            repetition_penalty=1.1,
	            pad_token_id=50256,
	            num_return_sequences=1,
	        )

	        # Decode only the newly generated tokens. Removing the prompt from the
	        # decoded text by string replacement fails whenever decoding does not
	        # reproduce the prompt character-for-character (e.g. tokenizer
	        # clean-up of spaces before punctuation), which made the handler
	        # return a line of the example dialogue instead of the real reply.
	        prompt_length = inputs["input_ids"].shape[-1]
	        generated_text = self.tokenizer.decode(
	            encoded_output[0][prompt_length:], skip_special_tokens=True
	        )

	        reply = self._clean_response(generated_text, user_name)
	        print(self.star_line)
	        print("Response:", reply)
	        print("Eval time:", time.perf_counter() - started)
	        print(self.star_line)
	        return reply