from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Phi-3 instruct chat template: one user turn, closed with <|end|>, then an open
# assistant turn for the model to complete.
PROMPT_FORMAT = """
<|user|>
{inputs} <|end|>
<|assistant|>
"""


class EndpointHandler():
    def __init__(self, path: str = ""):
        # `path` matches the custom-handler interface but is unused here; the
        # checkpoint is pinned to a fixed repo instead.
        cfg = {
            "repo": "MrOvkill/Phi-3-Instruct-Bloated",
        }
        self.model = AutoModelForCausalLM.from_pretrained(
            cfg["repo"], trust_remote_code=True, torch_dtype=torch.float16
        )
        self.tokenizer = AutoTokenizer.from_pretrained(cfg["repo"])

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        inputs = data.pop("inputs", "Q: What is the chemical composition of common concrete in 2024?\nA: ")
        max_new_tokens = 1024
        if "max_new_tokens" in data:
            # Coerce inside the try block so a bad value is reported, not raised.
            try:
                max_new_tokens = int(data.pop("max_new_tokens"))
            except (TypeError, ValueError):
                return [{
                    "status": "error",
                    "reason": "max_new_tokens was passed as something that was absolutely not a plain old int",
                }]

        # The model cannot consume a raw string: tokenize the formatted prompt,
        # generate greedily, and decode only the newly produced tokens.
        prompt = PROMPT_FORMAT.format(inputs=inputs)
        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.model.device)
        output_ids = self.model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )
        res = self.tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
        return [{"generated_text": res}]
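

# A minimal local smoke test, assuming the checkpoint can be downloaded and fits
# in memory. The payload keys ("inputs", "max_new_tokens") mirror the handler
# above; the question itself is illustrative, not part of the handler contract.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({
        "inputs": "Q: What is the boiling point of water at sea level?\nA: ",
        "max_new_tokens": 64,
    })
    print(result)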