import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
# Load the Phi-3 model and tokenizer
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# Pick the device once, so the model and the input tensors end up in the
# same place (the original cast to float16 on GPU machines but never moved
# the weights off the CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

class RequestData(BaseModel):
    inputs: str
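
# Matching request body shape: {"inputs": "<user prompt>"}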

def generate_text(prompt):
    # Add a system message to enforce "human-like" brevity
    messages = [
        {
            "role": "system",
            "content": "You are a concise assistant. Answer the user's question directly. If there is a typo in the question, correct it silently and provide the answer. Do not give unsolicited details."
        },
        {"role": "user", "content": prompt}
    ]

    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
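    # For reference: with add_generation_prompt=True this renders roughly as
    # "<|system|>\n...<|end|>\n<|user|>\n...<|end|>\n<|assistant|>\n" for Phi-3
    # (illustrative; the exact layout comes from the tokenizer's chat template)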

    # Tokenize and move the tensors to the same device as the model
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    # Store the length of the input tokens so the prompt can be stripped later
    input_length = inputs.input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id
        )
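    # Sampling notes: temperature=0.7 with top_p=0.9 trades determinism for
    # variety, and repetition_penalty=1.1 mildly discourages loops. Using eos
    # as pad_token_id is a common workaround when a tokenizer defines no
    # dedicated pad token.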

    # FIX: Slice the output to exclude the input tokens
    # output[0] is the full sequence; [input_length:] takes everything AFTER the prompt
    new_tokens = output[0][input_length:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return result.strip()
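
# Illustrative call: the deliberate typo exercises the system prompt's silent
# correction; exact wording varies because sampling is enabled.
#   generate_text("What is the capitol of France?")  # -> e.g. "Paris."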

# Expose the generator as a POST endpoint (the "/generate" path is assumed
# from the function name)
@app.post("/generate")
async def generate(request: RequestData):
    text = generate_text(request.inputs)
    return {
        "data": [text]
    }
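
# Minimal local entry point (a sketch: on Hugging Face Spaces the runtime
# usually launches uvicorn itself, and 7860 is the port Spaces expects)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request against the assumed /generate route:
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"inputs": "Why is the sky blue?"}'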