Spaces:

micas23
/

chat

Runtime error

chat / app.py

Update app.py

a9f105b verified about 22 hours ago

1.39 kB

	import gradio as gr
	from transformers import AutoProcessor, AutoModelForCausalLM

	TARGET_MODEL_ID = "google/gemma-4-E2B-it"
	ASSISTANT_MODEL_ID = "google/gemma-4-E2B-it-assistant"

	# Processor belonging to the target checkpoint.
	processor = AutoProcessor.from_pretrained(TARGET_MODEL_ID)

	# Target model: dtype and device placement are left to the library ("auto").
	target_model = AutoModelForCausalLM.from_pretrained(
	    TARGET_MODEL_ID, dtype="auto", device_map="auto"
	)

	# Assistant model (the drafter), loaded with the same options as the target.
	assistant_model = AutoModelForCausalLM.from_pretrained(
	    ASSISTANT_MODEL_ID, dtype="auto", device_map="auto"
	)


	def greet(name):
	    """Generate the assistant's reply to a single user message.

	    Builds a two-message chat (a fixed system prompt plus ``name`` as the
	    user turn), renders it with the processor's chat template, generates up
	    to 256 new tokens with ``target_model`` while passing ``assistant_model``
	    as the drafter, then decodes and parses only the newly generated tokens.

	    Args:
	        name: The user's message text.

	    Returns:
	        The ``"content"`` field of the parsed response when
	        ``processor.parse_response`` returns a dict that has one; otherwise
	        the parsed response unchanged.
	    """
	    # Prompt
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant."},
	        {"role": "user", "content": name},
	    ]

	    # Render the chat to a prompt string, then tokenize it onto the model's device.
	    prompt = processor.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )
	    inputs = processor(text=prompt, return_tensors="pt").to(target_model.device)
	    # Prompt length in tokens, used below to slice the prompt off the output.
	    input_len = inputs["input_ids"].shape[-1]

	    # Generate output
	    outputs = target_model.generate(
	        **inputs,
	        assistant_model=assistant_model,
	        max_new_tokens=256,
	    )
	    # Special tokens are kept so that parse_response sees the raw model output.
	    response = processor.decode(outputs[0][input_len:], skip_special_tokens=False)

	    # Parse output
	    parsed = processor.parse_response(response)
	    # NOTE(review): parse_response is expected to return a message dict
	    # (e.g. with a "content" key) -- confirm against the installed
	    # transformers version. The UI's text output would otherwise show the
	    # dict's repr, so hand back only the reply text when it is available.
	    if isinstance(parsed, dict):
	        content = parsed.get("content")
	        if content is not None:
	            return content
	    return parsed

	# Text-in / text-out Gradio interface around greet(), launched immediately.
	demo = gr.Interface(
	    fn=greet,
	    inputs="text",
	    outputs="text",
	)
	demo.launch()