Spaces:

CreitinGameplays
/

bloom-3b-conversational-gradio

Sleeping

App Files Files Community

bloom-3b-conversational-gradio / app.py

CreitinGameplays

Update app.py

fab30bb verified 5 months ago

raw

history blame

2.04 kB

	import gradio as gr
	import torch
	import spaces
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Define the BLOOM model name
	model_name = "CreitinGameplays/bloom-3b-conversational"

	# Load tokenizer and model
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name)
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model.to(device)

	#@spaces.GPU(duration=90)
	def generate_text(user_prompt):
	"""Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
	# Construct the full prompt with system introduction, user prompt, and assistant role
	prompt = f"<\|system\|> You are a helpful AI assistant. </s> <\|prompter\|> {user_prompt} </s> <\|assistant\|>"

	# Encode the entire prompt into tokens
	prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)

	# Generate text with the complete prompt and limit the maximum length to 256 tokens
	output = model.generate(
	input_ids=prompt_encoded,
	max_length=1900,
	num_beams=1,
	num_return_sequences=1,
	do_sample=True,
	top_k=0,
	top_p=1.0,
	temperature=0.2,
	repetition_penalty=1.1
	)

	# Decode the generated token sequence back to text
	generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

	# Extract the assistant's response (assuming it starts with "<\|assistant\|>")
	assistant_response = generated_text.split("<\|assistant\|>")[-1]
	assistant_response = assistant_response.replace(f"{user_prompt}", "").strip()
	assistant_response = assistant_response.replace("You are a helpful AI assistant.", "").strip()

	return assistant_response

	# Define the Gradio interface
	interface = gr.Interface(
	fn=generate_text,
	inputs=[
	gr.Textbox(label="Text Prompt", value="What's an AI?"),
	],
	outputs="text",
	description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
	)


	# Launch the Gradio interface
	interface.launch()