|
|
|
import gradio as gr |
|
import torch |
|
from transformers import GPT2Tokenizer, GPT2LMHeadModel |
|
|
|
|
|
# Hugging Face Hub id of the fine-tuned GPT-2 chat model.
model_path = 'redael/model_udc'

# Load tokenizer and model weights (downloads from the Hub on first run).
tokenizer = GPT2Tokenizer.from_pretrained(model_path)

model = GPT2LMHeadModel.from_pretrained(model_path)




# Prefer GPU when available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

# Cast to fp16 only on CUDA — halves GPU memory; half precision is poorly
# supported for inference on CPU, so the CPU path stays in fp32.
if device.type == 'cuda':

    model = model.half()
|
|
|
def generate_response(prompt, model, tokenizer, max_length=100, num_beams=1, temperature=0.7, top_p=0.9, repetition_penalty=2.0):
    """Generate one assistant reply for *prompt*.

    Args:
        prompt: Either a bare user message or a full transcript already
            ending in "Assistant:" (as built by ``respond``).
        model / tokenizer: the GPT-2 LM head model and its tokenizer.
        max_length: total sequence length cap (prompt tokens included —
            NOTE(review): long prompts leave few tokens for the reply).
        num_beams, temperature, top_p, repetition_penalty: passed through
            to ``model.generate``.

    Returns:
        The cleaned reply text with any User:/Assistant: markers stripped.
    """
    # BUGFIX: GPT-2 tokenizers ship without a pad token, and padding=True
    # raises "Asking to pad but the tokenizer does not have a padding token".
    # Standard workaround: reuse EOS as the pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # BUGFIX: only add the chat frame when the caller has not already done
    # so — respond() passes a transcript ending in "Assistant:", and
    # re-wrapping it produced a doubled User:/Assistant: prefix.
    if not prompt.rstrip().endswith("Assistant:"):
        prompt = f"User: {prompt}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512).to(device)

    outputs = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],  # explicit mask for padded input
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        # BUGFIX: temperature/top_p only take effect when sampling is on;
        # without do_sample=True they were silently ignored.
        do_sample=True,
        num_beams=num_beams,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        early_stopping=True
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the last "Assistant:" marker, then drop any
    # hallucinated extra turns the model may have generated.
    response = response.split("Assistant:")[-1].strip()
    clean_lines = [
        line for line in response.split('\n')
        if "User:" not in line and "Assistant:" not in line
    ]
    return ' '.join(clean_lines).strip()
|
|
|
def respond(message, history: list[tuple[str, str]]):
    """Gradio chat callback.

    Folds the system message, the prior (user, assistant) turns, and the
    incoming *message* into one plain-text transcript, then returns the
    model's next reply.
    """
    transcript = ["You are a friendly chatbot."]
    for past_user, past_assistant in history:
        transcript.append(f"User: {past_user}\nAssistant: {past_assistant}")
    transcript.append(f"User: {message}\nAssistant:")
    prompt = "\n".join(transcript)

    # Fixed decoding settings for the UI.
    return generate_response(
        prompt,
        model,
        tokenizer,
        max_length=100,
        temperature=0.7,
        top_p=0.9,
    )
|
|
|
|
|
# Minimal Gradio chat UI; ChatInterface calls `respond(message, history)`
# once per user turn and renders the returned string as the bot reply.
demo = gr.ChatInterface(

    respond

)



if __name__ == "__main__":

    demo.launch()
|
|