Spaces:

damerajee
/

mingru-stories

Sleeping

App Files Files Community

mingru-stories / app.py

damerajee

Update app.py

e3517ec verified about 1 month ago

raw

history blame contribute delete

3.01 kB

	import gradio as gr
	import torch
	from mingru_lm import MinGRU_LM

	# Choose the inference device first: CUDA when torch reports it, CPU otherwise.
	if torch.cuda.is_available():
	    device = 'cuda'
	else:
	    device = 'cpu'

	# Build the network, then restore its weights from the checkpoint file.
	# The checkpoint is always deserialized onto the CPU, whatever `device` is.
	pt_model = "best_model.pt"
	model = MinGRU_LM(dim=512, num_tokens=256, num_layers=6)
	checkpoint = torch.load(pt_model, map_location=torch.device('cpu'))
	model.load_state_dict(checkpoint['model_state_dict'])

	# Relocate the loaded model onto the chosen device.
	model = model.to(device)

	def decode_tokens(tokens):
	    """Convert token ids to text, keeping only ids in the range 32..255.

	    Ids below 32 (control characters, newline included) and ids of 256 or
	    more are dropped without error.
	    """
	    kept_chars = (chr(tok) for tok in tokens if 32 <= tok < 256)
	    return ''.join(kept_chars)

	def tokenize_text(text):
	    """Map each character to its code point, skipping code points >= 256."""
	    code_points = map(ord, text)
	    return [cp for cp in code_points if cp < 256]

	def generate_text(start_text, max_length, temperature):
	    """Stream text sampled from the model, one token at a time.

	    Args:
	        start_text: Prompt string; it is converted with ``tokenize_text``
	            before being fed to the model.
	        max_length: Maximum number of sampling steps to run.
	        temperature: Divisor applied to the last-position logits before
	            softmax; must be greater than zero.

	    Yields:
	        ``decode_tokens`` applied to the prompt tokens plus everything
	        sampled so far, once after each accepted token and once more after
	        the final step.

	    Raises:
	        ValueError: If ``temperature`` is not positive, or if the prompt
	            yields no tokens.
	    """
	    # Validate before touching the model: dividing the logits by zero would
	    # only surface later as an opaque sampling error.
	    if temperature <= 0:
	        raise ValueError("temperature must be greater than 0")

	    model.eval()

	    tokens = tokenize_text(start_text)
	    if not tokens:
	        # An empty prompt gives the model no position to predict from;
	        # fail early with a clear message instead.
	        raise ValueError("prompt must contain at least one encodable character")

	    # Shape (1, len(tokens)): a single-sequence batch of long token ids.
	    input_tensor = torch.tensor(tokens, dtype=torch.long).unsqueeze(0).to(device)
	    generated_tokens = tokens.copy()

	    # int() guards range() in case the UI hands back a float step count.
	    for _ in range(int(max_length)):
	        # Keep no_grad scoped to the forward pass so it is never left
	        # active while this generator is suspended at a yield.
	        with torch.no_grad():
	            # Element 1 of the model output is used as the logits.
	            logits = model(input_tensor, labels=None)[1]

	        last_token_logits = logits[0, -1, :] / temperature
	        probs = torch.softmax(last_token_logits, dim=-1)

	        # Sample the next token id from the tempered distribution.
	        next_token = torch.multinomial(probs, num_samples=1).item()

	        # Ids of 256 or more are skipped; the step is still consumed.
	        if next_token >= 256:
	            continue

	        generated_tokens.append(next_token)
	        input_tensor = torch.cat(
	            [input_tensor, torch.tensor([[next_token]], device=device)], dim=1
	        )
	        yield decode_tokens(generated_tokens)

	    # Final emission so a consumer gets output even when no step yielded.
	    yield decode_tokens(generated_tokens)

	def wrapper_generate_text(start_text, max_length, temperature):
	    """Relay every partial output produced by ``generate_text`` unchanged."""
	    yield from generate_text(start_text, max_length, temperature)

	# Gradio interface
	with gr.Blocks() as iface:
	    # Intro blurb and project link rendered above the controls.
	    gr.Markdown("The MinGRU model is a simplified version of the traditional Gated Recurrent Unit (GRU), designed to reduce complexity and improve efficiency,Trained on the [tiny-stories](https://huggingface.co/datasets/roneneldan/TinyStories?row=19)")
	    gr.Markdown("To Learn more visit this [github](https://github.com/dame-cell/MinGru/tree/main)")

	    # NOTE(review): the row is assumed to hold only the three inputs, with
	    # the output box and button below it — confirm against the deployed layout.
	    with gr.Row():
	        textbox = gr.Textbox(label="Enter your prompt", value="Once upon a time", lines=3)
	        max_length = gr.Slider(label="Max Length", minimum=10, maximum=500, step=1, value=200)
	        temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, step=0.1, value=0.7)

	    output_textbox = gr.Textbox(label="Generated Text", lines=10)
	    btn = gr.Button("Generate Text")

	    # The handler is a generator, so each value it yields replaces the
	    # contents of the output box.
	    btn.click(
	        fn=wrapper_generate_text,
	        inputs=[textbox, max_length, temperature],
	        outputs=output_textbox,
	    )

	# Serve on all network interfaces with the API docs page turned off.
	iface.launch(server_name="0.0.0.0", show_api=False)