Spaces:
Sleeping
Sleeping
File size: 3,011 Bytes
c232079 755d66e c232079 6941429 755d66e c232079 755d66e c232079 755d66e c232079 755d66e c232079 755d66e c232079 755d66e c232079 755d66e e3517ec 4c63745 755d66e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import gradio as gr
import torch
from mingru_lm import MinGRU_LM
# --- Model setup -------------------------------------------------------------
# Path of the checkpoint file holding the trained weights.
pt_model = "best_model.pt"

# Pick the execution device once: the GPU when CUDA is usable, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Deserialize the checkpoint onto the CPU so loading also works on hosts
# without a GPU; the weights are moved to `device` afterwards.
checkpoint = torch.load(pt_model, map_location=torch.device('cpu'))

# Build the network and restore its weights from the checkpoint's
# 'model_state_dict' entry.
# NOTE(review): these hyper-parameters presumably have to match the ones used
# for training the checkpoint -- confirm against the training script.
model = MinGRU_LM(dim=512, num_tokens=256, num_layers=6)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
def decode_tokens(tokens):
    """Turn a sequence of integer token ids into a string.

    Each id in the half-open range [32, 256) is converted to the character
    with that code point via ``chr``. Every other id is silently dropped:
    control codes below 32 (newline and tab included) and anything from 256
    upwards. Note that the kept range is wider than ASCII.

    Args:
        tokens: Iterable of integer token ids.

    Returns:
        The concatenation of the characters whose ids passed the filter.
    """
    kept_ids = (token_id for token_id in tokens if 32 <= token_id < 256)
    return "".join(map(chr, kept_ids))
def tokenize_text(text):
    """Convert text into a list of integer token ids.

    Every character is mapped to its Unicode code point with ``ord``.
    Characters whose code point is 256 or higher are dropped, so the result
    only contains values in the range [0, 255] (control characters such as
    newline are kept).

    Args:
        text: The string to tokenize.

    Returns:
        A list of ints, one per retained character, in input order.
    """
    code_points = map(ord, text)
    return [code_point for code_point in code_points if code_point < 256]
def generate_text(start_text, max_length, temperature):
    """Stream text sampled from the model, one token at a time.

    The prompt is tokenized with ``tokenize_text`` and fed to the module-level
    ``model``. On every step the logits of the last position are divided by
    ``temperature``, turned into probabilities with softmax, and one token is
    sampled from them. The sampled token is appended to the model input and
    the decoded text so far (prompt included) is yielded, so a consumer can
    show the output growing.

    Args:
        start_text: Prompt that seeds the generation.
        max_length: Number of sampling steps. Coerced with ``int`` so that a
            float coming from a UI slider is accepted.
        temperature: Positive divisor applied to the logits before softmax;
            smaller values make sampling more deterministic.

    Yields:
        str: The decoded prompt plus everything generated so far, once per
        accepted token, followed by one final yield of the complete text (so
        at least one value is produced even when ``max_length`` is 0).

    Raises:
        ValueError: If ``temperature`` is not positive, or if the prompt
            contains no character that ``tokenize_text`` can encode (the
            model cannot be run on an empty sequence).
    """
    # Validate first: a zero temperature would divide the logits by zero and
    # a negative one would invert the distribution.
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature!r}")

    tokens = tokenize_text(start_text)
    if not tokens:
        # An empty input has no "last position" to read logits from.
        raise ValueError(
            "start_text must contain at least one encodable character"
        )

    num_steps = int(max_length)

    model.eval()
    # Shape (1, len(tokens)): a batch holding the single prompt sequence.
    input_tensor = torch.tensor([tokens], dtype=torch.long, device=device)
    generated_tokens = tokens.copy()

    for _ in range(num_steps):
        # Only the forward pass runs under no_grad; the yield below stays
        # outside so the generator is never suspended inside the context.
        with torch.no_grad():
            # The second element of the model's return value is used as the
            # logits; the indexing treats it as (batch, position, vocab).
            logits = model(input_tensor, labels=None)[1]
            last_token_logits = logits[0, -1, :] / temperature
            probs = torch.softmax(last_token_logits, dim=-1)

        next_token = torch.multinomial(probs, num_samples=1).item()

        # Ignore ids that fall outside the range the decoder understands.
        if next_token >= 256:
            continue

        generated_tokens.append(next_token)
        next_column = torch.tensor([[next_token]], device=device)
        input_tensor = torch.cat([input_tensor, next_column], dim=1)
        yield decode_tokens(generated_tokens)

    # Final state; guarantees the consumer receives at least one value.
    yield decode_tokens(generated_tokens)
def wrapper_generate_text(start_text, max_length, temperature):
    """Event handler that streams generated text to the UI.

    Delegates to ``generate_text`` and passes every partial string it
    produces straight through, in order.

    Args:
        start_text: Prompt that seeds the generation.
        max_length: Number of sampling steps to run.
        temperature: Sampling temperature applied to the logits.

    Yields:
        str: Each partial result produced by ``generate_text``.
    """
    yield from generate_text(start_text, max_length, temperature)
# Gradio interface: a prompt box and two sampling controls feed the streaming
# handler, whose partial results are shown in the output textbox.
# NOTE(review): the nesting of the widgets inside the Row was reconstructed
# from context -- confirm it matches the intended layout.
with gr.Blocks() as iface:
    # Short description of the model and a pointer to the source repository.
    gr.Markdown(
        "The MinGRU model is a simplified version of the traditional "
        "Gated Recurrent Unit (GRU), designed to reduce complexity and "
        "improve efficiency,Trained on the "
        "[tiny-stories](https://huggingface.co/datasets/roneneldan/TinyStories?row=19)"
    )
    gr.Markdown(
        "To Learn more visit this "
        "[github](https://github.com/dame-cell/MinGru/tree/main)"
    )

    # Inputs: the prompt plus the two generation parameters.
    with gr.Row():
        textbox = gr.Textbox(
            lines=3,
            label="Enter your prompt",
            value="Once upon a time",
        )
        max_length = gr.Slider(
            minimum=10,
            maximum=500,
            value=200,
            step=1,
            label="Max Length",
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        )

    # Output area and the button that starts a generation run.
    output_textbox = gr.Textbox(lines=10, label="Generated Text")
    btn = gr.Button("Generate Text")
    btn.click(
        fn=wrapper_generate_text,
        inputs=[textbox, max_length, temperature],
        outputs=output_textbox,
    )

# Listen on all network interfaces and do not show the API page.
iface.launch(show_api=False, server_name="0.0.0.0")
|