# Hugging Face Space (status when captured: Sleeping)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Load the model and tokenizer from the Hugging Face Hub.
model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU if available; `device` is the single source of truth for
# where inference runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline on that same device, rather than probing
# CUDA availability a second time and risking the two choices drifting apart.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)
# Define the prediction function
def generate_text(prompt: str) -> str:
    """Generate text for *prompt* using the module-level ``text_generator``.

    Args:
        prompt: The text passed to the text-generation pipeline.

    Returns:
        The ``"generated_text"`` field of the first sequence the pipeline
        returns.
    """
    # Budget the continuation with max_new_tokens: max_length also counts the
    # prompt's own tokens, so a long prompt would leave no room to generate.
    outputs = text_generator(prompt, max_new_tokens=200, num_return_sequences=1)
    return outputs[0]["generated_text"]
# Build the Gradio UI: a two-line prompt box in, plain text out.
_prompt_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")

interface = gr.Interface(
    fn=generate_text,
    inputs=_prompt_box,
    outputs="text",
    title="LLaMA 3 Text Generation",
    description="Generate text using the LLaMA 3 model fine-tuned for instruction-following tasks.",
)

# Start serving the app.
interface.launch()