import os

# Import unsloth before other ML libraries so its patches are applied first
from unsloth import FastModel

import torch
import gradio as gr
# Synchronous CUDA launches make GPU errors surface at the failing call
# (useful when debugging on Hugging Face Spaces)
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# Model repository on the Hugging Face Hub
model_repo_id = 'adarsh3601/my_gemma3_pt'
# Load model and tokenizer using FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name=model_repo_id,
    max_seq_length=2048,
    load_in_4bit=True,    # Load weights with 4-bit quantization to cut VRAM use
    load_in_8bit=False,
    full_finetuning=False,
)
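# Note (assumption, not in the original): with load_in_4bit=True, Unsloth
# typically maps the quantized weights onto the GPU at load time, so the model
# itself needs no explicit .to("cuda").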
# Function to generate text based on user input
def generate_text(user_input):
    # Wrap the prompt in the chat format Gemma-3 expects
    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": user_input}],
    }]
    text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # Append the assistant turn so the model replies
        tokenize=False,              # Return the formatted prompt as a string; tokenized below
    )
    # Generate without tracking gradients
    with torch.no_grad():
        inputs = tokenizer([text], return_tensors="pt").to("cuda")
        output = model.generate(
            **inputs,
            max_new_tokens=512,  # Adjust if you need longer responses
            do_sample=True,      # Required for temperature/top_p/top_k to take effect
            temperature=1.0,
            top_p=0.95,
            top_k=64,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
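# A minimal streaming variant, a sketch assuming transformers' TextIteratorStreamer
# (not wired into the interface below; pass it as fn= to stream tokens, which
# also requires Gradio's request queue to be enabled):
from threading import Thread
from transformers import TextIteratorStreamer

def generate_text_stream(user_input):
    messages = [{"role": "user", "content": [{"type": "text", "text": user_input}]}]
    text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    inputs = tokenizer([text], return_tensors="pt").to("cuda")
    # skip_prompt drops the echoed input; decode kwargs are forwarded to the tokenizer
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # model.generate blocks, so run it in a thread and consume tokens as they arrive
    Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512)).start()
    partial = ""
    for token_text in streamer:
        partial += token_text
        yield partial  # Gradio displays each partial string as it grows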
# Build the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
    outputs=gr.Textbox(lines=2, placeholder="Generated text will appear here..."),
    title="Gemma-3 Model",
    description="A simple interface for the Gemma-3 model. Enter a prompt and see the generated response.",
)
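# Optional (assumption, not in the original): enable Gradio's request queue so
# concurrent users are served in order rather than contending for the GPU;
# the queue is also required if the streaming variant above is wired in.
iface.queue()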
# Launch the app
if __name__ == "__main__":
    # share=True is unnecessary on Hugging Face Spaces (the Space already serves a public URL)
    iface.launch()
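# To run this app locally: `python app.py` (a CUDA GPU is required for the 4-bit model).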