Update app.py

app.py (CHANGED)
@@ -4,69 +4,58 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 from unsloth import FastModel
 
-# Set environment for Hugging Face Spaces
+# Set environment for Hugging Face Spaces
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 
-# Load
+# Load the model from Hugging Face Model Hub
 model_repo_id = 'adarsh3601/my_gemma3_pt'
 
+# Load model and tokenizer using FastModel
 model, tokenizer = FastModel.from_pretrained(
     model_name=model_repo_id,
     max_seq_length=2048,
-    load_in_4bit=True,
+    load_in_4bit=True, # Load model with 4-bit quantization
     load_in_8bit=False,
     full_finetuning=False
 )
 
-#
-def
-#
-    messages = [
-
-
-
+# Function to generate text based on user input
+def generate_text(user_input):
+    # Prepare the input as per the model's expected format
+    messages = [{
+        "role": "user",
+        "content": [{"type" : "text", "text" : user_input}]
+    }]
 
-
-
-
-
-
-
+    text = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True, # Must add for generation
+    )
+
+    # Generate output with model
     with torch.no_grad():
         output = model.generate(
-            **tokenizer([
-            max_new_tokens=512,
+            **tokenizer([text], return_tensors="pt").to("cuda"),
+            max_new_tokens=512, # Adjust if you need more tokens
             temperature=1.0,
             top_p=0.95,
             top_k=64,
-            streamer=None
+            streamer=None # You can set a streamer if needed
         )
+
+    # Decode the model output and return the result
+    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
+    return decoded_output
+
+# Build the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
+    outputs=gr.Textbox(lines=2, placeholder="Generated text will appear here..."),
+    title="Gemma-3 Model",
+    description="This is a simple interface to interact with the Gemma-3 model. Enter a prompt and see the generated response."
+)
 
-
-
-    # Try to strip the original prompt to get the model's actual reply
-    reply = decoded[len(prompt):].strip()
-
-    # Append reply to history
-    history.append((user_input, reply))
-    return history, history
-
-# Gradio interface with state (chat history)
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 Chat with Gemma-3")
-    chatbot = gr.Chatbot()
-    state = gr.State([])  # holds the message history
-
-    with gr.Row():
-        txt = gr.Textbox(placeholder="Type a message and hit enter...", show_label=False).style(container=False)
-
-    def user_submit(message, history):
-        return "", history + [[message, ""]]
-
-    txt.submit(user_submit, [txt, state], [txt, state], queue=False).then(
-        chat, [txt, state], [chatbot, state]
-    )
-
-# Launch with public sharing
+# Launch the app
 if __name__ == "__main__":
-
+    iface.launch(share=True)
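A note on the new generate_text: transformers' apply_chat_template tokenizes by default, so as committed it returns token IDs rather than a prompt string, and feeding that result back through tokenizer([text], ...) is likely to fail. The function also decodes the whole sequence, prompt included, so the user's input is echoed back in the reply, and it passes temperature/top_p/top_k without do_sample=True, which generate ignores. A minimal sketch with those points addressed, reusing the model and tokenizer loaded above (using model.device instead of the hard-coded "cuda" is my tweak for robustness, not part of the commit):

import torch

def generate_text(user_input):
    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": user_input}]
    }]

    # tokenize=False makes apply_chat_template return the formatted prompt
    # string that tokenizer([...]) below expects.
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # must add for generation
        tokenize=False,
    )

    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,  # without this, temperature/top_p/top_k are ignored
            temperature=1.0,
            top_p=0.95,
            top_k=64,
        )

    # Slice off the prompt tokens so only the new reply is decoded.
    reply_ids = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)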
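A quick smoke test outside Gradio, with a made-up prompt, helps confirm the generation path before debugging anything in the interface:

print(generate_text("In one sentence, what does 4-bit loading trade away?"))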
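On the last line: a Space already hosts the app publicly, and recent Gradio versions warn that share=True is unsupported inside Spaces and fall back to share=False, so the flag only matters when app.py is run locally; a plain launch() should behave the same on the Space:

if __name__ == "__main__":
    iface.launch()  # share=True is only meaningful outside Spaces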