made1570 committed
Commit c742ff6 · verified · 1 Parent(s): 7be2f4a

Update app.py

Files changed (1)
app.py +33 -41
app.py CHANGED
@@ -1,49 +1,41 @@
 import torch
-from transformers import AutoProcessor, AutoModelForImageTextToText, TextStreamer
+from transformers import AutoProcessor, AutoModelForImageTextToText
 from peft import PeftModel
 import gradio as gr
 
-# Load base model and processor
-base_model_id = "unsloth/gemma-3-12b-it-unsloth-bnb-4bit"
-adapter_model_id = "adarsh3601/my_gemma3_pt"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-processor = AutoProcessor.from_pretrained(base_model_id)
-model = AutoModelForImageTextToText.from_pretrained(base_model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto")
-
-# Apply adapter (LoRA)
-model = PeftModel.from_pretrained(model, adapter_model_id)
-model.eval()
-
-streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-# Helper to format messages using the chat template
-def format_chat(messages):
-    formatted = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    return formatted
-
-# Chat function
-def chat(message, history):
-    messages = []
-
-    # Format history into messages
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-
-    messages.append({"role": "user", "content": message})
-    prompt = format_chat(messages)
-
-    inputs = processor(prompt, return_tensors="pt").to(device)
-
+# Set up device (CPU or GPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load processor and model
+model_name = "adarsh3601/my_gemma_pt3"  # Change to your model path
+processor = AutoProcessor.from_pretrained(model_name)
+model = AutoModelForImageTextToText.from_pretrained(model_name).to(device)
+
+# Optional: If using PEFT model with adapter
+# adapter_model_id = "your_adapter_model_id"  # Uncomment and replace if using adapter
+# model = PeftModel.from_pretrained(model, adapter_model_id)
+
+# Define function to process the user input
+def chat(prompt):
+    # Prepare the message in the format the model expects
+    messages = [{"role": "user", "content": prompt}]
+
+    # Process the input using the processor
+    inputs = processor(messages, return_tensors="pt").to(device)
+
+    # Generate the output from the model
     with torch.no_grad():
-        outputs = model.generate(**inputs, max_new_tokens=512, streamer=streamer)
+        outputs = model.generate(**inputs, max_length=200)
 
-    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    response = decoded.split("<end_of_turn>")[0].strip().split("<start_of_turn>model")[-1].strip()
-    return response
+    # Decode and return the response
+    return processor.decode(outputs[0], skip_special_tokens=True)
 
 # Gradio interface
-gui = gr.ChatInterface(fn=chat, title="Gemma-3 Chatbot", description="Fine-tuned on adarsh3601/my_gemma_pt3")
-
-gui.launch()
+gr.Interface(
+    fn=chat,
+    inputs="text",
+    outputs="text",
+    title="Gemma Chat Model",
+    description="Chat with Gemma3 model",
+    live=True
+).launch(share=False)  # share=False for Hugging Face Spaces
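
For reference, the removed version ran each prompt through the processor's chat template before tokenizing, which is what produces Gemma's <start_of_turn>/<end_of_turn> turn markers; the new code passes the raw messages list to the processor instead. A minimal sketch of the template-based approach on top of the new single-turn design — the name chat_with_template is hypothetical, and it assumes the processor, model, and device defined above:

def chat_with_template(prompt):
    # Hypothetical variant of chat(): format the turn via the chat
    # template first, as the removed code's format_chat() helper did.
    messages = [{"role": "user", "content": prompt}]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=200)
    return processor.decode(outputs[0], skip_special_tokens=True)

Note that decoding the raw output this way returns the prompt along with the reply; the removed code split the decoded text on <start_of_turn>model and <end_of_turn> to isolate just the model's turn.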