Spaces:

Hrushi02
/

Root_Math

Sleeping

App Files Community

Hrushi02 committed on Oct 16

Commit

e02d7d6

verified ·

1 Parent(s): 7ee65b2

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -8

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 """
-Root_Math fine-tuned model chat app for Hugging Face Spaces.
 """
 # ✅ Load Hugging Face API token securely
@@ -15,7 +16,7 @@ if not api_token:
 # ✅ Define model names
 base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
-peft_model_name = "Hrushi02/Root_Math"  # <-- stays the same
 # ✅ Load base model
 print("🔄 Loading base model...")
@@ -34,7 +35,8 @@ model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
 print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
-# ✅ Define chat response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """Generate responses from your fine-tuned model."""
     full_prompt = system_message + "\n\n"
@@ -57,16 +59,19 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # extract only the assistant's last message
     if "Assistant:" in response:
         response = response.split("Assistant:")[-1].strip()
-    yield response
-# ✅ Create Gradio interface
-demo = gr.ChatInterface(
-    respond,
     additional_inputs=[
         gr.Textbox(value="You are a helpful math assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
@@ -78,6 +83,26 @@ demo = gr.ChatInterface(
 )
 # ✅ Launch app
 if __name__ == "__main__":
     demo.launch()

 from peft import PeftModel
 """
+🧮 Root_Math fine-tuned model chat app for Hugging Face Spaces.
+Supports both Gradio UI and API access via `/chat`.
 """
 # ✅ Load Hugging Face API token securely
 # ✅ Define model names
 base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
+peft_model_name = "Hrushi02/Root_Math"  # <-- model name stays the same
 # ✅ Load base model
 print("🔄 Loading base model...")
 print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
+# ✅ Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     """Generate responses from your fine-tuned model."""
     full_prompt = system_message + "\n\n"
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Extract only the assistant's last message
     if "Assistant:" in response:
         response = response.split("Assistant:")[-1].strip()
+    return response
+# ✅ Create Gradio Chat Interface
+chat_ui = gr.ChatInterface(
+    fn=lambda message, history, system_message, max_tokens, temperature, top_p: (
+        respond(message, history, system_message, max_tokens, temperature, top_p)
+    ),
     additional_inputs=[
         gr.Textbox(value="You are a helpful math assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
 )
+# ✅ Add API endpoint `/chat` (for gradio_client access)
+api_chat = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Textbox(label="Message"),
+        gr.State(),  # placeholder for chat history (can be None)
+        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
+        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+    outputs="text",
+    api_name="/chat"
+)
+# ✅ Combine UI + API
+demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])
 # ✅ Launch app
 if __name__ == "__main__":
     demo.launch()