Spaces:

fllay
/

ransight-demo

Sleeping

App Files Community

fllay committed on Sep 6

Commit

5a1c3b8

verified ·

1 Parent(s): 33553e1

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -16

app.py CHANGED Viewed

@@ -2,70 +2,78 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# Hugging Face repo ID (from the model page)
 MODEL_NAME = "NextGLab/ORANSight_Gemma_2_2B_Instruct"
-# Load tokenizer & model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
 )
-# --- Helper function ---
 def chat(message, history, max_new_tokens=128, temperature=0.7):
     try:
-        # Turn history into messages for the chat template
         messages = []
         for user_msg, bot_msg in history:
             messages.append({"role": "user", "content": user_msg})
             messages.append({"role": "assistant", "content": bot_msg})
         messages.append({"role": "user", "content": message})
-        # Tokenize input
         inputs = tokenizer.apply_chat_template(
             messages,
             add_generation_prompt=True,
             tokenize=True,
             return_tensors="pt",
-        ).to(model.device)
-        # Generate response
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_new_tokens,
             temperature=temperature,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,
         )
-        # Decode only new tokens
         response = tokenizer.decode(
             outputs[0][inputs["input_ids"].shape[-1]:],
             skip_special_tokens=True
         ).strip()
         history.append((message, response))
         return history, history, ""
     except Exception as e:
         import traceback
-        traceback.print_exc()   # this will show the full error in Logs
         return history + [(message, f"⚠️ Error: {str(e)}")], history, ""
-# --- Gradio App ---
 with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 ORANSight Gemma 2 2B Instruct")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(show_label=False, placeholder="Type a message...")
     send = gr.Button("Send")
     clear = gr.Button("Clear Chat")
-    max_tokens = gr.Slider(50, 512, value=128, step=10, label="Max new tokens")
-    temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
     state = gr.State([])

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# Choose your model repo (from NextGLab)
 MODEL_NAME = "NextGLab/ORANSight_Gemma_2_2B_Instruct"
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    torch_dtype="auto",   # lets HF decide (fp16/bf16/fp32 depending on GPU)
+    device_map="auto"     # put on GPU if available
 )
+# --- Chat function ---
 def chat(message, history, max_new_tokens=128, temperature=0.7):
     try:
+        # Convert history into Hugging Face messages format
         messages = []
         for user_msg, bot_msg in history:
             messages.append({"role": "user", "content": user_msg})
             messages.append({"role": "assistant", "content": bot_msg})
         messages.append({"role": "user", "content": message})
+        # Prepare inputs with chat template → return dictionary
         inputs = tokenizer.apply_chat_template(
             messages,
             add_generation_prompt=True,
             tokenize=True,
             return_tensors="pt",
+            return_dict=True
+        )
+        # Move all tensors in the input dict to the model device
+        for k in inputs:
+            inputs[k] = inputs[k].to(model.device)
+        # Generate model output
         outputs = model.generate(
             **inputs,
             max_new_tokens=max_new_tokens,
             temperature=temperature,
             do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
         )
+        # Decode ONLY the newly generated tokens (past the input length)
         response = tokenizer.decode(
             outputs[0][inputs["input_ids"].shape[-1]:],
             skip_special_tokens=True
         ).strip()
+        # Append to history
         history.append((message, response))
         return history, history, ""
     except Exception as e:
         import traceback
+        traceback.print_exc()  # will show in HF Space Logs
         return history + [(message, f"⚠️ Error: {str(e)}")], history, ""
+# --- Gradio UI ---
 with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 ORANSight Gemma Chat (2B Instruct)")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(show_label=False, placeholder="Type a message...")
     send = gr.Button("Send")
     clear = gr.Button("Clear Chat")
+    with gr.Row():
+        max_tokens = gr.Slider(50, 512, step=10, value=128, label="Max tokens")
+        temperature = gr.Slider(0.1, 1.5, step=0.1, value=0.7, label="Temperature")
     state = gr.State([])