Spaces:

Smilyai-labs
/

Sam-3-PRO-SOLVER-chat

Runtime error

App Files Community

Keeby-smilyai committed on Sep 15

Commit

a459d20

verified ·

1 Parent(s): 07e759b

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -39

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 # -------------------------------
 # app.py
 #
-# This file contains the backend logic and Gradio UI for the chatbot.
-# Now using Sam-3.0-3 from Smilyai-labs/Sam-3.0-3 — a model that thinks, reasons, and responds with clarity.
 # -------------------------------
 import math
@@ -18,7 +18,7 @@ import os
 from huggingface_hub import hf_hub_download
 # -------------------------------
-# 1) Sam-3.0-3 Architecture (from your second code)
 # -------------------------------
 @dataclass
 class Sam3Config:
@@ -116,7 +116,7 @@ class Sam3(nn.Module):
         return self.lm_head(x)
 # -------------------------------
-# 2) Load tokenizer & special tokens (Sam-3.0-3 style)
 # -------------------------------
 SPECIAL_TOKENS = {
     "bos": "<|bos|>",
@@ -127,19 +127,18 @@ SPECIAL_TOKENS = {
     "think": "<|think|>",
 }
-# Use GPT-2 tokenizer and add special tokens
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 tokenizer.add_special_tokens({"additional_special_tokens": list(SPECIAL_TOKENS.values())})
-EOT_ID = SPECIAL_TOKENS["eot"]
-EOT_ID = tokenizer.convert_tokens_to_ids(EOT_ID) or tokenizer.eos_token_id
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # -------------------------------
-# 3) Download model weights from Hugging Face Hub
 # -------------------------------
 hf_repo = "Smilyai-labs/Sam-3.0-3"
 weights_filename = "model.safetensors"
@@ -147,11 +146,9 @@ weights_filename = "model.safetensors"
 print(f"Loading model '{hf_repo}' from Hugging Face Hub...")
 try:
-    # Download weights
     weights_path = hf_hub_download(repo_id=hf_repo, filename=weights_filename)
     print(f"✅ Downloaded weights to: {weights_path}")
-    # Verify file size
     if not os.path.exists(weights_path):
         raise FileNotFoundError(f"Downloaded file not found at {weights_path}")
     file_size = os.path.getsize(weights_path)
@@ -160,20 +157,18 @@ try:
 except Exception as e:
     raise RuntimeError(f"❌ Failed to download model weights: {e}")
-# Initialize model with correct vocab size
 cfg = Sam3Config(vocab_size=len(tokenizer))
 model = Sam3(cfg).to(device)
 # Load state dict safely
 print("Loading state dict...")
 try:
-    # Try safe_open first (preferred)
     state_dict = {}
     with safe_open(weights_path, framework="pt", device="cpu") as f:
         for key in f.keys():
             state_dict[key] = f.get_tensor(key)
     print("✅ Loaded via safe_open")
 except Exception as e:
     print(f"⚠️ safe_open failed: {e}. Falling back to torch.load...")
     try:
@@ -182,24 +177,22 @@ except Exception as e:
     except Exception as torch_e:
         raise RuntimeError(f"❌ Could not load model weights: {torch_e}")
-# Filter state_dict to match model keys
 model_state_dict = model.state_dict()
 filtered_state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict}
-# Warn about missing/extra keys
 missing_keys = set(model_state_dict.keys()) - set(filtered_state_dict.keys())
 extra_keys = set(state_dict.keys()) - set(model_state_dict.keys())
 if missing_keys:
-    print(f"⚠️ Missing keys in loaded state dict: {missing_keys}")
 if extra_keys:
-    print(f"⚠️ Extra keys in loaded state dict: {extra_keys}")
 model.load_state_dict(filtered_state_dict, strict=False)
 model.eval()
 print("✅ Model loaded successfully!")
 # -------------------------------
-# 4) Sampling function (unchanged from Sam-3.0-3 code)
 # -------------------------------
 def sample_next_token(
     logits,
@@ -277,12 +270,12 @@ def sample_next_token(
     return next_token.to(device)
 # -------------------------------
-# 5) Gradio Chat UI and API Logic (Updated with truthful, compelling UI)
 # -------------------------------
-SPECIAL_TOKENS_CHAT = {"bos": "<|bos|>", "eot": "<|eot|>", "user": "<|user|>", "assistant": "<|assistant|>", "system": "<|system|>"}
 def predict(message, history):
-    # Construct the chat history with special tokens
     chat_history = []
     for human, assistant in history:
         chat_history.append(f"{SPECIAL_TOKENS_CHAT['user']} {human} {SPECIAL_TOKENS_CHAT['eot']}")
@@ -291,44 +284,85 @@ def predict(message, history):
     chat_history.append(f"{SPECIAL_TOKENS_CHAT['user']} {message} {SPECIAL_TOKENS_CHAT['eot']}")
-    system_prompt = "You are Sam-3, an advanced reasoning AI. You think step by step, analyze deeply, and answer with precision. You do not guess — you deduce. Avoid medical or legal advice."
-    prompt = f"{SPECIAL_TOKENS_CHAT['system']} {system_prompt} {SPECIAL_TOKENS_CHAT['eot']}\n" + "\n".join(chat_history) + f"\n{SPECIAL_TOKENS_CHAT['assistant']}"
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     input_ids = inputs["input_ids"]
     attention_mask = inputs["attention_mask"]
     generated_text = ""
     for _ in range(256):
         with torch.no_grad():
             logits = model(input_ids, attention_mask=attention_mask)
         next_token = sample_next_token(logits, input_ids[0], temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.1)
         token_id = int(next_token.squeeze().item())
-        token_str = tokenizer.decode([token_id], skip_special_tokens=True)
         input_ids = torch.cat([input_ids, next_token], dim=1)
         attention_mask = torch.cat([attention_mask, torch.ones((attention_mask.size(0), 1), device=device, dtype=attention_mask.dtype)], dim=1)
-        generated_text += token_str
-        yield generated_text
-        if token_id == EOT_ID:
             break
-# Gradio Interface — Now Truthfully Representing the Model’s Capabilities
 demo = gr.ChatInterface(
     fn=predict,
     title="🌟 Sam-3: The Reasoning AI",
     description="""
-    Sam-3 is not just a language model — it **thinks before it speaks**.
-    Built with deep architectural integrity, it analyzes problems step-by-step, uncovers hidden patterns, and delivers precise, logical answers.
-    No fluff. No guessing. Just reasoning.
-    Try asking it:
-    → “If I have 3 apples and give away half of them, then buy 5 more, how many do I have?”
     → “Explain quantum entanglement like I’m 10.”
-    → “What’s the flaw in this argument: ‘All birds fly; penguins are birds; therefore penguins can fly’?”
     """,
     theme=gr.themes.Soft(
         primary_hue="indigo",
@@ -338,6 +372,10 @@ demo = gr.ChatInterface(
         label="Sam-3 🤔",
         bubble_full_width=False,
         height=600,
     ),
     examples=[
         "What is the capital of France?",
@@ -345,6 +383,7 @@ demo = gr.ChatInterface(
         "If a train leaves at 2 PM going 60 mph, and another leaves 30 minutes later at 80 mph, when does the second catch up?",
         "What are the ethical implications of AI making medical diagnoses?"
     ],
     cache_examples=False
 ).launch(
     show_api=True

 # -------------------------------
 # app.py
 #
+# Sam-3: The Reasoning AI — Now Showing Its Thought Process!
+# Powered by Smilyai-labs/Sam-3.0-3. Trained to think before speaking.
 # -------------------------------
 import math
 from huggingface_hub import hf_hub_download
 # -------------------------------
+# 1) Sam-3.0-3 Architecture
 # -------------------------------
 @dataclass
 class Sam3Config:
         return self.lm_head(x)
 # -------------------------------
+# 2) Load Tokenizer & Special Tokens
 # -------------------------------
 SPECIAL_TOKENS = {
     "bos": "<|bos|>",
     "think": "<|think|>",
 }
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 tokenizer.add_special_tokens({"additional_special_tokens": list(SPECIAL_TOKENS.values())})
+EOT_ID = tokenizer.convert_tokens_to_ids("<|eot|>") or tokenizer.eos_token_id
+THINK_ID = tokenizer.convert_tokens_to_ids("<|think|>")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # -------------------------------
+# 3) Download Model Weights from Hugging Face Hub
 # -------------------------------
 hf_repo = "Smilyai-labs/Sam-3.0-3"
 weights_filename = "model.safetensors"
 print(f"Loading model '{hf_repo}' from Hugging Face Hub...")
 try:
     weights_path = hf_hub_download(repo_id=hf_repo, filename=weights_filename)
     print(f"✅ Downloaded weights to: {weights_path}")
     if not os.path.exists(weights_path):
         raise FileNotFoundError(f"Downloaded file not found at {weights_path}")
     file_size = os.path.getsize(weights_path)
 except Exception as e:
     raise RuntimeError(f"❌ Failed to download model weights: {e}")
+# Initialize model
 cfg = Sam3Config(vocab_size=len(tokenizer))
 model = Sam3(cfg).to(device)
 # Load state dict safely
 print("Loading state dict...")
 try:
     state_dict = {}
     with safe_open(weights_path, framework="pt", device="cpu") as f:
         for key in f.keys():
             state_dict[key] = f.get_tensor(key)
     print("✅ Loaded via safe_open")
 except Exception as e:
     print(f"⚠️ safe_open failed: {e}. Falling back to torch.load...")
     try:
     except Exception as torch_e:
         raise RuntimeError(f"❌ Could not load model weights: {torch_e}")
+# Filter and load
 model_state_dict = model.state_dict()
 filtered_state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict}
 missing_keys = set(model_state_dict.keys()) - set(filtered_state_dict.keys())
 extra_keys = set(state_dict.keys()) - set(model_state_dict.keys())
 if missing_keys:
+    print(f"⚠️ Missing keys: {missing_keys}")
 if extra_keys:
+    print(f"⚠️ Extra keys: {extra_keys}")
 model.load_state_dict(filtered_state_dict, strict=False)
 model.eval()
 print("✅ Model loaded successfully!")
 # -------------------------------
+# 4) Sampling Function (Unchanged)
 # -------------------------------
 def sample_next_token(
     logits,
     return next_token.to(device)
 # -------------------------------
+# 5) Gradio Chat Interface — WITH STYLED THINKING STEPS
 # -------------------------------
+SPECIAL_TOKENS_CHAT = {"bos": "<|bos|>", "eot": "<|eot|>", "user": "<|user|>", "assistant": "<|assistant|>", "system": "<|system|>", "think": "<|think|>"}
 def predict(message, history):
+    # Build prompt with <|think|> to trigger internal reasoning
     chat_history = []
     for human, assistant in history:
         chat_history.append(f"{SPECIAL_TOKENS_CHAT['user']} {human} {SPECIAL_TOKENS_CHAT['eot']}")
     chat_history.append(f"{SPECIAL_TOKENS_CHAT['user']} {message} {SPECIAL_TOKENS_CHAT['eot']}")
+    system_prompt = "You are Sam-3, an advanced reasoning AI. You think step-by-step, analyze deeply, and respond with precision. You do not guess — you deduce. Avoid medical or legal advice."
+    prompt = f"{SPECIAL_TOKENS_CHAT['system']} {system_prompt} {SPECIAL_TOKENS_CHAT['eot']}\n" + "\n".join(chat_history) + f"\n{SPECIAL_TOKENS_CHAT['assistant']} {SPECIAL_TOKENS_CHAT['think']}"
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     input_ids = inputs["input_ids"]
     attention_mask = inputs["attention_mask"]
     generated_text = ""
+    thinking_mode = False
+    thinking_buffer = ""
     for _ in range(256):
         with torch.no_grad():
             logits = model(input_ids, attention_mask=attention_mask)
         next_token = sample_next_token(logits, input_ids[0], temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.1)
         token_id = int(next_token.squeeze().item())
+        token_str = tokenizer.decode([token_id], skip_special_tokens=False)  # Keep special tokens!
         input_ids = torch.cat([input_ids, next_token], dim=1)
         attention_mask = torch.cat([attention_mask, torch.ones((attention_mask.size(0), 1), device=device, dtype=attention_mask.dtype)], dim=1)
+        # Detect if we're entering/exiting thinking mode
+        if not thinking_mode and token_str == "<|think|>":
+            thinking_mode = True
+            thinking_buffer = ""  # Start capturing thoughts
+            continue  # Don't yield <|think|> itself
+        if thinking_mode:
+            if token_str == "<|eot|>":
+                # End of thought — now yield the full thinking block
+                thinking_buffer = thinking_buffer.strip()
+                if thinking_buffer:
+                    # Yield as styled markdown block
+                    yield f"<div style='background-color:#f8f9fa; padding:12px; border-left:4px solid #ccc; border-radius:0 8px 8px 0; margin:10px 0; font-style:italic; color:#555;'>💡 Thinking: {thinking_buffer}</div>"
+                thinking_mode = False
+                continue
+            else:
+                thinking_buffer += token_str
+                continue  # Don't yield yet — buffer until <|eot|>
+        # Normal response output
+        if not thinking_mode:
+            generated_text += token_str
+            yield generated_text
+        # Stop on final EOT
+        if token_id == EOT_ID and not thinking_mode:
             break
+# Custom CSS for styling chat message bubbles (thinking blocks are styled inline in predict)
+CSS = """
+.gradio-container .message-bubble {
+    border-radius: 12px !important;
+}
+.gradio-container .message-bubble.user {
+    background-color: #1f7bff !important;
+    color: white !important;
+}
+.gradio-container .message-bubble.assistant {
+    background-color: #e9ecef !important;
+    color: #212529 !important;
+}
+"""
+# Gradio Interface
 demo = gr.ChatInterface(
     fn=predict,
     title="🌟 Sam-3: The Reasoning AI",
     description="""
+    Sam-3 doesn’t just answer — it **thinks first**.
+    Watch its internal reasoning unfold in real time — step by step, clearly shown.
+    No guessing. No fluff. Just pure deduction.
+    Try asking:
+    → “Why does a mirror reverse left and right but not up and down?”
+    → “If I have 3 apples and give away half, then buy 5 more, how many do I have?”
     → “Explain quantum entanglement like I’m 10.”
+    → “What’s wrong with this argument: ‘All birds fly; penguins are birds; therefore penguins can fly’?”
     """,
     theme=gr.themes.Soft(
         primary_hue="indigo",
         label="Sam-3 🤔",
         bubble_full_width=False,
         height=600,
+        avatar_images=(
+            "https://huggingface.co/datasets/huggingface/branding/resolve/main/avatar-bot.jpg",
+            "https://huggingface.co/datasets/huggingface/branding/resolve/main/avatar-user.jpg"
+        )
     ),
     examples=[
         "What is the capital of France?",
         "If a train leaves at 2 PM going 60 mph, and another leaves 30 minutes later at 80 mph, when does the second catch up?",
         "What are the ethical implications of AI making medical diagnoses?"
     ],
+    css=CSS,
     cache_examples=False
 ).launch(
     show_api=True