Spaces: Running on Zero

attempts to add chat template

- app.py +99 -56
- requirements.txt +2 -2

app.py CHANGED
@@ -53,6 +53,70 @@ joinus = """
 4. Use advanced settings for fine-tuning
 """
 
+def download_chat_template():
+    """Download the chat template from the main repository"""
+    try:
+        chat_template_url = f"https://huggingface.co/{MAIN_MODEL_ID}/raw/main/chat_template.jinja"
+        logger.info(f"Downloading chat template from {chat_template_url}")
+
+        response = requests.get(chat_template_url, timeout=30)
+        response.raise_for_status()
+
+        chat_template_content = response.text
+        logger.info("Chat template downloaded successfully")
+        return chat_template_content
+
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Error downloading chat template: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error downloading chat template: {e}")
+        return None
+
+def get_fallback_chat_template():
+    """Return a fallback chat template if download fails"""
+    return """{# ───── defaults ───── #}
+{%- if enable_thinking is not defined -%}
+    {%- set enable_thinking = true -%}
+{%- endif -%}
+
+{# ───── reasoning mode ───── #}
+{%- if enable_thinking -%}
+    {%- set reasoning_mode = "/think" -%}
+{%- else -%}
+    {%- set reasoning_mode = "/no_think" -%}
+{%- endif -%}
+
+{# ───── header (system message) ───── #}
+{{- "<|im_start|>system\\n" -}}
+{{- system_message | trim -}}
+{{- "<|im_end|>\\n" -}}
+
+{# ───── conversation history ───── #}
+{%- for message in messages -%}
+    {%- set content = message.content | trim -%}
+    {%- if message.role == "user" -%}
+        {{ "<|im_start|>user\\n" + content + "<|im_end|>\\n" }}
+    {%- elif message.role == "assistant" -%}
+        {%- if content.startswith("<think>") and content.endswith("</think>") -%}
+            {{ "<|im_start|>assistant\\n" + content + "<|im_end|>\\n" }}
+        {%- else -%}
+            {{ "<|im_start|>assistant\\n" + "<think>\\n\\n</think>\\n" + content.lstrip("\\n") + "<|im_end|>\\n" }}
+        {%- endif -%}
+    {%- elif message.role == "tool" -%}
+        {{ "<|im_start|>" + "user\\n" + content + "<|im_end|>\\n" }}
+    {%- endif -%}
+{%- endfor -%}
+
+{# ───── generation prompt ───── #}
+{%- if add_generation_prompt -%}
+    {%- if reasoning_mode == "/think" -%}
+        {{ "<|im_start|>assistant\\n" }}
+    {%- else -%}
+        {{ "<|im_start|>assistant\\n" + "<think>\\n\\n</think>\\n" }}
+    {%- endif -%}
+{%- endif -%}"""
+
 def check_local_model():
     """Check if local int4 model files exist"""
     required_files = [
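Since the fallback template drives the /think and /no_think behavior, a quick way to sanity-check it is to attach it to any tokenizer and render a conversation. The sketch below is illustrative only and not part of the commit: the model ID is a placeholder, and `system_message`/`enable_thinking` are passed through `apply_chat_template`'s extra kwargs, which the template reads as plain variables.

```python
# Illustrative smoke test (not in the commit): render the fallback template.
# "HuggingFaceTB/SmolLM3-3B" is a placeholder model ID; the tokenizer only
# serves as a host for the Jinja template here.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")
tokenizer.chat_template = get_fallback_chat_template()  # function from the diff above

messages = [{"role": "user", "content": "What is 2 + 2?"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,                 # return the rendered string, not token IDs
    add_generation_prompt=True,     # append the "<|im_start|>assistant" opener
    system_message="You are a helpful assistant.",  # template variable, value assumed
    enable_thinking=False,          # selects the /no_think branch
)
print(prompt)
# Expected shape: <|im_start|>system ... <|im_end|> ... <|im_start|>assistant<think>\n\n</think>
```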
@@ -74,20 +138,42 @@ def check_local_model():
 def load_model():
     """Load the model and tokenizer"""
     global model, tokenizer
-
-
-
-
-
-
-
-
-
-
-
-
+
+    try:
+        # Load tokenizer from main repository to get the base configuration
+        logger.info(f"Loading tokenizer from {MAIN_MODEL_ID}")
+        tokenizer = AutoTokenizer.from_pretrained(MAIN_MODEL_ID, subfolder="int4")
+
+        # Download and set the chat template
+        chat_template = download_chat_template()
+        if chat_template:
+            tokenizer.chat_template = chat_template
+            logger.info("Chat template downloaded and set successfully")
+        else:
+            # Use fallback chat template
+            logger.warning("Failed to download chat template, using fallback")
+            tokenizer.chat_template = get_fallback_chat_template()
+            logger.info("Fallback chat template set successfully")
+
+        # Load the int4 model from local path
+        logger.info(f"Loading int4 model from {LOCAL_MODEL_PATH}")
+        model = AutoModelForCausalLM.from_pretrained(
+            LOCAL_MODEL_PATH,
+            subfolder="int4",
+            device_map="auto" if DEVICE == "cuda" else "cpu",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True
+        )
+
+        if tokenizer.pad_token_id is None:
+            tokenizer.pad_token_id = tokenizer.eos_token_id
+
         logger.info("Model loaded successfully")
         return True
+
+    except Exception as e:
+        logger.error(f"Error loading model: {e}")
+        return False
 
 
 def create_prompt(system_message, user_message, enable_thinking=True):
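Once load_model() has populated the globals, generation follows the usual transformers pattern. The following is a hedged sketch, not code from the commit: it assumes the module-level `model` and `tokenizer` have been set, and the sampling parameters are arbitrary.

```python
# Hedged usage sketch (not part of the commit). Assumes load_model() has run
# and set the module-level `model` and `tokenizer`; sampling values are made up.
import torch

def generate_reply(user_message, enable_thinking=True, max_new_tokens=256):
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_message}],
        tokenize=False,
        add_generation_prompt=True,
        system_message="You are a helpful assistant.",  # assumed template variable value
        enable_thinking=enable_thinking,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
```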
@@ -283,49 +369,6 @@ with gr.Blocks() as demo:
 )
 
 if __name__ == "__main__":
-
-    # logger.info("Starting advanced model download and verification process...")
-    # try:
-    #     from download_model import main as download_main, check_model_files, verify_model_integrity
-
-    #     # Check if model files already exist and are valid
-    #     if check_model_files():
-    #         logger.info("Model files found, verifying integrity...")
-    #         if verify_model_integrity():
-    #             logger.info("✅ Model files verified successfully - no download needed")
-    #         else:
-    #             logger.warning("⚠️ Model files exist but failed integrity check, re-downloading...")
-    #             download_success = download_main()
-    #             if not download_success:
-    #                 logger.error("❌ Model download failed")
-    #                 sys.exit(1)
-    #     else:
-    #         logger.info("📥 Model files not found, downloading...")
-    #         download_success = download_main()
-    #         if download_success:
-    #             logger.info("✅ Model download and verification completed successfully")
-    #         else:
-    #             logger.error("❌ Model download failed")
-    #             sys.exit(1)
-
-    # except ImportError as e:
-    #     logger.error(f"❌ Error importing download_model: {e}")
-    #     logger.info("🔄 Continuing with direct model loading...")
-    # except Exception as e:
-    #     logger.error(f"❌ Error during model download process: {e}")
-    #     logger.info("🔄 Continuing with direct model loading...")
-
-    # # Load model with enhanced error handling
-    # logger.info("🚀 Loading model...")
-    # try:
-    #     if not load_model():
-    #         logger.error("❌ Failed to load model. Please check the logs above.")
-    #         sys.exit(1)
-    #     logger.info("✅ Model loaded successfully")
-    # except Exception as e:
-    #     logger.error(f"❌ Error loading model: {e}")
-    #     sys.exit(1)
-
-    # logger.info("🚀 Starting Gradio application...")
+
     demo.queue()
     demo.launch(ssr_mode=False, mcp_server=True)
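The commit drops the entire commented-out download-and-verify startup path, so the __main__ block now only queues and launches the UI, and model loading presumably happens elsewhere. If eager, fail-fast loading were still wanted, a minimal guard like the one below would do; this is a suggestion reusing names from the diff (load_model, logger, demo), not code from the commit.

```python
# Hypothetical fail-fast startup (not in the commit): load eagerly and exit
# on failure instead of relying on lazy loading at request time.
import sys

if __name__ == "__main__":
    if not load_model():
        logger.error("Failed to load model; see the log output above.")
        sys.exit(1)
    demo.queue()
    demo.launch(ssr_mode=False, mcp_server=True)
```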
requirements.txt CHANGED

@@ -7,5 +7,5 @@ safetensors>=0.4.0
 tokenizers>=0.21.2
 pyyaml>=6.0
 psutil>=5.9.0
-
-
+tqdm>=4.64.0
+requests>=2.31.0
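Of the two new pins, requests>=2.31.0 backs the download_chat_template() call added in app.py; tqdm>=4.64.0 is presumably for progress reporting during model download, a use not visible in this diff.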