fix local model typo
app.py CHANGED
@@ -15,8 +15,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Model configuration
-MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
-LOCAL_MODEL_PATH = "./int4"  # Local int4 weights
+MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Global variables for model and tokenizer
@@ -118,24 +117,6 @@ def get_fallback_chat_template():
 {%- endif -%}
 {%- endif -%}"""
 
-def check_local_model():
-    """Check if local int4 model files exist"""
-    required_files = [
-        "config.json",
-        "pytorch_model.bin",
-        "tokenizer.json",
-        "tokenizer_config.json"
-    ]
-
-    for file in required_files:
-        file_path = os.path.join(LOCAL_MODEL_PATH, file)
-        if not os.path.exists(file_path):
-            logger.warning(f"Missing required file: {file_path}")
-            return False
-
-    logger.info("All required model files found locally")
-    return True
-
 def load_model():
     """Load the model and tokenizer"""
     global model, tokenizer
@@ -157,9 +138,9 @@ def load_model():
     logger.info("Fallback chat template set successfully")
 
     # Load the int4 model from local path
-    logger.info(f"Loading int4 model from {LOCAL_MODEL_PATH}")
+    logger.info(f"Loading int4 model from {MAIN_MODEL_ID}")
     model = AutoModelForCausalLM.from_pretrained(
-        LOCAL_MODEL_PATH,
+        MAIN_MODEL_ID,
        subfolder="int4",
        device_map="auto" if DEVICE == "cuda" else "cpu",
        torch_dtype=torch.bfloat16,
@@ -200,7 +181,7 @@ def create_prompt(system_message, user_message, enable_thinking=True):
 
     # Add /no_think to the end of prompt when thinking is disabled
     if not enable_thinking:
-        prompt += "
+        prompt += " /no_think"
 
     return prompt
 
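In short, this commit stops expecting a local ./int4 checkpoint (and removes the check_local_model() helper that guarded it), pulling the int4 weights directly from the Tonic/petite-elle-L-aime-3-sft repo via the subfolder argument, and it closes the string so " /no_think" is actually appended when thinking is disabled. Below is a minimal sketch of the updated loading path, not the full app: it assumes transformers, accelerate, and torch are installed, that the repo's int4 subfolder holds a loadable checkpoint, and that the tokenizer sits at the repo root; the prompt handling is simplified, whereas the real app builds prompts from a chat template.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same identifiers as in app.py above.
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Assumption: the tokenizer lives at the repo root, while the quantized
# weights live in the "int4" subfolder, so from_pretrained is pointed there.
tokenizer = AutoTokenizer.from_pretrained(MAIN_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MAIN_MODEL_ID,
    subfolder="int4",
    device_map="auto" if DEVICE == "cuda" else "cpu",
    torch_dtype=torch.bfloat16,
)

# Simplified prompt handling: the fixed create_prompt() appends " /no_think"
# when thinking is disabled; here it is tacked onto a plain string for brevity.
prompt = "Bonjour, comment vas-tu ?" + " /no_think"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Loading from the Hub subfolder also makes the removed file-existence check unnecessary, since from_pretrained handles downloading and caching the weights itself.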