fix local model typo
app.py CHANGED
@@ -15,8 +15,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Model configuration
-MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
-LOCAL_MODEL_PATH = "./int4"  # Local int4 weights
+MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Global variables for model and tokenizer
@@ -118,24 +117,6 @@ def get_fallback_chat_template():
 {%- endif -%}
 {%- endif -%}"""
 
-def check_local_model():
-    """Check if local int4 model files exist"""
-    required_files = [
-        "config.json",
-        "pytorch_model.bin",
-        "tokenizer.json",
-        "tokenizer_config.json"
-    ]
-
-    for file in required_files:
-        file_path = os.path.join(LOCAL_MODEL_PATH, file)
-        if not os.path.exists(file_path):
-            logger.warning(f"Missing required file: {file_path}")
-            return False
-
-    logger.info("All required model files found locally")
-    return True
-
 def load_model():
     """Load the model and tokenizer"""
     global model, tokenizer
@@ -157,9 +138,9 @@ def load_model():
     logger.info("Fallback chat template set successfully")
 
     # Load the int4 model from local path
-    logger.info(f"Loading int4 model from {LOCAL_MODEL_PATH}")
+    logger.info(f"Loading int4 model from {MAIN_MODEL_ID}")
     model = AutoModelForCausalLM.from_pretrained(
-        LOCAL_MODEL_PATH,
+        MAIN_MODEL_ID,
        subfolder="int4",
        device_map="auto" if DEVICE == "cuda" else "cpu",
        torch_dtype=torch.bfloat16,
@@ -200,7 +181,7 @@ def create_prompt(system_message, user_message, enable_thinking=True):
 
     # Add /no_think to the end of prompt when thinking is disabled
     if not enable_thinking:
-        prompt += "
+        prompt += " /no_think"
 
     return prompt
 
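In short, this commit stops expecting a local ./int4 checkpoint (and removes the check_local_model() helper that guarded it), pulling the int4 weights directly from the Tonic/petite-elle-L-aime-3-sft repo via the subfolder argument, and it closes the string so " /no_think" is actually appended when thinking is disabled. Below is a minimal sketch of the updated loading path, not the full app: it assumes transformers, accelerate, and torch are installed, that the repo's int4 subfolder holds a loadable checkpoint, and that the tokenizer sits at the repo root; the prompt handling is simplified, whereas the real app builds prompts from a chat template.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same identifiers as in app.py above.
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Assumption: the tokenizer lives at the repo root, while the quantized
# weights live in the "int4" subfolder, so from_pretrained is pointed there.
tokenizer = AutoTokenizer.from_pretrained(MAIN_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MAIN_MODEL_ID,
    subfolder="int4",
    device_map="auto" if DEVICE == "cuda" else "cpu",
    torch_dtype=torch.bfloat16,
)

# Simplified prompt handling: the fixed create_prompt() appends " /no_think"
# when thinking is disabled; here it is tacked onto a plain string for brevity.
prompt = "Bonjour, comment vas-tu ?" + " /no_think"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Loading from the Hub subfolder also makes the removed file-existence check unnecessary, since from_pretrained handles downloading and caching the weights itself.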