Tonic committed
Commit a3113ce · 1 Parent(s): f3832f5

fix local model typo

Files changed (1)
  1. app.py +4 -23
app.py CHANGED
@@ -15,8 +15,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Model configuration
-MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"  # Main repo for config and chat template
-LOCAL_MODEL_PATH = "./int4"  # Local int4 weights
+MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Global variables for model and tokenizer
@@ -118,24 +117,6 @@ def get_fallback_chat_template():
 {%- endif -%}
 {%- endif -%}"""
 
-def check_local_model():
-    """Check if local int4 model files exist"""
-    required_files = [
-        "config.json",
-        "pytorch_model.bin",
-        "tokenizer.json",
-        "tokenizer_config.json"
-    ]
-
-    for file in required_files:
-        file_path = os.path.join(LOCAL_MODEL_PATH, file)
-        if not os.path.exists(file_path):
-            logger.warning(f"Missing required file: {file_path}")
-            return False
-
-    logger.info("All required model files found locally")
-    return True
-
 def load_model():
     """Load the model and tokenizer"""
     global model, tokenizer
@@ -157,9 +138,9 @@ def load_model():
     logger.info("Fallback chat template set successfully")
 
     # Load the int4 model from local path
-    logger.info(f"Loading int4 model from {LOCAL_MODEL_PATH}")
+    logger.info(f"Loading int4 model from {MAIN_MODEL_ID}")
     model = AutoModelForCausalLM.from_pretrained(
-        LOCAL_MODEL_PATH,
+        MAIN_MODEL_ID,
         subfolder="int4",
         device_map="auto" if DEVICE == "cuda" else "cpu",
         torch_dtype=torch.bfloat16,
@@ -200,7 +181,7 @@ def create_prompt(system_message, user_message, enable_thinking=True):
 
     # Add /no_think to the end of prompt when thinking is disabled
     if not enable_thinking:
-        prompt += " /no_think"
+        prompt += " /no_think"
 
     return prompt
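
For context, a minimal sketch of the loading path after this commit: instead of reading int4 weights from a local ./int4 directory (and pre-checking the files with the now-deleted check_local_model), the app fetches them from the int4/ subfolder of the Hub repo. The surrounding setup (tokenizer, fallback chat template, error handling) is omitted here, and any detail not visible in the hunks above is an assumption.

    import torch
    from transformers import AutoModelForCausalLM

    MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    # Download config and weights from the repo's int4/ subfolder,
    # mirroring the from_pretrained(...) call in the diff above.
    # Assumption: the repo hosts int4 weights under int4/, as the diff implies.
    model = AutoModelForCausalLM.from_pretrained(
        MAIN_MODEL_ID,
        subfolder="int4",
        device_map="auto" if DEVICE == "cuda" else "cpu",
        torch_dtype=torch.bfloat16,
    )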
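The last hunk touches the " /no_think" suffix in create_prompt. As a simplified sketch of that behavior (the real app builds the prompt from the model's chat template, which these hunks don't show): when thinking is disabled, the suffix is appended so the model skips its reasoning trace.

    def create_prompt(system_message, user_message, enable_thinking=True):
        # Hypothetical stand-in for the template-driven prompt construction in app.py.
        prompt = f"{system_message}\n{user_message}"
        # Add /no_think to the end of prompt when thinking is disabled
        if not enable_thinking:
            prompt += " /no_think"
        return prompt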