Spestly committed (verified)
Commit 2410ca2 · Parent(s): 8d4bde5

Update app.py

Files changed (1)
  1. app.py +87 -58
app.py CHANGED
@@ -8,12 +8,15 @@ from typing import List, Dict, Optional
 model_cache = {}
 tokenizer_cache = {}
 
-# Available models (only Daedalus)
+# Available models
 AVAILABLE_MODELS = {
     "Daedalus-1-2B": "NoemaResearch/Daedalus-1-2B",
     "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
 
+# Models that need special token handling for repetition issues
+MODELS_NEEDING_SPECIAL_HANDLING = {"Daedalus-1-8B"}
+
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache, tokenizer_cache
@@ -116,58 +119,76 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     # Format the conversation using the chat template
     formatted_prompt = format_conversation_with_template(messages, tokenizer)
 
-    # CRITICAL: Proper stop tokens to prevent repetition
-    stop_tokens = [
-        "<[end▁of▁sentence]>",    # EOS token
-        "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
-        "user\n",                 # Stop if model tries to continue conversation
-        "system\n",               # Stop if model tries to add system messages
-        "\nuser",                 # Alternative format
-        "\nsystem"                # Alternative format
-    ]
-
-    response = model_pipe(
-        formatted_prompt,
-        max_new_tokens=max_length,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-        pad_token_id=1,  # PAD token ID from your config
-        eos_token_id=2,  # EOS token ID from your config
-        bos_token_id=0,  # BOS token ID from your config
-        return_full_text=False,
-        # Add repetition penalty to reduce loops
-        repetition_penalty=1.1,
-        # Stop on these strings
-        stop_sequence=stop_tokens[0]  # Primary stop token
-    )
+    # Different generation parameters based on model
+    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
+        # 8B model needs special token handling to prevent repetition
+        stop_tokens = [
+            "<[end▁of▁sentence]>",    # EOS token
+            "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
+            "user\n",                 # Stop if model tries to continue conversation
+            "system\n",               # Stop if model tries to add system messages
+            "\nuser",                 # Alternative format
+            "\nsystem"                # Alternative format
+        ]
+
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=1,  # PAD token ID from config
+            eos_token_id=2,  # EOS token ID from config
+            bos_token_id=0,  # BOS token ID from config
+            return_full_text=False,
+            repetition_penalty=1.1,  # Reduce loops
+            stop_sequence=stop_tokens[0]  # Primary stop token
+        )
+    else:
+        # 2B model - standard generation without special handling
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            return_full_text=False,
+            repetition_penalty=1.05  # Light repetition penalty
+        )
 
     if isinstance(response, list) and len(response) > 0:
         generated_text = response[0]['generated_text']
     else:
         generated_text = str(response)
 
-    # Clean up the response - remove stop tokens and formatting
+    # Clean up the response
     assistant_response = str(generated_text).strip()
 
-    # Remove stop tokens if they appear in output
-    for stop_token in stop_tokens:
-        if stop_token in assistant_response:
-            assistant_response = assistant_response.split(stop_token)[0].strip()
-
-    # Remove any residual role formatting
-    if assistant_response.startswith("assistant\n"):
-        assistant_response = assistant_response[10:].strip()
-
-    # Additional cleanup for common repetition patterns
-    lines = assistant_response.split('\n')
-    cleaned_lines = []
-    for line in lines:
-        # Skip empty lines or lines that look like role markers
-        if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
-            cleaned_lines.append(line)
-
-    assistant_response = '\n'.join(cleaned_lines).strip()
+    # Apply different cleanup based on model
+    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
+        # More aggressive cleanup for 8B model
+        stop_tokens = [
+            "<[end▁of▁sentence]>", "<[begin▁of▁sentence]>",
+            "user\n", "system\n", "\nuser", "\nsystem"
+        ]
+
+        for stop_token in stop_tokens:
+            if stop_token in assistant_response:
+                assistant_response = assistant_response.split(stop_token)[0].strip()
+
+        # Additional cleanup for common repetition patterns
+        lines = assistant_response.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
+                cleaned_lines.append(line)
+        assistant_response = '\n'.join(cleaned_lines).strip()
+    else:
+        # Standard cleanup for 2B model
+        if assistant_response.startswith("assistant\n"):
+            assistant_response = assistant_response[10:].strip()
 
     return assistant_response if assistant_response else "I apologize, but I couldn't generate a proper response. Please try again."
 
@@ -177,14 +198,19 @@ def generate_response(message, history, model_name, max_length=512, temperature=
 def create_interface():
     with gr.Blocks(title="Daedalus-1-8B Chat", theme=gr.themes.Base(primary_hue="green")) as demo:
         gr.Markdown("""
-        # 🟢 Daedalus-1-8B Chat Interface
-
-        Chat with **Daedalus-1-8B** by Noema Research.
+        # 🟢 Daedalus Chat Interface
 
-        **Model:**
-        - Daedalus-1-8B (8 billion parameters)
+        Chat with **Daedalus models** by Noema Research.
         """)
 
+        # Model selection dropdown
+        model_dropdown = gr.Dropdown(
+            choices=list(AVAILABLE_MODELS.keys()),
+            value="Daedalus-1-2B",  # Default to 2B model
+            label="Select Model",
+            info="Choose between Daedalus-1-2B (faster) or Daedalus-1-8B (more capable)"
+        )
+
         chatbot = gr.Chatbot(
             height=400,
             placeholder="Start chatting with Daedalus-1-8B...",
@@ -230,13 +256,13 @@ def create_interface():
         def user_message(message, history):
             return "", history + [[message, None]]
 
-        def bot_response(history, max_len, temp, top_p):
+        def bot_response(history, selected_model, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
                 bot_message = generate_response(
                     user_message,
                     history[:-1],
-                    "Daedalus-1-8B",
+                    selected_model,  # Use selected model
                     max_len,
                     temp,
                     top_p
@@ -245,11 +271,11 @@ def create_interface():
             return history
 
         msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_dropdown, max_length, temperature, top_p], chatbot
        )
 
         submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_dropdown, max_length, temperature, top_p], chatbot
        )
 
         clear_btn.click(lambda: None, None, chatbot, queue=False)
@@ -257,11 +283,14 @@ def create_interface():
         gr.Markdown("""
         ---
 
-        ### About Daedalus-1-8B
-        **Daedalus-1-8B** is a state-of-the-art code reasoning model by Noema Research,
-        fine-tuned for structured outputs, debugging, and long-context reasoning (up to ~64K tokens).
+        ### About Daedalus Models
+
+        **Daedalus-1-2B:** Faster, lightweight model for quick responses and basic coding tasks.
+
+        **Daedalus-1-8B:** More capable model with advanced reasoning, fine-tuned for structured outputs,
+        debugging, and long-context reasoning (up to ~64K tokens).
 
-        Optimized for:
+        Both models are optimized for:
         - Conversational AI
         - Code generation & debugging
         - Structured JSON/function outputs
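
For reference, the core change in this commit is a per-model branch over generation parameters. Below is a minimal standalone sketch of that branching, not code from the commit: the helper name build_generation_kwargs is hypothetical, the token IDs, stop strings, and penalty values are copied from the diff, and the tokenizer-derived IDs used by the 2B branch are passed in as plain arguments since no tokenizer is loaded here.

from typing import Any, Dict, Optional

MODELS_NEEDING_SPECIAL_HANDLING = {"Daedalus-1-8B"}

# Stop strings copied from the diff; the first one is the model's EOS marker.
STOP_TOKENS = [
    "<[end▁of▁sentence]>",
    "<[begin▁of▁sentence]>",
    "user\n", "system\n", "\nuser", "\nsystem",
]

def build_generation_kwargs(model_name: str,
                            max_length: int = 512,
                            temperature: float = 0.7,
                            top_p: float = 0.9,
                            pad_token_id: Optional[int] = None,
                            eos_token_id: Optional[int] = None) -> Dict[str, Any]:
    """Mirror the if/else branch in generate_response: special handling
    for the 8B model, standard settings for the 2B model."""
    kwargs: Dict[str, Any] = {
        "max_new_tokens": max_length,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
        "return_full_text": False,
    }
    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
        # 8B model: hard-coded special-token IDs, stronger repetition
        # penalty, and an explicit stop sequence to curb runaway output.
        kwargs.update(
            pad_token_id=1,
            eos_token_id=2,
            bos_token_id=0,
            repetition_penalty=1.1,
            stop_sequence=STOP_TOKENS[0],
        )
    else:
        # 2B model: use the tokenizer's own special tokens (passed in by
        # the caller) and only a light repetition penalty.
        kwargs.update(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            repetition_penalty=1.05,
        )
    return kwargs

if __name__ == "__main__":
    print(build_generation_kwargs("Daedalus-1-8B"))
    print(build_generation_kwargs("Daedalus-1-2B", pad_token_id=0, eos_token_id=1))

The resulting dictionary corresponds to the keyword arguments that generate_response forwards to the text-generation pipeline in the diff above.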