Spestly committed
Commit 330c803 · verified · 1 Parent(s): ccb939a

Update app.py

Files changed (1)
  1. app.py +33 -177
app.py CHANGED
@@ -2,34 +2,16 @@ import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
-import re
-import json
 from typing import List, Dict, Optional
 
 # Global variable to store pipelines
 model_cache = {}
 
-# Available models
+# Available models (only Daedalus)
 AVAILABLE_MODELS = {
     "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
 
-def parse_thinking_tags(text):
-    """Parse text and extract thinking sections, return clean text and thinking content"""
-    think_pattern = r'<think>(.*?)</think>'
-    thinking_blocks = []
-
-    # Extract all thinking blocks
-    for match in re.finditer(think_pattern, text, re.DOTALL):
-        thinking_content = match.group(1).strip()
-        if thinking_content:
-            thinking_blocks.append(thinking_content)
-
-    # Remove thinking tags from the main text
-    clean_text = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
-
-    return clean_text, thinking_blocks
-
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache
@@ -49,7 +31,7 @@ def initialize_model(model_name):
             device_map="auto",
             trust_remote_code=True
         )
-    except Exception as e:
+    except Exception:
         # Fallback to CPU if GPU fails
         model_cache[model_id] = pipeline(
             "text-generation",
@@ -65,29 +47,22 @@ def initialize_model(model_name):
 def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
     """Generate response using the selected model"""
 
-    # Initialize model inside the GPU-decorated function
     try:
         model_pipe = initialize_model(model_name)
     except Exception as e:
-        return f"Error loading model {model_name}: {str(e)}", []
+        return f"Error loading model {model_name}: {str(e)}"
 
     # Format the conversation history
     messages = []
-
-    # Add conversation history
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
 
-    # Add current message
     messages.append({"role": "user", "content": message})
 
-    # Generate response
     try:
-        # Some models may not support the messages format, so we'll try different approaches
         try:
-            # Try with messages format first
             response = model_pipe(
                 messages,
                 max_length=max_length,
@@ -98,7 +73,6 @@ def generate_response(message, history, model_name, max_length=512, temperature=
                 return_full_text=False
             )
         except:
-            # Fallback to simple text format
            conversation_text = ""
            for msg in messages:
                if msg["role"] == "user":
@@ -117,131 +91,39 @@ def generate_response(message, history, model_name, max_length=512, temperature=
                 return_full_text=False
             )
 
-        # Extract the generated text
         if isinstance(response, list) and len(response) > 0:
             generated_text = response[0]['generated_text']
         else:
             generated_text = str(response)
 
-        # Clean up the response
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
-            # Remove the prompt and extract assistant response
             assistant_response = str(generated_text).strip()
             if "Assistant:" in assistant_response:
                 assistant_response = assistant_response.split("Assistant:")[-1].strip()
 
-        # Parse thinking tags
-        clean_response, thinking_blocks = parse_thinking_tags(assistant_response)
-
-        return clean_response, thinking_blocks
+        return assistant_response
 
     except Exception as e:
-        return f"Error generating response: {str(e)}", []
+        return f"Error generating response: {str(e)}"
 
-@spaces.GPU
-def generate(
-    model: str,
-    user_input: str,
-    history: Optional[str] = "",
-    temperature: float = 0.7,
-    system_prompt: Optional[str] = "",
-    max_tokens: int = 512
-):
-    """
-    API endpoint for LLM generation
-
-    Args:
-        model: Model name to use (Daedalus-1-8B)
-        user_input: Current user message/input
-        history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
-        temperature: Temperature for generation (0.1-2.0)
-        system_prompt: System prompt to guide the model
-        max_tokens: Maximum tokens to generate (1-8192)
-
-    Returns:
-        Generated response from the model
-    """
-
-    # Validate model
-    if model not in AVAILABLE_MODELS:
-        return f"Error: Model {model} not available. Available models: {list(AVAILABLE_MODELS.keys())}"
-
-    # Initialize model
-    try:
-        model_pipe = initialize_model(model)
-    except Exception as e:
-        return f"Error loading model {model}: {str(e)}"
-
-    # Parse history if provided and convert to gradio format
-    gradio_history = []
-    if history and history.strip():
-        try:
-            import json
-            history_list = json.loads(history)
-            current_pair = [None, None]
-            for msg in history_list:
-                if isinstance(msg, dict) and "role" in msg and "content" in msg:
-                    if msg["role"] == "user":
-                        if current_pair[0] is not None:
-                            gradio_history.append([current_pair[0], current_pair[1]])
-                        current_pair = [msg["content"], None]
-                    elif msg["role"] == "assistant":
-                        current_pair[1] = msg["content"]
-            if current_pair[0] is not None:
-                gradio_history.append([current_pair[0], current_pair[1]])
-        except:
-            # If history parsing fails, continue without history
-            pass
-
-    # Add system prompt to user input if provided
-    final_user_input = user_input
-    if system_prompt and system_prompt.strip():
-        final_user_input = f"System: {system_prompt}\n\nUser: {user_input}"
-
-    # Use the generate_response function and return only the clean response
-    clean_response, thinking_blocks = generate_response(final_user_input, gradio_history, model, max_tokens, temperature, 0.9)
-    return clean_response
-
-# Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
+    with gr.Blocks(title="Daedalus-1-8B Chat", theme=gr.themes.Base(primary_hue="green")) as demo:
         gr.Markdown("""
-        # 🚀 Daedalus-1 Model Chat Interface
+        # 🟢 Daedalus-1-8B Chat Interface
 
-        Chat with the Daedalus-1 models by Noema Research. This interface will render thinking processes when the model outputs `<think></think>` tags.
+        Chat with **Daedalus-1-8B** by Noema Research.
 
-        **Available Models:**
+        **Model:**
         - Daedalus-1-8B (8 billion parameters)
         """)
 
-        with gr.Row():
-            model_selector = gr.Dropdown(
-                choices=list(AVAILABLE_MODELS.keys()),
-                value="Daedalus-1-8B",
-                label="Select Model",
-                info="Choose which model to use for generation"
-            )
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                chatbot = gr.Chatbot(
-                    height=500,
-                    placeholder="Select a model and start chatting...",
-                    label="Chat"
-                )
-
-            with gr.Column(scale=1):
-                thinking_display = gr.Accordion("💭 Thinking Process", open=True, visible=False)
-                with thinking_display:
-                    thinking_content = gr.Textbox(
-                        label="Model's Thinking",
-                        lines=15,
-                        interactive=False,
-                        show_label=False,
-                        container=False
-                    )
+        chatbot = gr.Chatbot(
+            height=400,
+            placeholder="Start chatting with Daedalus-1-8B...",
+            label="Chat"
+        )
 
         msg = gr.Textbox(
             placeholder="Type your message here...",
@@ -279,70 +161,45 @@ def create_interface():
             info="Controls diversity via nucleus sampling"
         )
 
-        # Event handlers
         def user_message(message, history):
             return "", history + [[message, None]]
 
-        def bot_response(history, model_name, max_len, temp, top_p):
+        def bot_response(history, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
-                clean_response, thinking_blocks = generate_response(
+                bot_message = generate_response(
                     user_message,
                     history[:-1],
-                    model_name,
+                    "Daedalus-1-8B",
                     max_len,
                     temp,
                     top_p
                 )
-
-                # Update chat with clean response
-                history[-1][1] = clean_response
-
-                # Format thinking content for display
-                thinking_text = ""
-                if thinking_blocks:
-                    for i, thinking in enumerate(thinking_blocks, 1):
-                        thinking_text += f"=== Thinking Block {i} ===\n\n{thinking}\n\n"
-
-                return history, thinking_text, gr.update(visible=bool(thinking_blocks))
-
-            return history, "", gr.update(visible=False)
-
-        def clear_chat():
-            return None, "", gr.update(visible=False)
-
-        def model_changed(model_name):
-            return gr.update(placeholder=f"Chat with {model_name}...")
+                history[-1][1] = bot_message
+            return history
 
-        # Wire up the events
-        msg_submit = msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, model_selector, max_length, temperature, top_p],
-            [chatbot, thinking_content, thinking_display]
+        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
+            bot_response, [chatbot, max_length, temperature, top_p], chatbot
         )
 
-        btn_submit = submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, model_selector, max_length, temperature, top_p],
-            [chatbot, thinking_content, thinking_display]
+        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
+            bot_response, [chatbot, max_length, temperature, top_p], chatbot
        )
 
-        clear_btn.click(clear_chat, None, [chatbot, thinking_content, thinking_display], queue=False)
-
-        model_selector.change(model_changed, model_selector, chatbot)
+        clear_btn.click(lambda: None, None, chatbot, queue=False)
 
         gr.Markdown("""
         ---
 
-        ### About the Daedalus-1 Models
-
-        **Daedalus-1-8B**: Daedalus-1-8B is an 8 billion parameter language model for code generation and reasoning, developed by **Noema Research**.
-        It is a finetuned derivative of [Seed-Coder-8B-Reasoning](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning),
-        with enhancements for instruction following, structured code generation, and improved safety alignment.
-
-
-        The model is designed for conversational AI and supports various text generation tasks. When the model uses thinking tags (`<think></think>`),
-        this interface will show the thinking process in a separate panel on the right.
+        ### About Daedalus-1-8B
+        **Daedalus-1-8B** is a state-of-the-art code reasoning model by Noema Research,
+        fine-tuned for structured outputs, debugging, and long-context reasoning (up to ~64K tokens).
 
-        This Space uses ZeroGPU for efficient GPU allocation.
+        Optimized for:
+        - Conversational AI
+        - Code generation & debugging
+        - Structured JSON/function outputs
+        - Multi-step reasoning
         """)
 
     return demo
@@ -350,5 +207,4 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
-    # Enable API and launch
-    demo.launch(share=True)
+    demo.launch(share=True)
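
For reference, a minimal sketch of the generation path the updated app.py relies on, runnable outside the Space. The model id, device_map, trust_remote_code, and the sampling arguments are taken from the diff itself; do_sample=True and the dtype are assumptions (the app leaves them implicit), so treat this as an approximation rather than the Space's exact configuration.

from transformers import pipeline

# Build the pipeline roughly as initialize_model() does in the diff.
pipe = pipeline(
    "text-generation",
    model="NoemaResearch/Daedalus-1-8B",
    device_map="auto",
    trust_remote_code=True,
)

# Chat-style input, shaped like the messages list generate_response() builds.
messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]

result = pipe(
    messages,
    max_length=512,        # app default
    temperature=0.7,       # app default
    top_p=0.9,             # app default
    do_sample=True,        # assumed; temperature/top_p only take effect when sampling
    return_full_text=False,
)
print(result[0]["generated_text"])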