Daedalus-1

Sleeping

App Files Files Community

Spestly committed on Aug 31

Commit

330c803

verified ·

1 Parent(s): ccb939a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -177

app.py CHANGED Viewed

@@ -2,34 +2,16 @@ import gradio as gr
 import spaces
 from transformers import pipeline
 import torch
-import re
-import json
 from typing import List, Dict, Optional
 # Global variable to store pipelines
 model_cache = {}
-# Available models
 AVAILABLE_MODELS = {
     "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
-def parse_thinking_tags(text):
-    """Parse text and extract thinking sections, return clean text and thinking content"""
-    think_pattern = r'<think>(.*?)</think>'
-    thinking_blocks = []
-    # Extract all thinking blocks
-    for match in re.finditer(think_pattern, text, re.DOTALL):
-        thinking_content = match.group(1).strip()
-        if thinking_content:
-            thinking_blocks.append(thinking_content)
-    # Remove thinking tags from the main text
-    clean_text = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
-    return clean_text, thinking_blocks
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache
@@ -49,7 +31,7 @@ def initialize_model(model_name):
                 device_map="auto",
                 trust_remote_code=True
             )
-        except Exception as e:
             # Fallback to CPU if GPU fails
             model_cache[model_id] = pipeline(
                 "text-generation",
@@ -65,29 +47,22 @@ def initialize_model(model_name):
 def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
     """Generate response using the selected model"""
-    # Initialize model inside the GPU-decorated function
     try:
         model_pipe = initialize_model(model_name)
     except Exception as e:
-        return f"Error loading model {model_name}: {str(e)}", []
     # Format the conversation history
     messages = []
-    # Add conversation history
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
-    # Add current message
     messages.append({"role": "user", "content": message})
-    # Generate response
     try:
-        # Some models may not support the messages format, so we'll try different approaches
         try:
-            # Try with messages format first
             response = model_pipe(
                 messages,
                 max_length=max_length,
@@ -98,7 +73,6 @@ def generate_response(message, history, model_name, max_length=512, temperature=
                 return_full_text=False
             )
         except:
-            # Fallback to simple text format
             conversation_text = ""
             for msg in messages:
                 if msg["role"] == "user":
@@ -117,131 +91,39 @@ def generate_response(message, history, model_name, max_length=512, temperature=
                 return_full_text=False
             )
-        # Extract the generated text
         if isinstance(response, list) and len(response) > 0:
             generated_text = response[0]['generated_text']
         else:
             generated_text = str(response)
-        # Clean up the response
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
-            # Remove the prompt and extract assistant response
             assistant_response = str(generated_text).strip()
             if "Assistant:" in assistant_response:
                 assistant_response = assistant_response.split("Assistant:")[-1].strip()
-        # Parse thinking tags
-        clean_response, thinking_blocks = parse_thinking_tags(assistant_response)
-        return clean_response, thinking_blocks
     except Exception as e:
-        return f"Error generating response: {str(e)}", []
-@spaces.GPU
-def generate(
-    model: str,
-    user_input: str,
-    history: Optional[str] = "",
-    temperature: float = 0.7,
-    system_prompt: Optional[str] = "",
-    max_tokens: int = 512
-):
-    """
-    API endpoint for LLM generation
-    Args:
-        model: Model name to use (Daedalus-1-8B)
-        user_input: Current user message/input
-        history: JSON string of conversation history in format [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]
-        temperature: Temperature for generation (0.1-2.0)
-        system_prompt: System prompt to guide the model
-        max_tokens: Maximum tokens to generate (1-8192)
-    Returns:
-        Generated response from the model
-    """
-    # Validate model
-    if model not in AVAILABLE_MODELS:
-        return f"Error: Model {model} not available. Available models: {list(AVAILABLE_MODELS.keys())}"
-    # Initialize model
-    try:
-        model_pipe = initialize_model(model)
-    except Exception as e:
-        return f"Error loading model {model}: {str(e)}"
-    # Parse history if provided and convert to gradio format
-    gradio_history = []
-    if history and history.strip():
-        try:
-            import json
-            history_list = json.loads(history)
-            current_pair = [None, None]
-            for msg in history_list:
-                if isinstance(msg, dict) and "role" in msg and "content" in msg:
-                    if msg["role"] == "user":
-                        if current_pair[0] is not None:
-                            gradio_history.append([current_pair[0], current_pair[1]])
-                        current_pair = [msg["content"], None]
-                    elif msg["role"] == "assistant":
-                        current_pair[1] = msg["content"]
-            if current_pair[0] is not None:
-                gradio_history.append([current_pair[0], current_pair[1]])
-        except:
-            # If history parsing fails, continue without history
-            pass
-    # Add system prompt to user input if provided
-    final_user_input = user_input
-    if system_prompt and system_prompt.strip():
-        final_user_input = f"System: {system_prompt}\n\nUser: {user_input}"
-    # Use the generate_response function and return only the clean response
-    clean_response, thinking_blocks = generate_response(final_user_input, gradio_history, model, max_tokens, temperature, 0.9)
-    return clean_response
-# Create the Gradio interface
 def create_interface():
-    with gr.Blocks(title="Multi-Model Chat", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🚀 Daedalus-1 Model Chat Interface
-        Chat with the Daedalus-1 models by Noema Research. This interface will render thinking processes when the model outputs `<think></think>` tags.
-        **Available Models:**
         - Daedalus-1-8B (8 billion parameters)
         """)
-        with gr.Row():
-            model_selector = gr.Dropdown(
-                choices=list(AVAILABLE_MODELS.keys()),
-                value="Daedalus-1-8B",
-                label="Select Model",
-                info="Choose which model to use for generation"
-            )
-        with gr.Row():
-            with gr.Column(scale=2):
-                chatbot = gr.Chatbot(
-                    height=500,
-                    placeholder="Select a model and start chatting...",
-                    label="Chat"
-                )
-            with gr.Column(scale=1):
-                thinking_display = gr.Accordion("💭 Thinking Process", open=True, visible=False)
-                with thinking_display:
-                    thinking_content = gr.Textbox(
-                        label="Model's Thinking",
-                        lines=15,
-                        interactive=False,
-                        show_label=False,
-                        container=False
-                    )
         msg = gr.Textbox(
             placeholder="Type your message here...",
@@ -279,70 +161,45 @@ def create_interface():
                 info="Controls diversity via nucleus sampling"
             )
-        # Event handlers
         def user_message(message, history):
             return "", history + [[message, None]]
-        def bot_response(history, model_name, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
-                clean_response, thinking_blocks = generate_response(
                     user_message,
                     history[:-1],
-                    model_name,
                     max_len,
                     temp,
                     top_p
                 )
-                # Update chat with clean response
-                history[-1][1] = clean_response
-                # Format thinking content for display
-                thinking_text = ""
-                if thinking_blocks:
-                    for i, thinking in enumerate(thinking_blocks, 1):
-                        thinking_text += f"=== Thinking Block {i} ===\n\n{thinking}\n\n"
-                return history, thinking_text, gr.update(visible=bool(thinking_blocks))
-            return history, "", gr.update(visible=False)
-        def clear_chat():
-            return None, "", gr.update(visible=False)
-        def model_changed(model_name):
-            return gr.update(placeholder=f"Chat with {model_name}...")
-        # Wire up the events
-        msg_submit = msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, model_selector, max_length, temperature, top_p],
-            [chatbot, thinking_content, thinking_display]
         )
-        btn_submit = submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, model_selector, max_length, temperature, top_p],
-            [chatbot, thinking_content, thinking_display]
         )
-        clear_btn.click(clear_chat, None, [chatbot, thinking_content, thinking_display], queue=False)
-        model_selector.change(model_changed, model_selector, chatbot)
         gr.Markdown("""
         ---
-        ### About the Daedalus-1 Models
-        **Daedalus-1-8B**: Daedalus-1-8B is an 8 billion parameter language model for code generation and reasoning, developed by **Noema Research**.
-        It is a finetuned derivative of [Seed-Coder-8B-Reasoning](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning),
-        with enhancements for instruction following, structured code generation, and improved safety alignment.
-        The model is designed for conversational AI and supports various text generation tasks. When the model uses thinking tags (`<think></think>`),
-        this interface will show the thinking process in a separate panel on the right.
-        This Space uses ZeroGPU for efficient GPU allocation.
         """)
     return demo
@@ -350,5 +207,4 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
-    # Enable API and launch
-    demo.launch(share=True)

 import spaces
 from transformers import pipeline
 import torch
 from typing import List, Dict, Optional
 # Global variable to store pipelines
 model_cache = {}
+# Available models (only Daedalus)
 AVAILABLE_MODELS = {
     "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache
                 device_map="auto",
                 trust_remote_code=True
             )
+        except Exception:
             # Fallback to CPU if GPU fails
             model_cache[model_id] = pipeline(
                 "text-generation",
 def generate_response(message, history, model_name, max_length=512, temperature=0.7, top_p=0.9):
     """Generate response using the selected model"""
     try:
         model_pipe = initialize_model(model_name)
     except Exception as e:
+        return f"Error loading model {model_name}: {str(e)}"
     # Format the conversation history
     messages = []
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
     try:
         try:
             response = model_pipe(
                 messages,
                 max_length=max_length,
                 return_full_text=False
             )
         except:
             conversation_text = ""
             for msg in messages:
                 if msg["role"] == "user":
                 return_full_text=False
             )
         if isinstance(response, list) and len(response) > 0:
             generated_text = response[0]['generated_text']
         else:
             generated_text = str(response)
         if isinstance(generated_text, list):
             assistant_response = generated_text[-1]['content']
         else:
             assistant_response = str(generated_text).strip()
             if "Assistant:" in assistant_response:
                 assistant_response = assistant_response.split("Assistant:")[-1].strip()
+        return assistant_response
     except Exception as e:
+        return f"Error generating response: {str(e)}"
 def create_interface():
+    with gr.Blocks(title="Daedalus-1-8B Chat", theme=gr.themes.Base(primary_hue="green")) as demo:
         gr.Markdown("""
+        # 🟢 Daedalus-1-8B Chat Interface
+        Chat with **Daedalus-1-8B** by Noema Research.
+        **Model:**
         - Daedalus-1-8B (8 billion parameters)
         """)
+        chatbot = gr.Chatbot(
+            height=400,
+            placeholder="Start chatting with Daedalus-1-8B...",
+            label="Chat"
+        )
         msg = gr.Textbox(
             placeholder="Type your message here...",
                 info="Controls diversity via nucleus sampling"
             )
         def user_message(message, history):
             return "", history + [[message, None]]
+        def bot_response(history, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
+                bot_message = generate_response(
                     user_message,
                     history[:-1],
+                    "Daedalus-1-8B",
                     max_len,
                     temp,
                     top_p
                 )
+                history[-1][1] = bot_message
+            return history
+        msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
+            bot_response, [chatbot, max_length, temperature, top_p], chatbot
         )
+        submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
+            bot_response, [chatbot, max_length, temperature, top_p], chatbot
         )
+        clear_btn.click(lambda: None, None, chatbot, queue=False)
         gr.Markdown("""
         ---
+        ### About Daedalus-1-8B
+        **Daedalus-1-8B** is a state-of-the-art code reasoning model by Noema Research,
+        fine-tuned for structured outputs, debugging, and long-context reasoning (up to ~64K tokens).
+        Optimized for:
+        - Conversational AI
+        - Code generation & debugging
+        - Structured JSON/function outputs
+        - Multi-step reasoning
         """)
     return demo
 # Launch the app
 if __name__ == "__main__":
     demo = create_interface()
+    demo.launch(share=True)