Spestly committed (verified)
Commit 2410ca2 · Parent(s): 8d4bde5

Update app.py

Files changed (1)
  1. app.py +87 -58
app.py CHANGED
@@ -8,12 +8,15 @@ from typing import List, Dict, Optional
 model_cache = {}
 tokenizer_cache = {}
 
-# Available models (only Daedalus)
+# Available models
 AVAILABLE_MODELS = {
     "Daedalus-1-2B": "NoemaResearch/Daedalus-1-2B",
     "Daedalus-1-8B": "NoemaResearch/Daedalus-1-8B",
 }
 
+# Models that need special token handling for repetition issues
+MODELS_NEEDING_SPECIAL_HANDLING = {"Daedalus-1-8B"}
+
 @spaces.GPU
 def initialize_model(model_name):
     global model_cache, tokenizer_cache
@@ -116,58 +119,76 @@ def generate_response(message, history, model_name, max_length=512, temperature=
     # Format the conversation using the chat template
     formatted_prompt = format_conversation_with_template(messages, tokenizer)
 
-    # CRITICAL: Proper stop tokens to prevent repetition
-    stop_tokens = [
-        "<[end▁of▁sentence]>",    # EOS token
-        "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
-        "user\n",                 # Stop if model tries to continue conversation
-        "system\n",               # Stop if model tries to add system messages
-        "\nuser",                 # Alternative format
-        "\nsystem"                # Alternative format
-    ]
-
-    response = model_pipe(
-        formatted_prompt,
-        max_new_tokens=max_length,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-        pad_token_id=1,  # PAD token ID from your config
-        eos_token_id=2,  # EOS token ID from your config
-        bos_token_id=0,  # BOS token ID from your config
-        return_full_text=False,
-        # Add repetition penalty to reduce loops
-        repetition_penalty=1.1,
-        # Stop on these strings
-        stop_sequence=stop_tokens[0]  # Primary stop token
-    )
+    # Different generation parameters based on model
+    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
+        # 8B model needs special token handling to prevent repetition
+        stop_tokens = [
+            "<[end▁of▁sentence]>",    # EOS token
+            "<[begin▁of▁sentence]>",  # BOS token (shouldn't appear mid-generation)
+            "user\n",                 # Stop if model tries to continue conversation
+            "system\n",               # Stop if model tries to add system messages
+            "\nuser",                 # Alternative format
+            "\nsystem"                # Alternative format
+        ]
+
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=1,  # PAD token ID from config
+            eos_token_id=2,  # EOS token ID from config
+            bos_token_id=0,  # BOS token ID from config
+            return_full_text=False,
+            repetition_penalty=1.1,  # Reduce loops
+            stop_sequence=stop_tokens[0]  # Primary stop token
+        )
+    else:
+        # 2B model - standard generation without special handling
+        response = model_pipe(
+            formatted_prompt,
+            max_new_tokens=max_length,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            return_full_text=False,
+            repetition_penalty=1.05  # Light repetition penalty
+        )
 
     if isinstance(response, list) and len(response) > 0:
         generated_text = response[0]['generated_text']
     else:
         generated_text = str(response)
 
-    # Clean up the response - remove stop tokens and formatting
+    # Clean up the response
     assistant_response = str(generated_text).strip()
 
-    # Remove stop tokens if they appear in output
-    for stop_token in stop_tokens:
-        if stop_token in assistant_response:
-            assistant_response = assistant_response.split(stop_token)[0].strip()
-
-    # Remove any residual role formatting
-    if assistant_response.startswith("assistant\n"):
-        assistant_response = assistant_response[10:].strip()
-
-    # Additional cleanup for common repetition patterns
-    lines = assistant_response.split('\n')
-    cleaned_lines = []
-    for line in lines:
-        # Skip empty lines or lines that look like role markers
-        if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
-            cleaned_lines.append(line)
-
-    assistant_response = '\n'.join(cleaned_lines).strip()
+    # Apply different cleanup based on model
+    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
+        # More aggressive cleanup for 8B model
+        stop_tokens = [
+            "<[end▁of▁sentence]>", "<[begin▁of▁sentence]>",
+            "user\n", "system\n", "\nuser", "\nsystem"
+        ]
+
+        for stop_token in stop_tokens:
+            if stop_token in assistant_response:
+                assistant_response = assistant_response.split(stop_token)[0].strip()
+
+        # Additional cleanup for common repetition patterns
+        lines = assistant_response.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            if line.strip() and not line.strip().startswith(('user', 'assistant', 'system')):
+                cleaned_lines.append(line)
+        assistant_response = '\n'.join(cleaned_lines).strip()
+    else:
+        # Standard cleanup for 2B model
+        if assistant_response.startswith("assistant\n"):
+            assistant_response = assistant_response[10:].strip()
 
     return assistant_response if assistant_response else "I apologize, but I couldn't generate a proper response. Please try again."
 
@@ -177,14 +198,19 @@ def generate_response(message, history, model_name, max_length=512, temperature=
 def create_interface():
     with gr.Blocks(title="Daedalus-1-8B Chat", theme=gr.themes.Base(primary_hue="green")) as demo:
         gr.Markdown("""
-        # 🟢 Daedalus-1-8B Chat Interface
-
-        Chat with **Daedalus-1-8B** by Noema Research.
+        # 🟢 Daedalus Chat Interface
 
-        **Model:**
-        - Daedalus-1-8B (8 billion parameters)
+        Chat with **Daedalus models** by Noema Research.
         """)
 
+        # Model selection dropdown
+        model_dropdown = gr.Dropdown(
+            choices=list(AVAILABLE_MODELS.keys()),
+            value="Daedalus-1-2B",  # Default to 2B model
+            label="Select Model",
+            info="Choose between Daedalus-1-2B (faster) or Daedalus-1-8B (more capable)"
+        )
+
         chatbot = gr.Chatbot(
             height=400,
             placeholder="Start chatting with Daedalus-1-8B...",
@@ -230,13 +256,13 @@ def create_interface():
         def user_message(message, history):
             return "", history + [[message, None]]
 
-        def bot_response(history, max_len, temp, top_p):
+        def bot_response(history, selected_model, max_len, temp, top_p):
             if history:
                 user_message = history[-1][0]
                 bot_message = generate_response(
                     user_message,
                     history[:-1],
-                    "Daedalus-1-8B",
+                    selected_model,  # Use selected model
                     max_len,
                     temp,
                     top_p
@@ -245,11 +271,11 @@ def create_interface():
             return history
 
         msg.submit(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_dropdown, max_length, temperature, top_p], chatbot
        )
 
         submit_btn.click(user_message, [msg, chatbot], [msg, chatbot]).then(
-            bot_response, [chatbot, max_length, temperature, top_p], chatbot
+            bot_response, [chatbot, model_dropdown, max_length, temperature, top_p], chatbot
        )
 
         clear_btn.click(lambda: None, None, chatbot, queue=False)
@@ -257,11 +283,14 @@ def create_interface():
         gr.Markdown("""
         ---
 
-        ### About Daedalus-1-8B
-        **Daedalus-1-8B** is a state-of-the-art code reasoning model by Noema Research,
-        fine-tuned for structured outputs, debugging, and long-context reasoning (up to ~64K tokens).
+        ### About Daedalus Models
+
+        **Daedalus-1-2B:** Faster, lightweight model for quick responses and basic coding tasks.
+
+        **Daedalus-1-8B:** More capable model with advanced reasoning, fine-tuned for structured outputs,
+        debugging, and long-context reasoning (up to ~64K tokens).
 
-        Optimized for:
+        Both models are optimized for:
         - Conversational AI
         - Code generation & debugging
         - Structured JSON/function outputs
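
For reference, the core change in this commit is a per-model branch over generation parameters. Below is a minimal standalone sketch of that branching, not code from the commit: the helper name build_generation_kwargs is hypothetical, the token IDs, stop strings, and penalty values are copied from the diff, and the tokenizer-derived IDs used by the 2B branch are passed in as plain arguments since no tokenizer is loaded here.

from typing import Any, Dict, Optional

MODELS_NEEDING_SPECIAL_HANDLING = {"Daedalus-1-8B"}

# Stop strings copied from the diff; the first one is the model's EOS marker.
STOP_TOKENS = [
    "<[end▁of▁sentence]>",
    "<[begin▁of▁sentence]>",
    "user\n", "system\n", "\nuser", "\nsystem",
]

def build_generation_kwargs(model_name: str,
                            max_length: int = 512,
                            temperature: float = 0.7,
                            top_p: float = 0.9,
                            pad_token_id: Optional[int] = None,
                            eos_token_id: Optional[int] = None) -> Dict[str, Any]:
    """Mirror the if/else branch in generate_response: special handling
    for the 8B model, standard settings for the 2B model."""
    kwargs: Dict[str, Any] = {
        "max_new_tokens": max_length,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
        "return_full_text": False,
    }
    if model_name in MODELS_NEEDING_SPECIAL_HANDLING:
        # 8B model: hard-coded special-token IDs, stronger repetition
        # penalty, and an explicit stop sequence to curb runaway output.
        kwargs.update(
            pad_token_id=1,
            eos_token_id=2,
            bos_token_id=0,
            repetition_penalty=1.1,
            stop_sequence=STOP_TOKENS[0],
        )
    else:
        # 2B model: use the tokenizer's own special tokens (passed in by
        # the caller) and only a light repetition penalty.
        kwargs.update(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            repetition_penalty=1.05,
        )
    return kwargs

if __name__ == "__main__":
    print(build_generation_kwargs("Daedalus-1-8B"))
    print(build_generation_kwargs("Daedalus-1-2B", pad_token_id=0, eos_token_id=1))

The resulting dictionary corresponds to the keyword arguments that generate_response forwards to the text-generation pipeline in the diff above.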