Spaces:

mgbam
/

builder

Running

App Files Community

mgbam committed on Jul 18

Commit

c5abecc

verified ·

1 Parent(s): 9b8ec81

Update utils.py

Browse files

Files changed (1) hide show

utils.py +133 -61

utils.py CHANGED Viewed

@@ -3,101 +3,173 @@
 """
 A collection of utility functions for data manipulation and formatting.
-This module provides helpers for tasks like converting chat history formats,
-processing images for multimodal models, cleaning model outputs, and
-applying code modifications.
 """
 import base64
 import io
 import re
-from typing import Dict, List, Optional, Tuple
 import numpy as np
 from PIL import Image
 from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES
-# --- Type Definitions ---
 History = List[Tuple[Optional[str], Optional[str]]]
-Messages = List[Dict[str, any]]
-# --- History and Message Conversion ---
 def history_to_messages(history: History, system_prompt: str) -> Messages:
-    """Converts Gradio's history format to the list of messages format for an API call."""
-    messages = [{'role': 'system', 'content': system_prompt}]
     for user_msg, assistant_msg in history:
-        # Handle potential multimodal user message (which comes as a list)
         if isinstance(user_msg, list):
-            # Find the text part of the message for history
-            text_content = next((item.get("text", "") for item in user_msg if isinstance(item, dict) and item.get("type") == "text"), "")
-            messages.append({'role': 'user', 'content': text_content})
         elif user_msg:
-             messages.append({'role': 'user', 'content': user_msg})
         if assistant_msg:
-            messages.append({'role': 'assistant', 'content': assistant_msg})
     return messages
-def messages_to_history(messages: Messages) -> History:
-    """Converts a list of messages back to Gradio's history format."""
-    history = []
-    # Skip system message at index 0
-    for i in range(1, len(messages), 2):
-        user_msg = messages[i]['content']
-        assistant_msg = messages[i+1]['content'] if (i+1) < len(messages) else ""
-        history.append((user_msg, assistant_msg))
-    return history
-# --- Image Processing ---
 def process_image_for_model(image_data: np.ndarray) -> str:
-    """Converts a NumPy image array to a base64-encoded string."""
     pil_img = Image.fromarray(image_data)
     buffer = io.BytesIO()
     pil_img.save(buffer, format="PNG")
     img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"
-# --- Code Manipulation ---
 def remove_code_block(text: str) -> str:
-    """Extracts code from a markdown-style code block."""
-    pattern = r'```(?:[a-zA-Z]+)?\n(.*?)\n```'
-    match = re.search(pattern, text, re.DOTALL)
-    if match:
-        return match.group(1).strip()
-    return text.strip()
-def apply_search_replace(original_code: str, change_block: str) -> str:
-    """Applies a single search-and-replace block to the code."""
-    try:
-        parts = re.split(f"^{DIVIDER}$", change_block, flags=re.MULTILINE)
-        if len(parts) != 2: return original_code # Invalid block
-        search_part, replace_part = parts
-        search_content = search_part.replace(SEARCH_START, "").strip()
-        replace_content = replace_part.replace(REPLACE_END, "").strip()
-        # To insert, search block is empty or just contains the line before insertion
-        if not search_content:
-             # Inserting at the beginning
-             return replace_content + "\n" + original_code
-        if search_content in original_code:
-            return original_code.replace(search_content, replace_content)
         else:
-            # Handle insertion case where `search_content` is the line *before* insertion point
-            # and `replace_content` includes that line plus the new code.
-            # This is a common pattern LLMs use.
-            # We can simply return the original code, as more advanced logic is needed to reliably handle this.
-            print(f"Warning: Search block not found:\n---\n{search_content}\n---")
-            return original_code
-    except Exception as e:
-        print(f"Error applying changes: {e}")
-        return original_code
 def get_gradio_language(language: str) -> Optional[str]:
-    """Returns the language name if supported by Gradio, otherwise None."""
     return language if language in GRADIO_SUPPORTED_LANGUAGES else None

 """
 A collection of utility functions for data manipulation and formatting.
+This module provides helpers for tasks such as:
+- Converting between different chat history formats (internal state vs. API vs. UI).
+- Processing images for multimodal language models.
+- Extracting and modifying code based on specific patterns.
+- Validating language support for UI components.
 """
 import base64
 import io
 import re
+import logging
+from typing import Dict, List, Optional, Tuple, Any
 import numpy as np
 from PIL import Image
 from config import SEARCH_START, DIVIDER, REPLACE_END, GRADIO_SUPPORTED_LANGUAGES
+# --- Type Aliases for Clarity ---
+# Internal history format: a list of (user, assistant) tuples.
 History = List[Tuple[Optional[str], Optional[str]]]
+# API/Gradio message format: a list of OpenAI-style dictionaries.
+Messages = List[Dict[str, Any]]
+# --------------------------------------------------------------------------
+# 1. HISTORY & MESSAGE CONVERSION
+# --------------------------------------------------------------------------
 def history_to_messages(history: History, system_prompt: str) -> Messages:
+    """
+    Converts the internal history (list of tuples) to the API message format.
+    This format is required for making calls to the LLM API and includes the
+    system prompt at the beginning.
+    Args:
+        history: The conversation history as a list of (user, assistant) tuples.
+        system_prompt: The initial system prompt to guide the model.
+    Returns:
+        A list of message dictionaries in the format expected by the API.
+    """
+    messages: Messages = [{'role': 'system', 'content': system_prompt}]
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({'role': 'user', 'content': user_msg})
+        if assistant_msg:
+            messages.append({'role': 'assistant', 'content': assistant_msg})
+    return messages
+def history_to_chatbot_messages(history: History) -> Messages:
+    """
+    Converts the internal history (list of tuples) to the Gradio Chatbot format.
+    The modern `gr.Chatbot` component with `type="messages"` expects a list of
+    dictionaries, excluding the system prompt.
+    Args:
+        history: The conversation history as a list of (user, assistant) tuples.
+    Returns:
+        A list of message dictionaries for display in the Gradio Chatbot UI.
+    """
+    messages: Messages = []
     for user_msg, assistant_msg in history:
+        # For display, we only care about the text part of a multimodal message
         if isinstance(user_msg, list):
+            display_text = next((item.get("text", "") for item in user_msg if isinstance(item, dict) and item.get("type") == "text"), "")
+            messages.append({"role": "user", "content": display_text})
         elif user_msg:
+            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
     return messages
+# --------------------------------------------------------------------------
+# 2. CONTENT & CODE PROCESSING
+# --------------------------------------------------------------------------
 def process_image_for_model(image_data: np.ndarray) -> str:
+    """
+    Converts a NumPy image array from Gradio into a base64-encoded data URI.
+    Args:
+        image_data: The image as a NumPy array.
+    Returns:
+        A base64-encoded string formatted as a data URI for multimodal models.
+    """
     pil_img = Image.fromarray(image_data)
     buffer = io.BytesIO()
     pil_img.save(buffer, format="PNG")
     img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
     return f"data:image/png;base64,{img_str}"
 def remove_code_block(text: str) -> str:
+    """
+    Extracts code from a markdown-style code block.
+    This function robustly handles code blocks with or without language
+    specifiers and with varying whitespace.
+    Args:
+        text: The raw string from the model, potentially containing a code block.
+    Returns:
+        The extracted code, or the original text if no block is found.
+    """
+    pattern = r'```[a-zA-Z]*\s*\n?(.*?)\n?```'
+    match = re.search(pattern, text, re.DOTALL)
+    if match:
+        return match.group(1).strip()
+    return text.strip()  # Fallback for when no code block is detected
+def apply_search_replace_changes(original_code: str, changes_text: str) -> str:
+    """
+    Applies one or more SEARCH/REPLACE blocks to the original code.
+    This function iterates through all search/replace blocks in the given
+    `changes_text` and applies them sequentially to the `original_code`.
+    Args:
+        original_code: The starting code to be modified.
+        changes_text: A string containing one or more formatted change blocks.
+    Returns:
+        The modified code after all changes have been applied.
+    """
+    modified_code = original_code
+    # Define the pattern to find all SEARCH/REPLACE blocks
+    block_pattern = re.compile(
+        rf"^{SEARCH_START}\n(.*?)\n^{DIVIDER}\n(.*?)\n^{REPLACE_END}",
+        re.DOTALL | re.MULTILINE
+    )
+    for match in block_pattern.finditer(changes_text):
+        search_content = match.group(1)
+        replace_content = match.group(2)
+        if search_content in modified_code:
+            modified_code = modified_code.replace(search_content, replace_content, 1)
         else:
+            # Handle insertion case: if search block is empty, prepend.
+            if not search_content.strip():
+                 modified_code = replace_content + "\n" + modified_code
+            else:
+                logging.warning(
+                    f"Search block not found in the code. Skipping this change.\n"
+                    f"--- BLOCK NOT FOUND ---\n{search_content}\n-----------------------"
+                )
+    return modified_code
+# --------------------------------------------------------------------------
+# 3. UI HELPERS
+# --------------------------------------------------------------------------
 def get_gradio_language(language: str) -> Optional[str]:
+    """
+    Returns the language name if it is supported for syntax highlighting by Gradio.
+    Args:
+        language: The language identifier (e.g., "python", "html").
+    Returns:
+        The language string if supported, otherwise None.
+    """
     return language if language in GRADIO_SUPPORTED_LANGUAGES else None