AIencoder committed
Commit 58a2c61 · verified · 1 Parent(s): 2cea58f

Update forgekit/ai_advisor.py

Files changed (1):
  1. forgekit/ai_advisor.py +80 -135
forgekit/ai_advisor.py CHANGED
@@ -1,224 +1,169 @@
-"""AI-powered merge advisor using HuggingFace Inference API."""
+"""AI-powered merge advisor using Groq API (free, fast inference)."""
 
-import json
+import os
 import requests
 from typing import Optional
 
-HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
-DEFAULT_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+DEFAULT_MODEL = "llama-3.3-70b-versatile"
 
 
-def _query_llm(
+def _query_groq(
     prompt: str,
     system: str = "",
     model: str = DEFAULT_MODEL,
-    token: Optional[str] = None,
-    max_tokens: int = 800,
+    api_key: Optional[str] = None,
+    max_tokens: int = 1024,
 ) -> str:
-    """Query an LLM via HF Inference API.
+    """Query Groq's OpenAI-compatible API.
 
     Args:
         prompt: User message
         system: System prompt
-        model: HF model ID for inference
-        token: HF API token (recommended for higher rate limits)
+        model: Groq model ID
+        api_key: Groq API key (free at console.groq.com)
         max_tokens: Max response length
 
     Returns:
         Generated text response
     """
-    headers = {"Content-Type": "application/json"}
-    if token:
-        headers["Authorization"] = f"Bearer {token}"
+    key = (api_key or "").strip() or os.environ.get("GROQ_API_KEY", "")
+    if not key:
+        return (
+            "**Groq API Key required** — the AI Advisor uses Groq for fast, free inference.\n\n"
+            "1. Go to [console.groq.com](https://console.groq.com) and sign up (free, no credit card)\n"
+            "2. Create an API key\n"
+            "3. Paste it in the field above\n\n"
+            "Groq gives you thousands of free requests per day with Llama 3.3 70B!"
+        )
 
-    # Format as chat messages
     messages = []
     if system:
         messages.append({"role": "system", "content": system})
     messages.append({"role": "user", "content": prompt})
 
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
     payload = {
-        "inputs": _format_chat(messages, model),
-        "parameters": {
-            "max_new_tokens": max_tokens,
-            "temperature": 0.7,
-            "do_sample": True,
-            "return_full_text": False,
-        },
+        "model": model,
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": 0.7,
     }
 
     try:
-        resp = requests.post(
-            f"{HF_INFERENCE_URL}/{model}",
-            headers=headers,
-            json=payload,
-            timeout=60,
-        )
-
-        if resp.status_code == 503:
-            # Model loading
-            return "⏳ The AI model is loading (this can take 1-2 minutes on first use). Please try again shortly."
+        resp = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=30)
 
         if resp.status_code == 429:
-            return "⚠️ Rate limited — please wait a moment and try again, or add your HF token for higher limits."
-
+            return "Rate limited — Groq free tier allows ~30 requests/min. Wait a moment and try again."
+        if resp.status_code == 401:
+            return "Invalid Groq API key. Get a free one at [console.groq.com](https://console.groq.com)."
         if resp.status_code != 200:
-            return f"⚠️ AI service returned status {resp.status_code}. Try again or add an HF token."
+            return f"Groq API error (status {resp.status_code}). Try again."
 
         data = resp.json()
-        if isinstance(data, list) and len(data) > 0:
-            text = data[0].get("generated_text", "")
-            # Clean up any leftover template tokens
-            for tag in ["</s>", "<|im_end|>", "<|eot_id|>", "[/INST]"]:
-                text = text.replace(tag, "")
-            return text.strip()
-
-        return "⚠️ No response generated. The model may be overloaded — try again."
+        text = data["choices"][0]["message"]["content"]
+        return text.strip()
 
     except requests.exceptions.Timeout:
-        return "⚠️ Request timed out. The model may be loading — try again in a minute."
+        return "Request timed out — try again."
     except Exception as e:
-        return f"⚠️ Error: {str(e)}"
-
-
-def _format_chat(messages: list[dict], model: str) -> str:
-    """Format messages into the model's expected chat template."""
-    # Mistral Instruct format
-    if "mistral" in model.lower() or "mixtral" in model.lower():
-        parts = []
-        for msg in messages:
-            if msg["role"] == "system":
-                parts.append(f"[INST] {msg['content']}\n")
-            elif msg["role"] == "user":
-                if parts:
-                    parts.append(f"{msg['content']} [/INST]")
-                else:
-                    parts.append(f"[INST] {msg['content']} [/INST]")
-        return "".join(parts)
-
-    # Generic ChatML fallback
-    parts = []
-    for msg in messages:
-        parts.append(f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>")
-    parts.append("<|im_start|>assistant\n")
-    return "\n".join(parts)
+        return f"Error: {str(e)}"
 
 
-# ===== AI FEATURES =====
+# ===== SYSTEM PROMPT =====
 
-ADVISOR_SYSTEM = """You are ForgeKit AI, an expert assistant for merging large language models. You have deep knowledge of mergekit, model architectures, merge methods (DARE-TIES, TIES, SLERP, Linear, Task Arithmetic, Passthrough), and best practices for creating high-quality merged models.
+ADVISOR_SYSTEM = """You are ForgeKit AI, an expert assistant for merging large language models using mergekit. You have deep knowledge of:
 
-Be concise, practical, and specific. Give actionable recommendations with concrete numbers (weights, densities). Format your response with clear sections using markdown."""
+- Model architectures (LLaMA, Qwen, Mistral, Gemma, Phi)
+- Merge methods: DARE-TIES, TIES, SLERP, Linear, Task Arithmetic, Passthrough (Frankenmerge)
+- Optimal weight/density configurations for different use cases
+- Common pitfalls and best practices
+
+Be concise, practical, and specific. Always give concrete numbers for weights and densities.
+Format responses with markdown headers and bullet points for readability."""
+
+
+# ===== AI FEATURES =====
 
 
 def merge_advisor(
     models_text: str,
     goal: str = "",
-    token: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> str:
-    """AI recommends the best merge method, weights, and configuration.
-
-    Args:
-        models_text: Newline-separated model IDs
-        goal: What the user wants the merged model to do
-        token: HF API token
-
-    Returns:
-        AI recommendation as markdown
-    """
+    """AI recommends the best merge method, weights, and configuration."""
     models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
     if len(models) < 2:
-        return "⚠️ Add at least 2 models to get a recommendation."
+        return "Add at least 2 models (one per line) to get a recommendation."
 
     models_str = "\n".join(f"- {m}" for m in models)
-    goal_str = f"\n\nUser's goal: {goal}" if goal.strip() else ""
+    goal_str = f"\n\nThe user's goal: {goal}" if goal.strip() else ""
 
     prompt = f"""I want to merge these models:
 {models_str}
 {goal_str}
 
-Recommend:
-1. **Best merge method** and why (DARE-TIES, SLERP, Linear, TIES, Task Arithmetic, or Passthrough)
-2. **Optimal weights** for each model (with reasoning)
-3. **Density values** if applicable
-4. **Which model to use as base** and why
+Give me a specific recommendation:
+1. **Best merge method** and why
+2. **Exact weights** for each model
+3. **Density values** (if applicable)
+4. **Which model as base** and why
 5. **Which tokenizer** to keep
-6. **Any warnings** or tips specific to these models
-
-Be specific with numbers and keep it practical."""
+6. **Warnings or tips** for these specific models
+7. **The complete YAML config** ready for mergekit"""
 
-    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
+    return _query_groq(prompt, system=ADVISOR_SYSTEM, api_key=api_key)
 
 
 def model_describer(
     models_text: str,
     method: str = "",
     weights_text: str = "",
-    token: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> str:
-    """AI explains what the merged model will be good at.
-
-    Args:
-        models_text: Newline-separated model IDs
-        method: Merge method being used
-        weights_text: Comma-separated weights
-        token: HF API token
-
-    Returns:
-        AI description of expected capabilities
-    """
+    """AI predicts what the merged model will be good at."""
     models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
     if not models:
-        return "⚠️ Add models first."
+        return "Add models first."
 
     models_str = "\n".join(f"- {m}" for m in models)
-    method_str = f" using {method}" if method else ""
+    method_str = f" using **{method}**" if method else ""
    weights_str = f"\nWeights: {weights_text}" if weights_text.strip() else ""
 
     prompt = f"""I'm merging these models{method_str}:
 {models_str}{weights_str}
 
-Based on what each source model is known for, describe:
-1. **What the merged model will excel at** (specific tasks/benchmarks)
-2. **What it might struggle with** compared to the source models
+Predict:
+1. **What it will excel at** (specific tasks and benchmarks)
+2. **What it might lose** compared to individual source models
 3. **Ideal use cases** for this merge
-4. **Expected quality** compared to each individual model
-5. **A creative name suggestion** for this merge
-
-Keep it concise and practical."""
+4. **Quality estimate** vs each source model
+5. **A creative name suggestion** for this merged model"""
 
-    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
+    return _query_groq(prompt, system=ADVISOR_SYSTEM, api_key=api_key)
 
 
 def config_explainer(
     yaml_config: str,
-    token: Optional[str] = None,
+    api_key: Optional[str] = None,
 ) -> str:
-    """AI explains a YAML merge config in plain English.
-
-    Args:
-        yaml_config: The YAML configuration string
-        token: HF API token
-
-    Returns:
-        Plain English explanation
-    """
+    """AI explains a YAML merge config in plain English."""
     if not yaml_config.strip() or yaml_config.startswith("# Add"):
-        return "⚠️ Generate a YAML config first."
+        return "Generate or paste a YAML config first."
 
-    prompt = f"""Explain this mergekit YAML configuration in plain English. Break it down so someone new to model merging can understand exactly what will happen:
+    prompt = f"""Explain this mergekit config in plain English for a beginner:
 
 ```yaml
 {yaml_config}
 ```
 
-Explain:
-1. **What this config does** in simple terms
-2. **Why these specific settings** were chosen (method, weights, density)
-3. **What the output model will be like**
-4. **Any potential issues** to watch out for
-5. **Estimated resource requirements** (RAM, time)
-
-Be clear and beginner-friendly."""
+Cover:
+1. **What this does** in simple terms
+2. **Why these settings** (explain each parameter)
+3. **What the output will be like**
+4. **Potential issues** to watch for
+5. **Resource requirements** (RAM, time, Colab tier)"""
 
-    return _query_llm(prompt, system=ADVISOR_SYSTEM, token=token)
+    return _query_groq(prompt, system=ADVISOR_SYSTEM, api_key=api_key)
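
For anyone picking up this change, a minimal smoke test of the new Groq-backed path might look like the sketch below. It assumes `forgekit` is on `PYTHONPATH` so the module imports as `forgekit.ai_advisor`, that a key from console.groq.com is exported as `GROQ_API_KEY` (the fallback `_query_groq` reads when no key is passed in), and that the two model IDs are placeholders; this is illustrative usage, not part of the commit.

```python
# smoke_test_advisor.py: illustrative sketch, not part of this commit.
# Assumes forgekit is importable and GROQ_API_KEY is set in the environment
# (hypothetical setup; adjust paths and model IDs to your own).
import os

from forgekit.ai_advisor import merge_advisor

# _query_groq falls back to os.environ["GROQ_API_KEY"] when no api_key is
# passed, so the key does not need to be threaded through explicitly.
if not os.environ.get("GROQ_API_KEY"):
    raise SystemExit("export GROQ_API_KEY first (free at console.groq.com)")

# merge_advisor expects newline-separated model IDs; these are placeholders.
models = "mistralai/Mistral-7B-Instruct-v0.3\nHuggingFaceH4/zephyr-7b-beta"

print(merge_advisor(models, goal="general chat with stronger reasoning"))
```

Calling `merge_advisor` without an `api_key` exercises the environment-variable fallback added in this commit; a UI would instead pass the key from its input field.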