CompactAI commited on
Commit
17ef86f
Β·
verified Β·
1 Parent(s): 0051294

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/1773449581[[:space:]](1).png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,168 +1,92 @@
1
  """
2
- AIFinder API Server
3
- Serves classification and training endpoints for the frontend.
4
-
5
- Public API:
6
- POST /v1/classify β€” classify text, returns top-N provider predictions.
7
- No API key required. Rate-limited to 60 requests/minute per IP.
8
  """
9
 
10
  import os
11
  import re
12
- import sys
13
- import json
14
  import joblib
15
  import numpy as np
16
- import torch
17
- import torch.nn as nn
18
- from flask import Flask, request, jsonify, send_from_directory
19
  from flask_cors import CORS
20
  from flask_limiter import Limiter
21
  from flask_limiter.util import get_remote_address
22
 
23
  from config import MODEL_DIR
24
- from model import AIFinderNet
25
- from features import FeaturePipeline
26
 
27
- app = Flask(__name__, static_folder="static", static_url_path="")
28
  CORS(app)
29
- limiter = Limiter(get_remote_address, app=app, default_limits=[])
30
 
31
- DEFAULT_TOP_N = 5
 
 
32
 
33
- pipeline = None
34
- provider_enc = None
35
- net = None
36
- device = None
37
- checkpoint = None
38
 
39
 
40
  def load_models():
41
- global pipeline, provider_enc, net, device, checkpoint
42
-
43
- pipeline = joblib.load(os.path.join(MODEL_DIR, "feature_pipeline.joblib"))
44
- provider_enc = joblib.load(os.path.join(MODEL_DIR, "provider_enc.joblib"))
45
-
46
- checkpoint = torch.load(
47
- os.path.join(MODEL_DIR, "classifier.pt"),
48
- map_location="cpu",
49
- weights_only=True,
50
- )
51
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
52
- net = AIFinderNet(
53
- input_dim=checkpoint["input_dim"],
54
- num_providers=checkpoint["num_providers"],
55
- hidden_dim=checkpoint["hidden_dim"],
56
- embed_dim=checkpoint["embed_dim"],
57
- dropout=checkpoint["dropout"],
58
- ).to(device)
59
- net.load_state_dict(checkpoint["state_dict"], strict=False)
60
- net.eval()
61
-
62
-
63
- @app.route("/")
64
- def index():
65
- return send_from_directory("static", "index.html")
66
-
67
-
68
- @app.route("/api/providers", methods=["GET"])
69
- def get_providers():
70
- """Return list of available providers."""
71
- return jsonify({"providers": sorted(provider_enc.classes_.tolist())})
72
-
73
-
74
- @app.route("/api/classify", methods=["POST"])
75
- def classify():
76
- """Classify text and return provider predictions."""
77
- data = request.json
78
- text = data.get("text", "")
79
-
80
- if len(text) < 20:
81
- return jsonify({"error": "Text too short (minimum 20 characters)"}), 400
82
-
83
- X = pipeline.transform([text])
84
- X_t = torch.tensor(X.toarray(), dtype=torch.float32).to(device)
85
 
86
- with torch.no_grad():
87
- prov_logits = net(X_t)
88
 
89
- prov_proba = torch.softmax(prov_logits.float(), dim=1)[0].cpu().numpy()
90
-
91
- top_prov_idxs = np.argsort(prov_proba)[::-1][:5]
92
- top_providers = [
93
- {
94
- "name": provider_enc.inverse_transform([i])[0],
95
- "confidence": float(prov_proba[i] * 100),
96
- }
97
- for i in top_prov_idxs
98
- ]
99
-
100
- return jsonify(
101
- {
102
- "provider": top_providers[0]["name"],
103
- "confidence": top_providers[0]["confidence"],
104
- "top_providers": top_providers,
105
- }
106
- )
107
 
108
 
109
  def _strip_think_tags(text):
110
- """Remove <think>…</think> (and <thinking>…</thinking>) blocks from input."""
111
  text = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", text, flags=re.DOTALL)
112
  return text.strip()
113
 
114
 
 
 
 
 
 
 
115
  @app.route("/v1/classify", methods=["POST"])
116
  @limiter.limit("60/minute")
117
  def v1_classify():
118
- """Public API β€” classify text and return top-N provider predictions.
119
-
120
- Request JSON:
121
- text (str): The text to classify. Any <think>/<thinking> tags will be
122
- stripped automatically before classification.
123
- top_n (int): Number of results to return (default: 5).
124
-
125
- Response JSON:
126
- provider (str): Best-matching provider name.
127
- confidence (float): Confidence % for the top provider.
128
- top_providers (list): List of {name, confidence} dicts.
129
-
130
- Rate limit: 60 requests per minute per IP. No API key required.
131
-
132
- NOTE: If the text you are classifying was produced by a model that emits
133
- <think> or <thinking> blocks, you should strip those tags BEFORE
134
- sending the text. This endpoint does it for you automatically, but
135
- doing it on your side avoids wasting bytes on the wire.
136
- """
137
  data = request.get_json(silent=True)
138
  if not data or "text" not in data:
139
  return jsonify({"error": "Request body must be JSON with a 'text' field."}), 400
140
 
141
  raw_text = data["text"]
142
  text = _strip_think_tags(raw_text)
143
- top_n = data.get("top_n", DEFAULT_TOP_N)
 
144
 
145
  if not isinstance(top_n, int) or top_n < 1:
146
  top_n = DEFAULT_TOP_N
147
 
148
  if len(text) < 20:
149
- return jsonify({"error": "Text too short (minimum 20 characters after stripping think tags)."}), 400
150
-
151
- X = pipeline.transform([text])
152
- X_t = torch.tensor(X.toarray(), dtype=torch.float32).to(device)
153
-
154
- with torch.no_grad():
155
- prov_logits = net(X_t)
156
 
157
- prov_proba = torch.softmax(prov_logits.float(), dim=1)[0].cpu().numpy()
 
158
 
159
- top_idxs = np.argsort(prov_proba)[::-1][:top_n]
160
  top_providers = [
161
- {
162
- "name": provider_enc.inverse_transform([i])[0],
163
- "confidence": round(float(prov_proba[i] * 100), 2),
164
- }
165
- for i in top_idxs
166
  ]
167
 
168
  return jsonify(
@@ -176,71 +100,80 @@ def v1_classify():
176
 
177
  @app.route("/api/correct", methods=["POST"])
178
  def correct():
179
- """Train on a corrected example."""
180
- data = request.json
181
- text = data.get("text", "")
182
- correct_provider = data.get("correct_provider", "")
183
-
184
- if not text or not correct_provider:
185
- return jsonify({"error": "Missing text or correct_provider"}), 400
186
-
187
- try:
188
- prov_idx = provider_enc.transform([correct_provider])[0]
189
- except ValueError as e:
190
- return jsonify({"error": f"Unknown provider: {e}"}), 400
191
 
192
- X = pipeline.transform([text])
193
- X_t = torch.tensor(X.toarray(), dtype=torch.float32).to(device)
194
- y_prov = torch.tensor([prov_idx], dtype=torch.long).to(device)
195
 
196
- net.train()
197
- for module in net.modules():
198
- if isinstance(module, nn.modules.batchnorm._BatchNorm):
199
- module.eval()
200
 
201
- optimizer = torch.optim.AdamW(net.parameters(), lr=1e-4, weight_decay=1e-4)
202
- optimizer.zero_grad(set_to_none=True)
 
 
203
 
204
- prov_criterion = nn.CrossEntropyLoss()
205
- prov_logits = net(X_t)
206
- loss = prov_criterion(prov_logits, y_prov)
207
- loss.backward()
208
- torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
209
- optimizer.step()
210
 
211
- net.eval()
 
 
 
212
 
213
- checkpoint["state_dict"] = net.state_dict()
214
 
215
- return jsonify({"success": True, "loss": float(loss.item())})
216
 
217
 
218
  @app.route("/api/save", methods=["POST"])
219
  def save_model():
220
- """Save the current model state to a file for export."""
221
- global checkpoint
222
- data = request.json
223
- filename = data.get("filename", "aifinder_model.pt")
224
 
225
- save_path = os.path.join(MODEL_DIR, filename)
226
- torch.save(checkpoint, save_path)
227
 
228
- return jsonify({"success": True, "filename": filename})
 
 
 
 
 
229
 
230
 
231
  @app.route("/models/<filename>")
232
  def download_model(filename):
233
- """Download exported model file."""
 
234
  return send_from_directory(MODEL_DIR, filename)
235
 
236
 
237
  @app.route("/api/status", methods=["GET"])
238
  def status():
239
- """Check if models are loaded."""
240
  return jsonify(
241
  {
242
- "loaded": net is not None,
243
- "device": str(device) if device else None,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  }
245
  )
246
 
@@ -248,5 +181,8 @@ def status():
248
  if __name__ == "__main__":
249
  print("Loading models...")
250
  load_models()
251
- print(f"Ready on {device}")
 
 
 
252
  app.run(host="0.0.0.0", port=7860)
 
1
  """
2
+ AIFinder Flask API
3
+ Serves the trained sklearn ensemble via the AIFinder inference class.
 
 
 
 
4
  """
5
 
6
  import os
7
  import re
8
+
 
9
  import joblib
10
  import numpy as np
11
+ from sklearn.ensemble import RandomForestClassifier
12
+ from flask import Flask, jsonify, request, send_from_directory, render_template
 
13
  from flask_cors import CORS
14
  from flask_limiter import Limiter
15
  from flask_limiter.util import get_remote_address
16
 
17
  from config import MODEL_DIR
18
+ from inference import AIFinder
 
19
 
20
+ app = Flask(__name__)
21
  CORS(app)
22
+ limiter = Limiter(get_remote_address, app=app)
23
 
24
+ finder: AIFinder | None = None
25
+ community_finder: AIFinder | None = None
26
+ using_community = False
27
 
28
+ DEFAULT_TOP_N = 4
29
+ COMMUNITY_DIR = os.path.join(MODEL_DIR, "community")
30
+ CORRECTIONS_FILE = os.path.join(COMMUNITY_DIR, "corrections.joblib")
31
+ corrections: list[dict] = []
 
32
 
33
 
34
  def load_models():
35
+ global finder, community_finder, corrections
36
+ finder = AIFinder(model_dir=MODEL_DIR)
37
+ os.makedirs(COMMUNITY_DIR, exist_ok=True)
38
+ if os.path.exists(CORRECTIONS_FILE):
39
+ corrections = joblib.load(CORRECTIONS_FILE)
40
+ if os.path.exists(os.path.join(COMMUNITY_DIR, "rf_4provider.joblib")):
41
+ try:
42
+ community_finder = AIFinder(model_dir=COMMUNITY_DIR)
43
+ except Exception:
44
+ community_finder = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
46
 
47
+ def _active_finder():
48
+ return community_finder if using_community and community_finder else finder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  def _strip_think_tags(text):
 
52
  text = re.sub(r"<think(?:ing)?>.*?</think(?:ing)?>", "", text, flags=re.DOTALL)
53
  return text.strip()
54
 
55
 
56
+ @app.route("/")
57
+ def index():
58
+ return render_template("index.html")
59
+
60
+
61
+ @app.route("/api/classify", methods=["POST"])
62
  @app.route("/v1/classify", methods=["POST"])
63
  @limiter.limit("60/minute")
64
  def v1_classify():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  data = request.get_json(silent=True)
66
  if not data or "text" not in data:
67
  return jsonify({"error": "Request body must be JSON with a 'text' field."}), 400
68
 
69
  raw_text = data["text"]
70
  text = _strip_think_tags(raw_text)
71
+ af = _active_finder()
72
+ top_n = min(data.get("top_n", DEFAULT_TOP_N), len(af.le.classes_))
73
 
74
  if not isinstance(top_n, int) or top_n < 1:
75
  top_n = DEFAULT_TOP_N
76
 
77
  if len(text) < 20:
78
+ return jsonify(
79
+ {
80
+ "error": "Text too short (minimum 20 characters after stripping think tags)."
81
+ }
82
+ ), 400
 
 
83
 
84
+ proba = af.predict_proba(text)
85
+ sorted_providers = sorted(proba.items(), key=lambda x: x[1], reverse=True)[:top_n]
86
 
 
87
  top_providers = [
88
+ {"name": name, "confidence": round(float(conf * 100), 2)}
89
+ for name, conf in sorted_providers
 
 
 
90
  ]
91
 
92
  return jsonify(
 
100
 
101
  @app.route("/api/correct", methods=["POST"])
102
  def correct():
103
+ global community_finder
104
+ data = request.get_json(silent=True)
105
+ if not data or "text" not in data or "correct_provider" not in data:
106
+ return jsonify({"error": "Need 'text' and 'correct_provider'."}), 400
 
 
 
 
 
 
 
 
107
 
108
+ provider = data["correct_provider"]
109
+ if provider not in list(finder.le.classes_):
110
+ return jsonify({"error": f"Unknown provider: {provider}"}), 400
111
 
112
+ text = _strip_think_tags(data["text"])
113
+ corrections.append({"text": text, "provider": provider})
 
 
114
 
115
+ texts = [c["text"] for c in corrections]
116
+ providers = [c["provider"] for c in corrections]
117
+ X = finder.pipeline.transform(texts)
118
+ y = finder.le.transform(providers)
119
 
120
+ rf = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
121
+ rf.fit(X, y)
 
 
 
 
122
 
123
+ joblib.dump([rf], os.path.join(COMMUNITY_DIR, "rf_4provider.joblib"))
124
+ joblib.dump(finder.pipeline, os.path.join(COMMUNITY_DIR, "pipeline_4provider.joblib"))
125
+ joblib.dump(finder.le, os.path.join(COMMUNITY_DIR, "enc_4provider.joblib"))
126
+ joblib.dump(corrections, CORRECTIONS_FILE)
127
 
128
+ community_finder = AIFinder(model_dir=COMMUNITY_DIR)
129
 
130
+ return jsonify({"status": "ok", "loss": 0.0, "corrections": len(corrections)})
131
 
132
 
133
  @app.route("/api/save", methods=["POST"])
134
  def save_model():
135
+ if community_finder is None:
136
+ return jsonify({"error": "No community model trained yet."}), 400
137
+ filename = "community_rf_4provider.joblib"
138
+ return jsonify({"status": "ok", "filename": filename})
139
 
 
 
140
 
141
+ @app.route("/api/toggle_community", methods=["POST"])
142
+ def toggle_community():
143
+ global using_community
144
+ data = request.get_json(silent=True) or {}
145
+ using_community = bool(data.get("enabled", not using_community))
146
+ return jsonify({"using_community": using_community, "available": community_finder is not None})
147
 
148
 
149
  @app.route("/models/<filename>")
150
  def download_model(filename):
151
+ if filename.startswith("community_"):
152
+ return send_from_directory(COMMUNITY_DIR, filename.replace("community_", "", 1))
153
  return send_from_directory(MODEL_DIR, filename)
154
 
155
 
156
  @app.route("/api/status", methods=["GET"])
157
  def status():
158
+ af = _active_finder()
159
  return jsonify(
160
  {
161
+ "loaded": af is not None,
162
+ "device": "cpu",
163
+ "providers": list(af.le.classes_) if af else [],
164
+ "num_providers": len(af.le.classes_) if af else 0,
165
+ "using_community": using_community,
166
+ "community_available": community_finder is not None,
167
+ "corrections_count": len(corrections),
168
+ }
169
+ )
170
+
171
+
172
+ @app.route("/api/providers", methods=["GET"])
173
+ def providers():
174
+ return jsonify(
175
+ {
176
+ "providers": list(finder.le.classes_) if finder else [],
177
  }
178
  )
179
 
 
181
  if __name__ == "__main__":
182
  print("Loading models...")
183
  load_models()
184
+ print(
185
+ f"Ready on cpu β€” {len(finder.le.classes_)} providers: "
186
+ f"{', '.join(finder.le.classes_)}"
187
+ )
188
  app.run(host="0.0.0.0", port=7860)
config.py CHANGED
@@ -15,91 +15,136 @@ MODEL_DIR = os.path.join(BASE_DIR, "models")
15
  DATASET_REGISTRY = [
16
  # Anthropic
17
  ("TeichAI/claude-4.5-opus-high-reasoning-250x", "Anthropic", "Claude 4.5 Opus", {}),
18
- ("TeichAI/claude-sonnet-4.5-high-reasoning-250x", "Anthropic", "Claude Sonnet 4.5", {}),
19
- ("Roman1111111/claude-opus-4.6-10000x", "Anthropic", "Claude Opus 4.6", {"max_samples": 1500}),
20
-
 
 
 
 
 
 
 
 
 
21
  # OpenAI
22
  ("TeichAI/gpt-5.2-high-reasoning-250x", "OpenAI", "GPT-5.2", {}),
23
  ("TeichAI/gpt-5.1-high-reasoning-1000x", "OpenAI", "GPT-5.1", {}),
24
  ("TeichAI/gpt-5.1-codex-max-1000x", "OpenAI", "GPT-5.1 Codex Max", {}),
25
  ("TeichAI/gpt-5-codex-250x", "OpenAI", "GPT-5 Codex", {}),
26
  ("TeichAI/gpt-5-codex-1000x", "OpenAI", "GPT-5 Codex", {}),
27
-
28
  # Google
29
  ("TeichAI/gemini-3-pro-preview-high-reasoning-1000x", "Google", "Gemini 3 Pro", {}),
30
  ("TeichAI/gemini-3-pro-preview-high-reasoning-250x", "Google", "Gemini 3 Pro", {}),
31
- ("TeichAI/gemini-2.5-flash-11000x", "Google", "Gemini 2.5 Flash", {"max_samples": 1500}),
 
 
 
 
 
32
  ("TeichAI/Gemini-3-Flash-Preview-VIBE", "Google", "Gemini 3 Flash", {}),
33
  ("TeichAI/gemini-3-flash-preview-1000x", "Google", "Gemini 3 Flash", {}),
34
  ("TeichAI/gemini-3-flash-preview-complex-1000x", "Google", "Gemini 3 Flash", {}),
35
-
36
  # xAI
37
  ("TeichAI/brainstorm-v3.1-grok-4-fast-200x", "xAI", "Grok 4 Fast", {}),
38
- ("TeichAI/sherlock-thinking-alpha-11000x", "xAI", "Grok 4.1 Fast", {"max_samples": 1500}),
 
 
 
 
 
39
  ("TeichAI/sherlock-dash-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
40
  ("TeichAI/sherlock-think-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
41
  ("TeichAI/grok-code-fast-1-1000x", "xAI", "Grok Code Fast 1", {}),
42
-
43
  # MoonshotAI
44
  ("TeichAI/kimi-k2-thinking-250x", "MoonshotAI", "Kimi K2", {}),
45
  ("TeichAI/kimi-k2-thinking-1000x", "MoonshotAI", "Kimi K2", {}),
46
-
47
  # Mistral
48
  ("TeichAI/mistral-small-creative-500x", "Mistral", "Mistral Small", {}),
49
-
50
  # MiniMax
51
- ("TeichAI/MiniMax-M2.1-Code-SFT", "MiniMax", "MiniMax M2.1", {}),
52
  ("TeichAI/convo-v1", "MiniMax", "MiniMax M2.1", {}),
53
-
54
  # StepFun
55
- ("TeichAI/Step-3.5-Flash-2600x", "StepFun", "Step 3.5 Flash", {"max_samples": 1500}),
56
-
 
 
 
 
57
  # Zhipu
58
  ("TeichAI/Pony-Alpha-15k", "Zhipu", "GLM-5", {"max_samples": 1500}),
59
-
60
  # DeepSeek (TeichAI)
61
  ("TeichAI/deepseek-v3.2-speciale-1000x", "DeepSeek", "DeepSeek V3.2 Speciale", {}),
62
- ("TeichAI/deepseek-v3.2-speciale-openr1-math-3k", "DeepSeek", "DeepSeek V3.2 Speciale", {"max_samples": 1500}),
 
 
 
 
 
63
  ]
64
 
65
  # DeepSeek (a-m-team) β€” different format, handled separately
66
  DEEPSEEK_AM_DATASETS = [
67
- ("a-m-team/AM-DeepSeek-R1-Distilled-1.4M", "DeepSeek", "DeepSeek R1", {"name": "am_0.9M_sample_1k", "max_samples": 1000}),
 
 
 
 
 
68
  ]
69
 
 
 
 
70
  # --- All providers and models ---
71
  PROVIDERS = [
72
- "Anthropic", "OpenAI", "Google", "xAI", "MoonshotAI",
73
- "Mistral", "MiniMax", "StepFun", "Zhipu", "DeepSeek"
 
 
 
 
 
 
 
 
74
  ]
75
 
76
  # --- Feature parameters ---
77
  TFIDF_WORD_PARAMS = {
78
  "analyzer": "word",
79
  "ngram_range": (1, 2),
80
- "max_features": 20000,
81
  "sublinear_tf": True,
82
  "min_df": 3,
 
83
  }
84
 
85
  TFIDF_CHAR_PARAMS = {
86
  "analyzer": "char_wb",
87
- "ngram_range": (3, 5),
88
- "max_features": 20000,
89
  "sublinear_tf": True,
90
  "min_df": 3,
 
 
91
  }
92
 
93
- # --- Train/test split ---
94
- TEST_SIZE = 0.2
 
 
 
 
95
  RANDOM_STATE = 42
96
 
97
  # --- Neural Network ---
98
- HIDDEN_DIM = 1024
99
- EMBED_DIM = 256
100
- DROPOUT = 0.3
101
- BATCH_SIZE = 2048
102
- EPOCHS = 50
103
- EARLY_STOP_PATIENCE = 8
104
- LEARNING_RATE = 1e-3
105
- WEIGHT_DECAY = 1e-4
 
 
15
  DATASET_REGISTRY = [
16
  # Anthropic
17
  ("TeichAI/claude-4.5-opus-high-reasoning-250x", "Anthropic", "Claude 4.5 Opus", {}),
18
+ (
19
+ "TeichAI/claude-sonnet-4.5-high-reasoning-250x",
20
+ "Anthropic",
21
+ "Claude Sonnet 4.5",
22
+ {},
23
+ ),
24
+ (
25
+ "Roman1111111/claude-opus-4.6-10000x",
26
+ "Anthropic",
27
+ "Claude Opus 4.6",
28
+ {"max_samples": 1500},
29
+ ),
30
  # OpenAI
31
  ("TeichAI/gpt-5.2-high-reasoning-250x", "OpenAI", "GPT-5.2", {}),
32
  ("TeichAI/gpt-5.1-high-reasoning-1000x", "OpenAI", "GPT-5.1", {}),
33
  ("TeichAI/gpt-5.1-codex-max-1000x", "OpenAI", "GPT-5.1 Codex Max", {}),
34
  ("TeichAI/gpt-5-codex-250x", "OpenAI", "GPT-5 Codex", {}),
35
  ("TeichAI/gpt-5-codex-1000x", "OpenAI", "GPT-5 Codex", {}),
 
36
  # Google
37
  ("TeichAI/gemini-3-pro-preview-high-reasoning-1000x", "Google", "Gemini 3 Pro", {}),
38
  ("TeichAI/gemini-3-pro-preview-high-reasoning-250x", "Google", "Gemini 3 Pro", {}),
39
+ (
40
+ "TeichAI/gemini-2.5-flash-11000x",
41
+ "Google",
42
+ "Gemini 2.5 Flash",
43
+ {"max_samples": 1500},
44
+ ),
45
  ("TeichAI/Gemini-3-Flash-Preview-VIBE", "Google", "Gemini 3 Flash", {}),
46
  ("TeichAI/gemini-3-flash-preview-1000x", "Google", "Gemini 3 Flash", {}),
47
  ("TeichAI/gemini-3-flash-preview-complex-1000x", "Google", "Gemini 3 Flash", {}),
 
48
  # xAI
49
  ("TeichAI/brainstorm-v3.1-grok-4-fast-200x", "xAI", "Grok 4 Fast", {}),
50
+ (
51
+ "TeichAI/sherlock-thinking-alpha-11000x",
52
+ "xAI",
53
+ "Grok 4.1 Fast",
54
+ {"max_samples": 1500},
55
+ ),
56
  ("TeichAI/sherlock-dash-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
57
  ("TeichAI/sherlock-think-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
58
  ("TeichAI/grok-code-fast-1-1000x", "xAI", "Grok Code Fast 1", {}),
 
59
  # MoonshotAI
60
  ("TeichAI/kimi-k2-thinking-250x", "MoonshotAI", "Kimi K2", {}),
61
  ("TeichAI/kimi-k2-thinking-1000x", "MoonshotAI", "Kimi K2", {}),
 
62
  # Mistral
63
  ("TeichAI/mistral-small-creative-500x", "Mistral", "Mistral Small", {}),
 
64
  # MiniMax
65
+ ("TeichAI/MiniMax-M2.1-Code-SFT", "MiniMax", "MiniMax M2.1", {"max_samples": 1500}),
66
  ("TeichAI/convo-v1", "MiniMax", "MiniMax M2.1", {}),
 
67
  # StepFun
68
+ (
69
+ "TeichAI/Step-3.5-Flash-2600x",
70
+ "StepFun",
71
+ "Step 3.5 Flash",
72
+ {"max_samples": 1500},
73
+ ),
74
  # Zhipu
75
  ("TeichAI/Pony-Alpha-15k", "Zhipu", "GLM-5", {"max_samples": 1500}),
 
76
  # DeepSeek (TeichAI)
77
  ("TeichAI/deepseek-v3.2-speciale-1000x", "DeepSeek", "DeepSeek V3.2 Speciale", {}),
78
+ (
79
+ "TeichAI/deepseek-v3.2-speciale-openr1-math-3k",
80
+ "DeepSeek",
81
+ "DeepSeek V3.2 Speciale",
82
+ {"max_samples": 1500},
83
+ ),
84
  ]
85
 
86
  # DeepSeek (a-m-team) β€” different format, handled separately
87
  DEEPSEEK_AM_DATASETS = [
88
+ (
89
+ "a-m-team/AM-DeepSeek-R1-Distilled-1.4M",
90
+ "DeepSeek",
91
+ "DeepSeek R1",
92
+ {"name": "am_0.9M", "max_samples": 1000},
93
+ ),
94
  ]
95
 
96
+ # Conversational datasets disabled
97
+ CONVERSATIONAL_DATASETS = []
98
+
99
  # --- All providers and models ---
100
  PROVIDERS = [
101
+ "Anthropic",
102
+ "OpenAI",
103
+ "Google",
104
+ "xAI",
105
+ "MoonshotAI",
106
+ "Mistral",
107
+ "MiniMax",
108
+ "StepFun",
109
+ "Zhipu",
110
+ "DeepSeek",
111
  ]
112
 
113
  # --- Feature parameters ---
114
  TFIDF_WORD_PARAMS = {
115
  "analyzer": "word",
116
  "ngram_range": (1, 2),
117
+ "max_features": 20,
118
  "sublinear_tf": True,
119
  "min_df": 3,
120
+ "max_df": 0.7,
121
  }
122
 
123
  TFIDF_CHAR_PARAMS = {
124
  "analyzer": "char_wb",
125
+ "ngram_range": (2, 4),
126
+ "max_features": 20,
127
  "sublinear_tf": True,
128
  "min_df": 3,
129
+ "max_df": 0.7,
130
+ "smooth_idf": True,
131
  }
132
 
133
+ # Equal samples per provider
134
+ MAX_SAMPLES_PER_PROVIDER = 1000
135
+
136
+ # --- Train/val/test split ---
137
+ TEST_SIZE = 0.15
138
+ VAL_SIZE = 0.10
139
  RANDOM_STATE = 42
140
 
141
  # --- Neural Network ---
142
+ HIDDEN_DIM = 256
143
+ EMBED_DIM = 128
144
+ DROPOUT = 0.7
145
+ BATCH_SIZE = 128
146
+ EPOCHS = 80
147
+ EARLY_STOP_PATIENCE = 25
148
+ LEARNING_RATE = 3e-5
149
+ WEIGHT_DECAY = 8e-2
150
+ LABEL_SMOOTHING = 0.3
inference.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AIFinder Inference Module
3
+ Load the trained model and predict AI provider
4
+ """
5
+
6
+ import joblib
7
+ import numpy as np
8
+
9
+ from config import MODEL_DIR
10
+
11
+
12
+ class AIFinder:
13
+ def __init__(self, model_dir=MODEL_DIR):
14
+ self.models = joblib.load(f"{model_dir}/rf_4provider.joblib")
15
+ self.pipeline = joblib.load(f"{model_dir}/pipeline_4provider.joblib")
16
+ self.le = joblib.load(f"{model_dir}/enc_4provider.joblib")
17
+
18
+ def predict(self, text):
19
+ """Predict the provider for a given text"""
20
+ X = self.pipeline.transform([text])
21
+ proba = np.mean([m.predict_proba(X) for m in self.models], axis=0)
22
+ pred_idx = np.argmax(proba[0])
23
+ return self.le.classes_[pred_idx]
24
+
25
+ def predict_proba(self, text):
26
+ """Get prediction probabilities"""
27
+ X = self.pipeline.transform([text])
28
+ proba = np.mean([m.predict_proba(X) for m in self.models], axis=0)
29
+ return dict(zip(self.le.classes_, proba[0]))
30
+
31
+ def predict_with_confidence(self, text):
32
+ """Predict with confidence score"""
33
+ proba = self.predict_proba(text)
34
+ provider = max(proba, key=proba.get)
35
+ confidence = proba[provider]
36
+ return provider, confidence
37
+
38
+
39
+ if __name__ == "__main__":
40
+ finder = AIFinder()
41
+
42
+ test_texts = [
43
+ "AI is like a really smart robot helper.",
44
+ "Yes, coding is one of my stronger skills!",
45
+ "A lot, depending on what you need.",
46
+ ]
47
+
48
+ for text in test_texts:
49
+ provider, conf = finder.predict_with_confidence(text)
50
+ print(f"Text: {text[:50]}...")
51
+ print(f"Provider: {provider} (confidence: {conf:.2f})")
52
+ print()
models/1773449581 (1).png ADDED

Git LFS Details

  • SHA256: c4040e312991db4fff1cb3aa18184f4b7477878e5642666748b13b98d60815db
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB
models/enc_4provider.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9125a24e56ba5808ed41a62f5d321134e5cc6b23b862679ea15f09ea6dc64985
3
+ size 471
models/pipeline_4provider.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b1810b0c76194f98e785c24cecb911948f21f3410cf57d43e74def8f967b20
3
+ size 5015
models/rf_4provider.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24c29e35c11b789f9b1eb5dc562f68d0621a963b598c0b391aaad0b02162a73
3
+ size 44121754
templates/index.html ADDED
@@ -0,0 +1,1373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>AIFinder - Identify AI Responses</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600&family=Outfit:wght@300;400;500;600;700&display=swap');
9
+
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ :root {
17
+ --bg-primary: #0d0d0d;
18
+ --bg-secondary: #171717;
19
+ --bg-tertiary: #1f1f1f;
20
+ --bg-elevated: #262626;
21
+ --text-primary: #f5f5f5;
22
+ --text-secondary: #a3a3a3;
23
+ --text-muted: #737373;
24
+ --accent: #e85d04;
25
+ --accent-hover: #f48c06;
26
+ --accent-muted: #9c4300;
27
+ --success: #22c55e;
28
+ --success-muted: #166534;
29
+ --border: #333333;
30
+ --border-light: #404040;
31
+ }
32
+
33
+ body {
34
+ font-family: 'Outfit', -apple-system, sans-serif;
35
+ background: var(--bg-primary);
36
+ color: var(--text-primary);
37
+ min-height: 100vh;
38
+ line-height: 1.6;
39
+ }
40
+
41
+ .container {
42
+ max-width: 900px;
43
+ margin: 0 auto;
44
+ padding: 2rem 1.5rem;
45
+ }
46
+
47
+ header {
48
+ text-align: center;
49
+ margin-bottom: 3rem;
50
+ padding-top: 1rem;
51
+ }
52
+
53
+ .logo {
54
+ font-size: 2.5rem;
55
+ font-weight: 700;
56
+ letter-spacing: -0.05em;
57
+ margin-bottom: 0.5rem;
58
+ }
59
+
60
+ .logo span {
61
+ color: var(--accent);
62
+ }
63
+
64
+ .tagline {
65
+ color: var(--text-secondary);
66
+ font-size: 1rem;
67
+ font-weight: 300;
68
+ }
69
+
70
+ .card {
71
+ background: var(--bg-secondary);
72
+ border: 1px solid var(--border);
73
+ border-radius: 12px;
74
+ padding: 1.5rem;
75
+ margin-bottom: 1.5rem;
76
+ transition: border-color 0.2s ease;
77
+ }
78
+
79
+ .card:focus-within {
80
+ border-color: var(--border-light);
81
+ }
82
+
83
+ .card-label {
84
+ font-size: 0.75rem;
85
+ text-transform: uppercase;
86
+ letter-spacing: 0.1em;
87
+ color: var(--text-muted);
88
+ margin-bottom: 0.75rem;
89
+ font-weight: 500;
90
+ }
91
+
92
+ textarea {
93
+ width: 100%;
94
+ background: var(--bg-tertiary);
95
+ border: 1px solid var(--border);
96
+ border-radius: 8px;
97
+ padding: 1rem;
98
+ color: var(--text-primary);
99
+ font-family: 'JetBrains Mono', monospace;
100
+ font-size: 0.875rem;
101
+ resize: vertical;
102
+ min-height: 180px;
103
+ transition: border-color 0.2s ease;
104
+ }
105
+
106
+ textarea:focus {
107
+ outline: none;
108
+ border-color: var(--accent-muted);
109
+ }
110
+
111
+ textarea::placeholder {
112
+ color: var(--text-muted);
113
+ }
114
+
115
+ .btn {
116
+ display: inline-flex;
117
+ align-items: center;
118
+ justify-content: center;
119
+ gap: 0.5rem;
120
+ padding: 0.75rem 1.5rem;
121
+ border-radius: 8px;
122
+ font-family: 'Outfit', sans-serif;
123
+ font-size: 0.9rem;
124
+ font-weight: 500;
125
+ cursor: pointer;
126
+ transition: all 0.2s ease;
127
+ border: none;
128
+ }
129
+
130
+ .btn-primary {
131
+ background: var(--accent);
132
+ color: white;
133
+ }
134
+
135
+ .btn-primary:hover:not(:disabled) {
136
+ background: var(--accent-hover);
137
+ }
138
+
139
+ .btn-primary:disabled {
140
+ opacity: 0.5;
141
+ cursor: not-allowed;
142
+ }
143
+
144
+ .btn-secondary {
145
+ background: var(--bg-tertiary);
146
+ color: var(--text-primary);
147
+ border: 1px solid var(--border);
148
+ }
149
+
150
+ .btn-secondary:hover:not(:disabled) {
151
+ background: var(--bg-elevated);
152
+ border-color: var(--border-light);
153
+ }
154
+
155
+ .btn-group {
156
+ display: flex;
157
+ gap: 0.75rem;
158
+ flex-wrap: wrap;
159
+ }
160
+
161
+ .results {
162
+ display: none;
163
+ }
164
+
165
+ .results.visible {
166
+ display: block;
167
+ animation: fadeIn 0.3s ease;
168
+ }
169
+
170
+ @keyframes fadeIn {
171
+ from { opacity: 0; transform: translateY(10px); }
172
+ to { opacity: 1; transform: translateY(0); }
173
+ }
174
+
175
+ .result-main {
176
+ display: flex;
177
+ align-items: center;
178
+ justify-content: space-between;
179
+ padding: 1.25rem;
180
+ background: var(--bg-tertiary);
181
+ border-radius: 8px;
182
+ margin-bottom: 1rem;
183
+ }
184
+
185
+ .result-provider {
186
+ font-size: 1.5rem;
187
+ font-weight: 600;
188
+ }
189
+
190
+ .result-confidence {
191
+ font-size: 1.25rem;
192
+ font-weight: 500;
193
+ color: var(--accent);
194
+ }
195
+
196
+ .result-bar {
197
+ height: 8px;
198
+ background: var(--bg-elevated);
199
+ border-radius: 4px;
200
+ margin-bottom: 1rem;
201
+ overflow: hidden;
202
+ }
203
+
204
+ .result-bar-fill {
205
+ height: 100%;
206
+ background: var(--accent);
207
+ border-radius: 4px;
208
+ transition: width 0.5s ease;
209
+ }
210
+
211
+ .result-list {
212
+ list-style: none;
213
+ }
214
+
215
+ .result-item {
216
+ display: flex;
217
+ align-items: center;
218
+ justify-content: space-between;
219
+ padding: 0.75rem 0;
220
+ border-bottom: 1px solid var(--border);
221
+ }
222
+
223
+ .result-item:last-child {
224
+ border-bottom: none;
225
+ }
226
+
227
+ .result-name {
228
+ font-weight: 500;
229
+ }
230
+
231
+ .result-percent {
232
+ font-family: 'JetBrains Mono', monospace;
233
+ color: var(--text-secondary);
234
+ font-size: 0.875rem;
235
+ }
236
+
237
+ .correction {
238
+ display: none;
239
+ margin-top: 1.5rem;
240
+ padding-top: 1.5rem;
241
+ border-top: 1px solid var(--border);
242
+ }
243
+
244
+ .correction.visible {
245
+ display: block;
246
+ animation: fadeIn 0.3s ease;
247
+ }
248
+
249
+ .correction-title {
250
+ font-size: 0.875rem;
251
+ font-weight: 500;
252
+ margin-bottom: 0.75rem;
253
+ color: var(--text-secondary);
254
+ }
255
+
256
+ select {
257
+ width: 100%;
258
+ padding: 0.75rem 1rem;
259
+ background: var(--bg-tertiary);
260
+ border: 1px solid var(--border);
261
+ border-radius: 8px;
262
+ color: var(--text-primary);
263
+ font-family: 'Outfit', sans-serif;
264
+ font-size: 0.9rem;
265
+ margin-bottom: 0.75rem;
266
+ cursor: pointer;
267
+ }
268
+
269
+ select:focus {
270
+ outline: none;
271
+ border-color: var(--accent-muted);
272
+ }
273
+
274
+ .stats {
275
+ display: flex;
276
+ gap: 1.5rem;
277
+ margin-bottom: 1.5rem;
278
+ flex-wrap: wrap;
279
+ }
280
+
281
+ .stat {
282
+ background: var(--bg-secondary);
283
+ border: 1px solid var(--border);
284
+ border-radius: 8px;
285
+ padding: 1rem 1.25rem;
286
+ flex: 1;
287
+ min-width: 120px;
288
+ }
289
+
290
+ .stat-value {
291
+ font-size: 1.5rem;
292
+ font-weight: 600;
293
+ color: var(--accent);
294
+ }
295
+
296
+ .stat-label {
297
+ font-size: 0.75rem;
298
+ color: var(--text-muted);
299
+ text-transform: uppercase;
300
+ letter-spacing: 0.05em;
301
+ }
302
+
303
+ .actions {
304
+ display: flex;
305
+ gap: 0.75rem;
306
+ margin-top: 1rem;
307
+ }
308
+
309
+ .toast {
310
+ position: fixed;
311
+ bottom: 2rem;
312
+ right: 2rem;
313
+ background: var(--bg-elevated);
314
+ border: 1px solid var(--border);
315
+ border-radius: 8px;
316
+ padding: 1rem 1.5rem;
317
+ color: var(--text-primary);
318
+ font-size: 0.9rem;
319
+ opacity: 0;
320
+ transform: translateY(20px);
321
+ transition: all 0.3s ease;
322
+ z-index: 1000;
323
+ }
324
+
325
+ .toast.visible {
326
+ opacity: 1;
327
+ transform: translateY(0);
328
+ }
329
+
330
+ .toast.success {
331
+ border-color: var(--success-muted);
332
+ }
333
+
334
+ .footer {
335
+ text-align: center;
336
+ margin-top: 3rem;
337
+ padding: 1.5rem;
338
+ color: var(--text-muted);
339
+ font-size: 0.8rem;
340
+ }
341
+
342
+ .footer a {
343
+ color: var(--text-secondary);
344
+ text-decoration: none;
345
+ }
346
+
347
+ .footer a:hover {
348
+ color: var(--accent);
349
+ }
350
+
351
+ .loading {
352
+ display: inline-block;
353
+ width: 16px;
354
+ height: 16px;
355
+ border: 2px solid var(--text-muted);
356
+ border-top-color: var(--accent);
357
+ border-radius: 50%;
358
+ animation: spin 0.8s linear infinite;
359
+ }
360
+
361
+ @keyframes spin {
362
+ to { transform: rotate(360deg); }
363
+ }
364
+
365
+ .status-indicator {
366
+ display: inline-flex;
367
+ align-items: center;
368
+ gap: 0.5rem;
369
+ font-size: 0.8rem;
370
+ color: var(--text-muted);
371
+ margin-bottom: 1rem;
372
+ }
373
+
374
+ .status-dot {
375
+ width: 8px;
376
+ height: 8px;
377
+ border-radius: 50%;
378
+ background: var(--success);
379
+ }
380
+
381
+ .status-dot.loading {
382
+ background: var(--accent);
383
+ animation: pulse 1s ease infinite;
384
+ }
385
+
386
+ @keyframes pulse {
387
+ 0%, 100% { opacity: 1; }
388
+ 50% { opacity: 0.5; }
389
+ }
390
+
391
+ .empty-state {
392
+ text-align: center;
393
+ padding: 3rem 1rem;
394
+ color: var(--text-muted);
395
+ }
396
+
397
+ .empty-state-icon {
398
+ font-size: 3rem;
399
+ margin-bottom: 1rem;
400
+ opacity: 0.5;
401
+ }
402
+
403
+ /* ── Tabs ── */
404
+ .tabs {
405
+ display: flex;
406
+ gap: 0;
407
+ margin-bottom: 2rem;
408
+ border-bottom: 1px solid var(--border);
409
+ }
410
+
411
+ .tab {
412
+ padding: 0.75rem 1.5rem;
413
+ font-family: 'Outfit', sans-serif;
414
+ font-size: 0.9rem;
415
+ font-weight: 500;
416
+ color: var(--text-muted);
417
+ background: none;
418
+ border: none;
419
+ border-bottom: 2px solid transparent;
420
+ cursor: pointer;
421
+ transition: all 0.2s ease;
422
+ }
423
+
424
+ .tab:hover {
425
+ color: var(--text-secondary);
426
+ }
427
+
428
+ .tab.active {
429
+ color: var(--accent);
430
+ border-bottom-color: var(--accent);
431
+ }
432
+
433
+ .tab-content {
434
+ display: none;
435
+ }
436
+
437
+ .tab-content.active {
438
+ display: block;
439
+ animation: fadeIn 0.3s ease;
440
+ }
441
+
442
+ /* ── API Docs ── */
443
+ .docs-section {
444
+ margin-bottom: 2rem;
445
+ }
446
+
447
+ .docs-section h2 {
448
+ font-size: 1.25rem;
449
+ font-weight: 600;
450
+ margin-bottom: 0.75rem;
451
+ color: var(--text-primary);
452
+ }
453
+
454
+ .docs-section h3 {
455
+ font-size: 1rem;
456
+ font-weight: 500;
457
+ margin-top: 1.25rem;
458
+ margin-bottom: 0.5rem;
459
+ color: var(--text-secondary);
460
+ }
461
+
462
+ .docs-section p {
463
+ color: var(--text-secondary);
464
+ font-size: 0.9rem;
465
+ margin-bottom: 0.75rem;
466
+ line-height: 1.7;
467
+ }
468
+
469
+ .docs-endpoint {
470
+ display: inline-flex;
471
+ align-items: center;
472
+ gap: 0.5rem;
473
+ background: var(--bg-tertiary);
474
+ border: 1px solid var(--border);
475
+ border-radius: 6px;
476
+ padding: 0.5rem 1rem;
477
+ margin-bottom: 1rem;
478
+ font-family: 'JetBrains Mono', monospace;
479
+ font-size: 0.85rem;
480
+ }
481
+
482
+ .docs-method {
483
+ color: var(--success);
484
+ font-weight: 600;
485
+ }
486
+
487
+ .docs-path {
488
+ color: var(--text-primary);
489
+ }
490
+
491
+ .docs-badge {
492
+ display: inline-block;
493
+ font-size: 0.7rem;
494
+ font-weight: 600;
495
+ text-transform: uppercase;
496
+ letter-spacing: 0.05em;
497
+ padding: 0.2rem 0.6rem;
498
+ border-radius: 4px;
499
+ margin-left: 0.5rem;
500
+ }
501
+
502
+ .docs-badge.free {
503
+ background: var(--success-muted);
504
+ color: var(--success);
505
+ }
506
+
507
+ .docs-badge.limit {
508
+ background: var(--accent-muted);
509
+ color: var(--accent-hover);
510
+ }
511
+
512
+ .docs-code-block {
513
+ position: relative;
514
+ background: var(--bg-tertiary);
515
+ border: 1px solid var(--border);
516
+ border-radius: 8px;
517
+ margin-bottom: 1rem;
518
+ overflow: hidden;
519
+ }
520
+
521
+ .docs-code-header {
522
+ display: flex;
523
+ align-items: center;
524
+ justify-content: space-between;
525
+ padding: 0.5rem 1rem;
526
+ background: var(--bg-elevated);
527
+ border-bottom: 1px solid var(--border);
528
+ font-size: 0.75rem;
529
+ color: var(--text-muted);
530
+ text-transform: uppercase;
531
+ letter-spacing: 0.05em;
532
+ }
533
+
534
+ .docs-copy-btn {
535
+ background: none;
536
+ border: 1px solid var(--border);
537
+ border-radius: 4px;
538
+ color: var(--text-muted);
539
+ font-size: 0.7rem;
540
+ padding: 0.2rem 0.5rem;
541
+ cursor: pointer;
542
+ font-family: 'Outfit', sans-serif;
543
+ transition: all 0.2s ease;
544
+ }
545
+
546
+ .docs-copy-btn:hover {
547
+ color: var(--text-primary);
548
+ border-color: var(--border-light);
549
+ }
550
+
551
+ .docs-code-block pre {
552
+ padding: 1rem;
553
+ overflow-x: auto;
554
+ font-family: 'JetBrains Mono', monospace;
555
+ font-size: 0.8rem;
556
+ line-height: 1.6;
557
+ color: var(--text-primary);
558
+ margin: 0;
559
+ }
560
+
561
+ .docs-table {
562
+ width: 100%;
563
+ border-collapse: collapse;
564
+ font-size: 0.85rem;
565
+ margin-bottom: 1rem;
566
+ }
567
+
568
+ .docs-table th {
569
+ text-align: left;
570
+ padding: 0.6rem 0.75rem;
571
+ background: var(--bg-elevated);
572
+ color: var(--text-secondary);
573
+ font-weight: 500;
574
+ border-bottom: 1px solid var(--border);
575
+ font-size: 0.75rem;
576
+ text-transform: uppercase;
577
+ letter-spacing: 0.05em;
578
+ }
579
+
580
+ .docs-table td {
581
+ padding: 0.6rem 0.75rem;
582
+ border-bottom: 1px solid var(--border);
583
+ color: var(--text-secondary);
584
+ }
585
+
586
+ .docs-table tr:last-child td {
587
+ border-bottom: none;
588
+ }
589
+
590
+ .docs-table code {
591
+ font-family: 'JetBrains Mono', monospace;
592
+ font-size: 0.8rem;
593
+ background: var(--bg-tertiary);
594
+ padding: 0.15rem 0.4rem;
595
+ border-radius: 3px;
596
+ color: var(--accent-hover);
597
+ }
598
+
599
+ .docs-warning {
600
+ background: rgba(232, 93, 4, 0.08);
601
+ border: 1px solid var(--accent-muted);
602
+ border-radius: 8px;
603
+ padding: 1rem 1.25rem;
604
+ margin-bottom: 1rem;
605
+ font-size: 0.85rem;
606
+ color: var(--text-secondary);
607
+ line-height: 1.7;
608
+ }
609
+
610
+ .docs-warning strong {
611
+ color: var(--accent-hover);
612
+ }
613
+
614
+ .docs-inline-code {
615
+ font-family: 'JetBrains Mono', monospace;
616
+ font-size: 0.8rem;
617
+ background: var(--bg-tertiary);
618
+ padding: 0.15rem 0.4rem;
619
+ border-radius: 3px;
620
+ color: var(--accent-hover);
621
+ }
622
+
623
+ .docs-try-it {
624
+ background: var(--bg-tertiary);
625
+ border: 1px solid var(--border);
626
+ border-radius: 8px;
627
+ padding: 1.25rem;
628
+ margin-top: 1rem;
629
+ }
630
+
631
+ .docs-try-it textarea {
632
+ min-height: 100px;
633
+ margin-bottom: 0.75rem;
634
+ }
635
+
636
+ .docs-try-output {
637
+ background: var(--bg-primary);
638
+ border: 1px solid var(--border);
639
+ border-radius: 6px;
640
+ padding: 1rem;
641
+ font-family: 'JetBrains Mono', monospace;
642
+ font-size: 0.8rem;
643
+ color: var(--text-secondary);
644
+ white-space: pre-wrap;
645
+ word-break: break-word;
646
+ max-height: 300px;
647
+ overflow-y: auto;
648
+ display: none;
649
+ }
650
+
651
+ .docs-try-output.visible {
652
+ display: block;
653
+ animation: fadeIn 0.3s ease;
654
+ }
655
+
656
+ @media (max-width: 600px) {
657
+ .container {
658
+ padding: 1rem;
659
+ }
660
+
661
+ .logo {
662
+ font-size: 2rem;
663
+ }
664
+
665
+ .btn-group {
666
+ flex-direction: column;
667
+ }
668
+
669
+ .btn {
670
+ width: 100%;
671
+ }
672
+
673
+ .result-main {
674
+ flex-direction: column;
675
+ gap: 0.5rem;
676
+ text-align: center;
677
+ }
678
+ }
679
+ </style>
680
+ </head>
681
+ <body>
682
+ <div class="container">
683
+ <header>
684
+ <div class="logo">AI<span>Finder</span></div>
685
+ <p class="tagline">Identify which AI provider generated a response</p>
686
+ </header>
687
+
688
+ <div class="tabs">
689
+ <button class="tab active" data-tab="classify">Classify</button>
690
+ <button class="tab" data-tab="docs">API Docs</button>
691
+ </div>
692
+
693
+ <!-- ═══ Classify Tab ═══ -->
694
+ <div class="tab-content active" id="tab-classify">
695
+ <div class="status-indicator">
696
+ <span class="status-dot" id="statusDot"></span>
697
+ <span id="statusText">Connecting to API...</span>
698
+ </div>
699
+
700
+ <div class="card">
701
+ <div class="card-label">Paste AI Response</div>
702
+ <textarea id="inputText" placeholder="Paste an AI response here to identify which provider generated it..."></textarea>
703
+ </div>
704
+
705
+ <div class="btn-group">
706
+ <button class="btn btn-primary" id="classifyBtn" disabled>
707
+ <span id="classifyBtnText">Classify</span>
708
+ </button>
709
+ <button class="btn btn-secondary" id="clearBtn">Clear</button>
710
+ </div>
711
+
712
+ <div class="results" id="results">
713
+ <div class="card">
714
+ <div class="card-label">Result</div>
715
+ <div class="result-main">
716
+ <span class="result-provider" id="resultProvider">-</span>
717
+ <span class="result-confidence" id="resultConfidence">-</span>
718
+ </div>
719
+ <div class="result-bar">
720
+ <div class="result-bar-fill" id="resultBar" style="width: 0%"></div>
721
+ </div>
722
+ <ul class="result-list" id="resultList"></ul>
723
+ </div>
724
+
725
+ <div class="correction" id="correction">
726
+ <div class="correction-title">Wrong? Correct the provider to train the model:</div>
727
+ <select id="providerSelect"></select>
728
+ <button class="btn btn-primary" id="trainBtn">Train & Save</button>
729
+ </div>
730
+ </div>
731
+
732
+ <div class="stats" id="stats" style="display: none;">
733
+ <div class="stat">
734
+ <div class="stat-value" id="correctionsCount">0</div>
735
+ <div class="stat-label">Corrections</div>
736
+ </div>
737
+ <div class="stat">
738
+ <div class="stat-value" id="sessionCount">0</div>
739
+ <div class="stat-label">Session</div>
740
+ </div>
741
+ </div>
742
+
743
+ <div class="actions" id="actions" style="display: none;">
744
+ <button class="btn btn-secondary" id="exportBtn">Export Trained Model</button>
745
+ <button class="btn btn-secondary" id="communityBtn" style="display:none;">Use Community Model</button>
746
+ <button class="btn btn-secondary" id="resetBtn">Reset Training</button>
747
+ </div>
748
+
749
+ <div id="communityWarning" style="display:none; margin-top:1rem; background:rgba(232,93,4,0.12); border:1px solid var(--accent-muted); border-radius:8px; padding:1rem 1.25rem; font-size:0.85rem; color:var(--text-secondary); line-height:1.7;">
750
+ ⚠️ <strong style="color:var(--accent-hover);">Community Model Active</strong> β€” This is a community-trained version. It could be <strong style="color:var(--accent-hover);">VERY wrong</strong>. Results may be unreliable. Use at your own risk.
751
+ </div>
752
+ </div>
753
+
754
+ <!-- ═══ API Docs Tab ═══ -->
755
+ <div class="tab-content" id="tab-docs">
756
+
757
+ <div class="docs-section">
758
+ <h2>Public Classification API</h2>
759
+ <p>
760
+ AIFinder exposes a free, public endpoint for programmatic classification.
761
+ No API key required.
762
+ </p>
763
+ <div>
764
+ <div class="docs-endpoint">
765
+ <span class="docs-method">POST</span>
766
+ <span class="docs-path">/v1/classify</span>
767
+ </div>
768
+ <span class="docs-badge free">No API Key</span>
769
+ <span class="docs-badge limit">60 req/min</span>
770
+ </div>
771
+ </div>
772
+
773
+ <!-- ── Request ── -->
774
+ <div class="docs-section">
775
+ <h2>Request</h2>
776
+ <p>Send a JSON body with <span class="docs-inline-code">Content-Type: application/json</span>.</p>
777
+
778
+ <table class="docs-table">
779
+ <thead>
780
+ <tr><th>Field</th><th>Type</th><th>Required</th><th>Description</th></tr>
781
+ </thead>
782
+ <tbody>
783
+ <tr>
784
+ <td><code>text</code></td>
785
+ <td>string</td>
786
+ <td>Yes</td>
787
+ <td>The AI-generated text to classify (min 20 chars)</td>
788
+ </tr>
789
+ <tr>
790
+ <td><code>top_n</code></td>
791
+ <td>integer</td>
792
+ <td>No</td>
793
+ <td>Number of results to return (default: <strong>5</strong>)</td>
794
+ </tr>
795
+ </tbody>
796
+ </table>
797
+
798
+ <div class="docs-warning">
799
+ <strong>⚠️ Strip thought tags!</strong><br>
800
+ Many reasoning models wrap chain-of-thought in
801
+ <span class="docs-inline-code">&lt;think&gt;…&lt;/think&gt;</span> or
802
+ <span class="docs-inline-code">&lt;thinking&gt;…&lt;/thinking&gt;</span> blocks.
803
+ These confuse the classifier. The API strips them automatically, but you should
804
+ remove them on your side too to save bandwidth.
805
+ </div>
806
+ </div>
807
+
808
+ <!-- ── Response ── -->
809
+ <div class="docs-section">
810
+ <h2>Response</h2>
811
+ <div class="docs-code-block">
812
+ <div class="docs-code-header">
813
+ <span>JSON</span>
814
+ <button class="docs-copy-btn" onclick="copyCode(this)">Copy</button>
815
+ </div>
816
+ <pre>{
817
+ "provider": "Anthropic",
818
+ "confidence": 87.42,
819
+ "top_providers": [
820
+ { "name": "Anthropic", "confidence": 87.42 },
821
+ { "name": "OpenAI", "confidence": 6.15 },
822
+ { "name": "Google", "confidence": 3.28 },
823
+ { "name": "xAI", "confidence": 1.74 },
824
+ { "name": "DeepSeek", "confidence": 0.89 }
825
+ ]
826
+ }</pre>
827
+ </div>
828
+
829
+ <table class="docs-table">
830
+ <thead>
831
+ <tr><th>Field</th><th>Type</th><th>Description</th></tr>
832
+ </thead>
833
+ <tbody>
834
+ <tr>
835
+ <td><code>provider</code></td>
836
+ <td>string</td>
837
+ <td>Best-matching provider name</td>
838
+ </tr>
839
+ <tr>
840
+ <td><code>confidence</code></td>
841
+ <td>float</td>
842
+ <td>Confidence % for the top provider</td>
843
+ </tr>
844
+ <tr>
845
+ <td><code>top_providers</code></td>
846
+ <td>array</td>
847
+ <td>Ranked list of <code>{ name, confidence }</code> objects</td>
848
+ </tr>
849
+ </tbody>
850
+ </table>
851
+ </div>
852
+
853
+ <!-- ── Errors ── -->
854
+ <div class="docs-section">
855
+ <h2>Errors</h2>
856
+ <table class="docs-table">
857
+ <thead>
858
+ <tr><th>Status</th><th>Meaning</th></tr>
859
+ </thead>
860
+ <tbody>
861
+ <tr><td><code>400</code></td><td>Missing <code>text</code> field or text shorter than 20 characters</td></tr>
862
+ <tr><td><code>429</code></td><td>Rate limit exceeded (60 requests/minute per IP)</td></tr>
863
+ </tbody>
864
+ </table>
865
+ </div>
866
+
867
+ <!-- ── Code Examples ── -->
868
+ <div class="docs-section">
869
+ <h2>Code Examples</h2>
870
+
871
+ <h3>cURL</h3>
872
+ <div class="docs-code-block">
873
+ <div class="docs-code-header">
874
+ <span>Bash</span>
875
+ <button class="docs-copy-btn" onclick="copyCode(this)">Copy</button>
876
+ </div>
877
+ <pre>curl -X POST https://huggingface.co/spaces/CompactAI/AIFinder/v1/classify \
878
+ -H "Content-Type: application/json" \
879
+ -d '{
880
+ "text": "I would be happy to help you with that! Here is a detailed explanation of how neural networks work...",
881
+ "top_n": 5
882
+ }'</pre>
883
+ </div>
884
+
885
+ <h3>Python</h3>
886
+ <div class="docs-code-block">
887
+ <div class="docs-code-header">
888
+ <span>Python</span>
889
+ <button class="docs-copy-btn" onclick="copyCode(this)">Copy</button>
890
+ </div>
891
+ <pre>import re
892
+ import requests
893
+
894
+ API_URL = "https://huggingface.co/spaces/CompactAI/AIFinder/v1/classify"
895
+
896
+ def strip_think_tags(text):
897
+ """Remove &lt;think&gt;/&lt;thinking&gt; blocks before classifying."""
898
+ return re.sub(r"&lt;think(?:ing)?&gt;.*?&lt;/think(?:ing)?&gt;",
899
+ "", text, flags=re.DOTALL).strip()
900
+
901
+ text = """I'd be happy to help! Neural networks are
902
+ computational models inspired by the human brain..."""
903
+
904
+ # Strip thought tags first (the API does this too,
905
+ # but saves bandwidth to do it client-side)
906
+ cleaned = strip_think_tags(text)
907
+
908
+ response = requests.post(API_URL, json={
909
+ "text": cleaned,
910
+ "top_n": 5
911
+ })
912
+
913
+ data = response.json()
914
+ print(f"Provider: {data['provider']} ({data['confidence']:.1f}%)")
915
+ for p in data["top_providers"]:
916
+ print(f" {p['name']:&lt;20s} {p['confidence']:5.1f}%")</pre>
917
+ </div>
918
+
919
+ <h3>JavaScript (fetch)</h3>
920
+ <div class="docs-code-block">
921
+ <div class="docs-code-header">
922
+ <span>JavaScript</span>
923
+ <button class="docs-copy-btn" onclick="copyCode(this)">Copy</button>
924
+ </div>
925
+ <pre>const API_URL = "https://huggingface.co/spaces/CompactAI/AIFinder/v1/classify";
926
+
927
+ function stripThinkTags(text) {
928
+ return text.replace(/&lt;think(?:ing)?&gt;[\s\S]*?&lt;\/think(?:ing)?&gt;/g, "").trim();
929
+ }
930
+
931
+ async function classify(text, topN = 5) {
932
+ const cleaned = stripThinkTags(text);
933
+ const res = await fetch(API_URL, {
934
+ method: "POST",
935
+ headers: { "Content-Type": "application/json" },
936
+ body: JSON.stringify({ text: cleaned, top_n: topN })
937
+ });
938
+ return res.json();
939
+ }
940
+
941
+ // Usage
942
+ classify("I'd be happy to help you understand...")
943
+ .then(data =&gt; {
944
+ console.log(`Provider: ${data.provider} (${data.confidence}%)`);
945
+ data.top_providers.forEach(p =&gt;
946
+ console.log(` ${p.name}: ${p.confidence}%`)
947
+ );
948
+ });</pre>
949
+ </div>
950
+
951
+ <h3>Node.js</h3>
952
+ <div class="docs-code-block">
953
+ <div class="docs-code-header">
954
+ <span>JavaScript (Node)</span>
955
+ <button class="docs-copy-btn" onclick="copyCode(this)">Copy</button>
956
+ </div>
957
+ <pre>const API_URL = "https://huggingface.co/spaces/CompactAI/AIFinder/v1/classify";
958
+
959
+ async function classify(text, topN = 5) {
960
+ const cleaned = text
961
+ .replace(/&lt;think(?:ing)?&gt;[\s\S]*?&lt;\/think(?:ing)?&gt;/g, "")
962
+ .trim();
963
+
964
+ const res = await fetch(API_URL, {
965
+ method: "POST",
966
+ headers: { "Content-Type": "application/json" },
967
+ body: JSON.stringify({ text: cleaned, top_n: topN })
968
+ });
969
+
970
+ if (!res.ok) {
971
+ const err = await res.json();
972
+ throw new Error(err.error || `HTTP ${res.status}`);
973
+ }
974
+ return res.json();
975
+ }
976
+
977
+ // Example
978
+ (async () =&gt; {
979
+ const result = await classify(
980
+ "Let me think about this step by step...",
981
+ 3
982
+ );
983
+ console.log(result);
984
+ })();</pre>
985
+ </div>
986
+ </div>
987
+
988
+ <!-- ── Try It ── -->
989
+ <div class="docs-section">
990
+ <h2>Try It</h2>
991
+ <p>Test the API right here β€” paste any AI-generated text and hit Send.</p>
992
+ <div class="docs-try-it">
993
+ <textarea id="docsTestInput" placeholder="Paste AI-generated text here..."></textarea>
994
+ <div class="btn-group">
995
+ <button class="btn btn-primary" id="docsTestBtn">Send Request</button>
996
+ </div>
997
+ <div class="docs-try-output" id="docsTestOutput"></div>
998
+ </div>
999
+ </div>
1000
+
1001
+ <!-- ── Providers ── -->
1002
+ <div class="docs-section">
1003
+ <h2>Supported Providers</h2>
1004
+ <p>The classifier currently supports these providers:</p>
1005
+ <div id="docsProviderList" style="display: flex; flex-wrap: wrap; gap: 0.5rem; margin-top: 0.5rem;"></div>
1006
+ </div>
1007
+ </div>
1008
+
1009
+ <div class="footer">
1010
+ <p>AIFinder &mdash; Train on corrections to improve accuracy</p>
1011
+ <p style="margin-top: 0.5rem;">
1012
+ Want to contribute? Test this and post to the
1013
+ <a href="https://huggingface.co/spaces" target="_blank">HuggingFace Spaces Community</a>
1014
+ if you want it merged!
1015
+ </p>
1016
+ </div>
1017
+ </div>
1018
+
1019
+ <div class="toast" id="toast"></div>
1020
+
1021
+ <script>
1022
// API base URL: during local development the Flask backend runs on :7860;
// in production the frontend is served from the same origin, so relative
// URLs are used.
const API_BASE = window.location.hostname === 'localhost' || window.location.hostname === '127.0.0.1'
    ? 'http://localhost:7860'
    : '';

// Mutable app state
let providers = [];          // provider names fetched from /api/providers
let correctionsCount = 0;    // total persisted corrections (also in localStorage)
let sessionCorrections = 0;  // corrections submitted during this page session

// Cached DOM element references
const inputText = document.getElementById('inputText');
const classifyBtn = document.getElementById('classifyBtn');
const classifyBtnText = document.getElementById('classifyBtnText');
const clearBtn = document.getElementById('clearBtn');
const results = document.getElementById('results');
const resultProvider = document.getElementById('resultProvider');
const resultConfidence = document.getElementById('resultConfidence');
const resultBar = document.getElementById('resultBar');
const resultList = document.getElementById('resultList');
const correction = document.getElementById('correction');
const providerSelect = document.getElementById('providerSelect');
const trainBtn = document.getElementById('trainBtn');
const stats = document.getElementById('stats');
const correctionsCountEl = document.getElementById('correctionsCount');
const sessionCountEl = document.getElementById('sessionCount');
const actions = document.getElementById('actions');
const exportBtn = document.getElementById('exportBtn');
const communityBtn = document.getElementById('communityBtn');
const communityWarning = document.getElementById('communityWarning');
const resetBtn = document.getElementById('resetBtn');
const toast = document.getElementById('toast');
const statusDot = document.getElementById('statusDot');
const statusText = document.getElementById('statusText');
// Whether the community-trained model (vs the official one) is active.
let usingCommunity = false;
1054
+
1055
// Show a transient notification in the bottom-right toast element.
// `type === 'success'` adds the success accent styling; the toast
// auto-hides after 3 seconds.
function showToast(message, type = 'info') {
    const isSuccess = type === 'success';
    toast.textContent = message;
    toast.className = isSuccess ? 'toast visible success' : 'toast visible';
    setTimeout(() => toast.classList.remove('visible'), 3000);
}
1062
+
1063
// Poll /api/status until the backend reports the model is loaded.
// Once loaded: enable classification, reflect which model is active
// (official vs community), restore correction counters, and kick off
// the provider and stats loads. While loading or unreachable, retry
// after a short delay.
async function checkStatus() {
    try {
        const response = await fetch(`${API_BASE}/api/status`);
        const status = await response.json();

        if (!status.loaded) {
            // Backend reachable but model still warming up - poll again soon.
            setTimeout(checkStatus, 1000);
            return;
        }

        statusDot.classList.remove('loading');
        statusText.textContent = status.using_community
            ? 'Ready — Community Model (cpu)'
            : `Ready (${status.device})`;
        classifyBtn.disabled = false;
        usingCommunity = status.using_community;
        updateCommunityUI(status.community_available);

        if (status.corrections_count > 0) {
            correctionsCount = status.corrections_count;
            correctionsCountEl.textContent = correctionsCount;
            stats.style.display = 'flex';
            actions.style.display = 'flex';
        }

        loadProviders();
        loadStats();
    } catch (err) {
        // API unreachable - show the connecting state and retry more slowly.
        statusDot.classList.add('loading');
        statusText.textContent = 'Connecting to API...';
        setTimeout(checkStatus, 2000);
    }
}
1090
+
1091
// Sync the community-model controls with backend availability.
// When a community model exists, show the toggle button (label reflects
// the currently active model) and the warning banner while it is in use.
function updateCommunityUI(available) {
    if (!available) {
        communityBtn.style.display = 'none';
        communityWarning.style.display = 'none';
        return;
    }
    communityBtn.style.display = '';
    communityBtn.textContent = usingCommunity
        ? 'Use Official Model'
        : 'Use Community Model';
    communityWarning.style.display = usingCommunity ? 'block' : 'none';
    actions.style.display = 'flex';
}
1102
+
1103
// Fetch the supported provider list and populate the correction dropdown.
// Called fire-and-forget (not awaited) from checkStatus(), so a network
// failure here would otherwise surface as an unhandled promise rejection;
// catch and report it instead.
async function loadProviders() {
    try {
        const res = await fetch(`${API_BASE}/api/providers`);
        const data = await res.json();
        providers = data.providers;

        // Provider names come from our own API, so interpolating them into
        // the <option> markup is acceptable here.
        providerSelect.innerHTML = providers
            .map(p => `<option value="${p}">${p}</option>`)
            .join('');
    } catch (e) {
        showToast('Error loading providers: ' + e.message);
    }
}
1112
+
1113
// Restore the persisted correction count from localStorage and refresh the
// stats widgets. Guards against a corrupted stored value, which parseInt
// would otherwise turn into NaN displayed in the UI.
function loadStats() {
    const saved = localStorage.getItem('aifinder_corrections');
    if (saved) {
        const parsed = parseInt(saved, 10);
        correctionsCount = Number.isFinite(parsed) ? parsed : 0;
        correctionsCountEl.textContent = correctionsCount;
        stats.style.display = 'flex';
        actions.style.display = 'flex';
    }
    sessionCountEl.textContent = sessionCorrections;
}
1123
+
1124
// Persist the running correction count so it survives page reloads.
function saveStats() {
    localStorage.setItem('aifinder_corrections', String(correctionsCount));
}
1127
+
1128
// Send the textarea contents to /api/classify and render the result.
// Rejects inputs shorter than 20 characters (mirrors the server-side
// minimum). On a non-OK response, surfaces the server's JSON `error`
// message (the API's documented error shape) instead of a generic string.
async function classify() {
    const text = inputText.value.trim();
    if (text.length < 20) {
        showToast('Text must be at least 20 characters');
        return;
    }

    classifyBtn.disabled = true;
    classifyBtnText.innerHTML = '<span class="loading"></span>';

    try {
        const res = await fetch(`${API_BASE}/api/classify`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text })
        });

        if (!res.ok) {
            // Prefer the server-provided error message when available.
            let message = 'Classification failed';
            try {
                const err = await res.json();
                if (err && err.error) message = err.error;
            } catch (_) { /* non-JSON error body - keep the generic message */ }
            throw new Error(message);
        }

        const data = await res.json();
        showResults(data);
    } catch (e) {
        showToast('Error: ' + e.message);
    } finally {
        classifyBtn.disabled = false;
        classifyBtnText.textContent = 'Classify';
    }
}
1158
+
1159
// Render a classification response: headline provider + confidence, the
// confidence bar width, and the ranked provider list. Also preselects the
// predicted provider in the correction dropdown and reveals the result UI.
function showResults(data) {
    resultProvider.textContent = data.provider;
    resultConfidence.textContent = data.confidence.toFixed(1) + '%';
    resultBar.style.width = data.confidence + '%';

    const rows = data.top_providers.map(p => `
        <li class="result-item">
            <span class="result-name">${p.name}</span>
            <span class="result-percent">${p.confidence.toFixed(1)}%</span>
        </li>
    `);
    resultList.innerHTML = rows.join('');

    providerSelect.value = data.provider;

    results.classList.add('visible');
    correction.classList.add('visible');

    if (correctionsCount > 0 || sessionCorrections > 0) {
        stats.style.display = 'flex';
        actions.style.display = 'flex';
    }
}
1181
+
1182
// Submit a provider correction to /api/correct for online training, update
// the local/session counters, and re-classify the current text so the UI
// reflects the retrained model. On a non-OK response, surfaces the server's
// JSON `error` message instead of a generic string (consistent with
// classify()).
async function train() {
    const text = inputText.value.trim();
    const correctProvider = providerSelect.value;

    trainBtn.disabled = true;
    trainBtn.innerHTML = '<span class="loading"></span>';

    try {
        const res = await fetch(`${API_BASE}/api/correct`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text, correct_provider: correctProvider })
        });

        if (!res.ok) {
            // Prefer the server-provided error message when available.
            let message = 'Training failed';
            try {
                const err = await res.json();
                if (err && err.error) message = err.error;
            } catch (_) { /* non-JSON error body - keep the generic message */ }
            throw new Error(message);
        }

        const data = await res.json();
        // Server count is authoritative; fall back to a local increment.
        correctionsCount = data.corrections || correctionsCount + 1;
        sessionCorrections++;
        saveStats();
        correctionsCountEl.textContent = correctionsCount;
        sessionCountEl.textContent = sessionCorrections;

        showToast('Correction saved & community model retrained!', 'success');

        stats.style.display = 'flex';
        actions.style.display = 'flex';
        updateCommunityUI(true);

        // Re-run classification so the displayed result reflects training.
        classify();
    } catch (e) {
        showToast('Error: ' + e.message);
    } finally {
        trainBtn.disabled = false;
        trainBtn.textContent = 'Train & Save';
    }
}
1221
+
1222
// Ask the backend to save the trained model checkpoint, then trigger a
// browser download of the saved file. On a non-OK response, surfaces the
// server's JSON `error` message instead of a generic string (consistent
// with classify()/train()).
async function exportModel() {
    exportBtn.disabled = true;
    exportBtn.innerHTML = '<span class="loading"></span>';

    try {
        const res = await fetch(`${API_BASE}/api/save`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ filename: 'aifinder_trained.pt' })
        });

        if (!res.ok) {
            // Prefer the server-provided error message when available.
            let message = 'Save failed';
            try {
                const err = await res.json();
                if (err && err.error) message = err.error;
            } catch (_) { /* non-JSON error body - keep the generic message */ }
            throw new Error(message);
        }

        const data = await res.json();

        // Programmatic download of the exported checkpoint.
        const link = document.createElement('a');
        link.href = `${API_BASE}/models/${data.filename}`;
        link.download = data.filename;
        link.click();

        showToast('Model exported!', 'success');
    } catch (e) {
        showToast('Error: ' + e.message);
    } finally {
        exportBtn.disabled = false;
        exportBtn.textContent = 'Export Trained Model';
    }
}
1252
+
1253
// Clear all locally tracked training state after user confirmation.
// Resets both counters, drops the persisted count, and hides the
// stats/actions panels.
function resetTraining() {
    const confirmed = confirm('Reset all training data? This cannot be undone.');
    if (!confirmed) return;

    correctionsCount = 0;
    sessionCorrections = 0;
    localStorage.removeItem('aifinder_corrections');
    correctionsCountEl.textContent = '0';
    sessionCountEl.textContent = '0';
    stats.style.display = 'none';
    actions.style.display = 'none';
    showToast('Training data reset');
}
1267
+
1268
// ── Wire up UI event handlers ──
classifyBtn.addEventListener('click', classify);
trainBtn.addEventListener('click', train);
exportBtn.addEventListener('click', exportModel);
resetBtn.addEventListener('click', resetTraining);

// Clear button empties the input and hides any previous result UI.
clearBtn.addEventListener('click', () => {
    inputText.value = '';
    results.classList.remove('visible');
    correction.classList.remove('visible');
});

// Toggle between the official and community-trained model on the backend.
communityBtn.addEventListener('click', async () => {
    communityBtn.disabled = true;
    try {
        const res = await fetch(`${API_BASE}/api/toggle_community`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ enabled: !usingCommunity })
        });
        const data = await res.json();
        usingCommunity = data.using_community;
        updateCommunityUI(data.available);
        statusText.textContent = usingCommunity ? 'Ready — Community Model (cpu)' : 'Ready (cpu)';
        showToast(usingCommunity ? 'Switched to community model' : 'Switched to official model', 'success');
    } catch (e) {
        showToast('Error: ' + e.message);
    } finally {
        communityBtn.disabled = false;
    }
});

// Ctrl+Enter inside the textarea triggers classification.
inputText.addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && e.ctrlKey) {
        classify();
    }
});
1302
+
1303
// ── Tab switching: exactly one tab and its matching panel are active ──
const allTabs = document.querySelectorAll('.tab');
allTabs.forEach(function (tabEl) {
  tabEl.addEventListener('click', function () {
    // Deactivate everything first, then activate the clicked pair.
    allTabs.forEach(function (t) { t.classList.remove('active'); });
    document.querySelectorAll('.tab-content').forEach(function (c) {
      c.classList.remove('active');
    });
    tabEl.classList.add('active');
    document.getElementById('tab-' + tabEl.dataset.tab).classList.add('active');
  });
});
1312
+
1313
// ── Copy button for code blocks ──
// Copies the <pre> text of the enclosing docs code block to the clipboard
// and briefly flips the button label as feedback. (Invoked from inline
// onclick attributes in the docs markup.)
function copyCode(btn) {
  const sourcePre = btn.closest('.docs-code-block').querySelector('pre');
  navigator.clipboard.writeText(sourcePre.textContent).then(function () {
    btn.textContent = 'Copied!';
    setTimeout(function () { btn.textContent = 'Copy'; }, 1500);
  });
}
1321
+
1322
// ── Docs: render one inline-code badge per known provider ──
// No-op if the docs container is absent or the provider list hasn't loaded.
function populateDocsProviders() {
  const container = document.getElementById('docsProviderList');
  if (!container || !providers.length) return;

  const badges = providers.map(function (name) {
    return `<span class="docs-inline-code" style="padding:0.3rem 0.75rem;">${name}</span>`;
  });
  container.innerHTML = badges.join('');
}
1330
+
1331
// ── Docs: "Try It" live tester ──
// Sends sample text to the public /v1/classify endpoint and pretty-prints
// the raw JSON response so readers can exercise the API from the docs page.
const docsTestBtn = document.getElementById('docsTestBtn');
const docsTestInput = document.getElementById('docsTestInput');
const docsTestOutput = document.getElementById('docsTestOutput');

if (docsTestBtn) {
  docsTestBtn.addEventListener('click', async () => {
    const text = docsTestInput.value.trim();
    // Mirror the server's minimum-length rule client-side to save a round-trip.
    if (text.length < 20) {
      docsTestOutput.textContent = '{"error": "Text too short (minimum 20 characters)"}';
      docsTestOutput.classList.add('visible');
      return;
    }
    docsTestBtn.disabled = true;
    docsTestBtn.innerHTML = '<span class="loading"></span>';
    try {
      const res = await fetch(`${API_BASE}/v1/classify`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text, top_n: 5 })
      });
      const data = await res.json();
      docsTestOutput.textContent = JSON.stringify(data, null, 2);
    } catch (e) {
      docsTestOutput.textContent = `{"error": "${e.message}"}`;
    } finally {
      // Restore the button in `finally` — consistent with the file's other
      // async handlers — so it is re-enabled even if rendering throws.
      docsTestOutput.classList.add('visible');
      docsTestBtn.disabled = false;
      docsTestBtn.textContent = 'Send Request';
    }
  });
}
1362
+
1363
// Wrap the existing loadProviders so the docs provider badges are refreshed
// every time the provider list is (re)loaded.
const baseLoadProviders = loadProviders;
loadProviders = async function () {
  await baseLoadProviders();
  populateDocsProviders();
};
1369
+
1370
+ checkStatus();
1371
+ </script>
1372
+ </body>
1373
+ </html>