"""AIGENCY V4 – Interactive demo Space.
This Gradio app proxies user prompts to the eCloud production AIGENCY V4 API.
Setup (when deploying to HuggingFace Spaces):
1. Add `AIGENCY_API_TOKEN` as a Space secret (Space settings → Variables and secrets).
2. Optionally add `AIGENCY_API_BASE` (default https://aigency.dev/api/v2),
   `AIGENCY_ASSISTANT_ID` (default 277) and
   `AIGENCY_ASSISTANT_SLUG` (default alparslan-v4).
The Space supports text and image-with-text input (one image per request,
≤ 30 MB, image/* MIME).
"""
from __future__ import annotations
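# Local-run sketch (illustrative; the token value is a placeholder, never a real
# credential – on Spaces the token is supplied as a secret instead):
#   AIGENCY_API_TOKEN=<your-token> python app.py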
import io
import os
import time
import threading
from collections import deque
from datetime import datetime, timezone
import gradio as gr
import requests
from PIL import Image
# ── Config ─────────────────────────────────────────────────────────
API_BASE = os.environ.get("AIGENCY_API_BASE", "https://aigency.dev/api/v2")
API_TOKEN = os.environ.get("AIGENCY_API_TOKEN", "") # set via Space secret
ASSISTANT_ID = int(os.environ.get("AIGENCY_ASSISTANT_ID", "277"))
ASSISTANT_SLUG = os.environ.get("AIGENCY_ASSISTANT_SLUG", "alparslan-v4")
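# Note: ASSISTANT_SLUG is read for completeness; the request helpers below
# address the assistant by its numeric id only.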
TIMEOUT = 60 # seconds
DEMO_BANNER = (
    "AIGENCY V4 – sovereign, multimodal, Turkish-first AI · "
    "128B parameters · 278K context · KVKK-resident"
)
# ── Abuse mitigation knobs ─────────────────────────────────────────
RATE_PER_MIN_PER_SESSION = int(os.environ.get("RATE_PER_MIN", "10"))
MAX_PER_SESSION = int(os.environ.get("MAX_PER_SESSION", "50"))
MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "2000"))
DAILY_CAP_GLOBAL = int(os.environ.get("DAILY_CAP", "5000"))
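# The four knobs above are read from Space variables (RATE_PER_MIN,
# MAX_PER_SESSION, MAX_PROMPT_CHARS, DAILY_CAP), so a deployment can be retuned
# without code changes; e.g. RATE_PER_MIN=20 would allow 20 msg/min
# (illustrative value, not a recommendation).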
# ── Global daily counter (thread-safe) ─────────────────────────────
_global_lock = threading.Lock()
_global_counter = {"date": "", "count": 0}
def _utc_today() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%d")
def _check_and_inc_daily() -> tuple[bool, int]:
"""Increment the global daily counter; return (allowed, current_count)."""
today = _utc_today()
with _global_lock:
if _global_counter["date"] != today:
_global_counter["date"] = today
_global_counter["count"] = 0
if _global_counter["count"] >= DAILY_CAP_GLOBAL:
return False, _global_counter["count"]
_global_counter["count"] += 1
return True, _global_counter["count"]
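# Behaviour sketch for _check_and_inc_daily (illustrative values):
#   first call of a UTC day            -> (True, 1)
#   any call once the cap is exhausted -> (False, DAILY_CAP_GLOBAL)
# The counter lives in process memory only, so it also resets whenever the
# Space restarts.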
PLACEHOLDER_MSG = (
    "🔒 The interactive chat is being activated.\n\n"
    "While the demo is finalised, you can already:\n"
    " · Browse the **Benchmark Leaderboard** tab – 22 benchmarks, 13,344 calls\n"
    " · Read the [model card](https://huggingface.co/aigencydev/AIGENCY-V4)\n"
    " · Inspect the [evaluation dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation)\n"
    " · Read the whitepaper (32 pages, EN/TR) on [GitHub](https://github.com/ecloud-bh/aigency-v4-whitepaper)\n\n"
    "For production access: **info@e-cloud.web.tr · ai@aigency.dev**"
)
# ── Bench numbers (static, mirrored from the model card) ───────────
BENCH = [
    ("Belebele-TR (TR reading)", 0.8733, "Turkish · #1 globally"),
    ("ARC-Challenge", 0.9488, "Frontier-tied"),
    ("GSM8K", 0.9462, "Frontier-tied"),
    ("HellaSwag", 0.8860, "≈6pp behind frontier"),
    ("MBPP", 0.8482, "Upper-mid frontier"),
    ("HumanEval", 0.8415, "Upper-mid frontier"),
    ("TQuAD (TR QA)", 0.8240, "Turkish · #1 globally"),
    ("IFEval (strict)", 0.8022, "Frontier lower-mid"),
    ("MMLU", 0.8010, "Frontier lower-mid"),
    ("HumanEval+", 0.7988, "Upper-mid frontier"),
    ("DocVQA (≥0.5 ANLS)", 0.7917, "Multimodal (first-gen)"),
    ("TR Grammar", 0.7900, "Turkish · #1 globally"),
    ("MBPP+", 0.7804, "Upper-mid frontier"),
    ("TruthfulQA MC1", 0.7638, "Frontier-tied"),
    ("WinoGrande", 0.7466, "≈11pp behind frontier"),
    ("XNLI-TR", 0.7340, "Turkish · #1 globally"),
    ("TR-MMLU", 0.7080, "Turkish · #1 globally"),
    ("ChartQA (relaxed)", 0.6768, "Multimodal (first-gen)"),
    ("MMMU (val)", 0.5333, "Multimodal (first-gen)"),
    ("MMLU-Pro", 0.5020, "Development area"),
    ("GPQA Diamond", 0.3788, "Development area"),
    ("MathVista (testmini)", 0.3413, "Multimodal (first-gen)"),
]
# ── Two-step API protocol (token in URL path) ─────────────────────
def new_chat(message: str) -> tuple[str, str]:
"""Open a chat. Returns (chat_id, first_response)."""
url = f"{API_BASE}/newChat/{ASSISTANT_ID}/{API_TOKEN}"
r = requests.post(url, json={"message": message}, timeout=TIMEOUT)
r.raise_for_status()
body = r.json()
return body.get("chat_id", ""), body.get("message", "")
def send_message(chat_id: str, message: str) -> str:
url = f"{API_BASE}/sendMessage/{API_TOKEN}"
files = {
"chat_id": (None, str(chat_id)),
"message": (None, message),
}
r = requests.post(url, files=files, timeout=TIMEOUT)
r.raise_for_status()
return r.json().get("message", "")
def send_with_image(chat_id: str, message: str, image: Image.Image) -> str:
buf = io.BytesIO()
image.save(buf, format="PNG")
buf.seek(0)
url = f"{API_BASE}/sendMessage/{API_TOKEN}"
    files = {
        "chat_id": (None, str(chat_id)),
        "message": (None, message),
        # "attachements" (sic): the upstream API expects this misspelled field name
        "attachements": ("image.png", buf.getvalue(), "image/png"),
    }
r = requests.post(url, files=files, timeout=TIMEOUT)
r.raise_for_status()
return r.json().get("message", "")
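# Usage sketch for the two-step protocol (assumes a valid AIGENCY_API_TOKEN is
# configured; the prompts and the `img` variable are illustrative):
#   cid, first_reply = new_chat("Merhaba")             # opens a chat, returns its id
#   follow_up = send_message(cid, "Devam et")           # continues the same chat
#   described = send_with_image(cid, "Bunu anlat", img)  # img is a PIL.Image.Image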
# ── Gradio handler with session-scoped chat_id + rate limiting ────
def chat(prompt, image, history, chat_id_state, ts_log_state, count_state):
"""
    ts_log_state: deque of recent timestamps (sliding 60 s window) used for the per-minute rate limit
count_state: total messages in this session
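    Returns (history, cleared prompt box, chat_id, ts_log, count) in every
    branch, matching the order of chat_outputs in the UI section below.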
"""
history = history or []
chat_id = chat_id_state or ""
ts_log = ts_log_state or deque()
count = count_state or 0
if not prompt.strip():
return history, "", chat_id, ts_log, count
# 1) Prompt length limit
if len(prompt) > MAX_PROMPT_CHARS:
history.append((prompt[:200] + " […]", (
f"⚠️ Prompt too long ({len(prompt)} chars). "
f"Max {MAX_PROMPT_CHARS} chars per message in this demo. "
f"For longer contexts, use the production API at aigency.dev."
)))
return history, "", chat_id, ts_log, count
# 2) Per-session total
if count >= MAX_PER_SESSION:
        history.append((prompt, (
            f"⚠️ Session limit reached ({MAX_PER_SESSION} messages). "
            f"Refresh the page to start a new session, "
            f"or contact info@e-cloud.web.tr · ai@aigency.dev for production access."
        )))
return history, "", chat_id, ts_log, count
# 3) Per-minute rate (sliding window)
now = time.time()
while ts_log and now - ts_log[0] > 60:
ts_log.popleft()
if len(ts_log) >= RATE_PER_MIN_PER_SESSION:
wait = int(60 - (now - ts_log[0]))
        history.append((prompt, (
            f"⚠️ Slow down – max {RATE_PER_MIN_PER_SESSION} messages/minute. "
            f"Try again in {wait}s."
        )))
return history, "", chat_id, ts_log, count
# 4) Global daily cap
allowed, daily_count = _check_and_inc_daily()
if not allowed:
        history.append((prompt, (
            f"⚠️ The demo has reached today's global limit ({DAILY_CAP_GLOBAL} requests). "
            f"It resets at 00:00 UTC. For uninterrupted access, contact "
            f"info@e-cloud.web.tr · ai@aigency.dev."
        )))
return history, "", chat_id, ts_log, count
if not API_TOKEN:
history.append((prompt, PLACEHOLDER_MSG))
return history, "", chat_id, ts_log, count
# 5) Actual API call
try:
if not chat_id:
if image is None:
cid, answer = new_chat(prompt)
chat_id = cid
else:
                cid, _ = new_chat("Bir görsel inceleyeceksin.")
chat_id = cid
answer = send_with_image(chat_id, prompt, image)
else:
if image is None:
answer = send_message(chat_id, prompt)
else:
answer = send_with_image(chat_id, prompt, image)
except Exception as e:
answer = f"Error: {e}"
ts_log.append(now)
history.append((prompt, answer))
return history, "", chat_id, ts_log, count + 1
def reset_session():
return [], "", "", deque(), 0
def make_leaderboard():
return [(name, f"{val:.2%}", note) for name, val, note in BENCH]
# ── UI ─────────────────────────────────────────────────────────────
TR_INTRO = """
### 🇹🇷 AIGENCY V4 Demo
**128 milyar parametreli yerli yapay zekâ.** Türkçe okuma anlamada dünya
lideri, fen muhakemesi ve grade-school matematikte frontier seviyesinde.
KVKK-yerel, tam bağımsız mimari.
Aşağıdaki sohbet kutusuna Türkçe veya İngilizce bir istem yazın; isteğe
bağlı olarak bir görsel ekleyin. Sonuçlar canlı API'den gelir.
"""
EN_INTRO = """
### 🇬🇧 AIGENCY V4 Demo
**128B-parameter sovereign AI**, a world leader in Turkish reading
comprehension and frontier-level on grade-school math and scientific
reasoning. KVKK-resident, fully sovereign architecture.
Enter a Turkish or English prompt below and optionally attach an image.
Responses are served live by the production API.
"""
with gr.Blocks(title="AIGENCY V4 Demo", theme=gr.themes.Soft()) as demo:
gr.Markdown(f"# AIGENCY V4\n\n*{DEMO_BANNER}*")
chat_id_state = gr.State("")
ts_log_state = gr.State(lambda: deque())
count_state = gr.State(0)
with gr.Tab("Chat"):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown(EN_INTRO)
with gr.Column(scale=1):
gr.Markdown(TR_INTRO)
chatbot = gr.Chatbot(height=420, label="Conversation", type="tuples")
with gr.Row():
with gr.Column(scale=4):
msg = gr.Textbox(
label="Prompt",
placeholder="Type a question in Turkish or English…",
lines=2,
)
with gr.Column(scale=1):
img = gr.Image(label="Optional image", type="pil", height=200)
with gr.Row():
send = gr.Button("Send", variant="primary")
clear = gr.Button("New conversation")
        gr.Markdown(
            f"*Demo limits: ≤ {MAX_PROMPT_CHARS} chars/message · "
            f"{RATE_PER_MIN_PER_SESSION} msg/min · "
            f"{MAX_PER_SESSION} msg/session · "
            f"{DAILY_CAP_GLOBAL} requests/day globally. "
            f"For unlimited production access: info@e-cloud.web.tr · ai@aigency.dev*"
        )
chat_inputs = [msg, img, chatbot, chat_id_state, ts_log_state, count_state]
chat_outputs = [chatbot, msg, chat_id_state, ts_log_state, count_state]
send.click(chat, chat_inputs, chat_outputs)
msg.submit(chat, chat_inputs, chat_outputs)
clear.click(reset_session, [], [chatbot, msg, chat_id_state, ts_log_state, count_state])
with gr.Tab("Benchmark Leaderboard"):
        gr.Markdown(
            "## 22 benchmarks · 13,344 real API calls · Wilson 95% CI\n"
            "Methodology: temperature 0.0, deterministic seed=42, single "
            "session on 27 April 2026.\n\n"
            "*See [the dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation) "
            "for the full reproducibility capsule.*"
        )
gr.Dataframe(
headers=["Benchmark", "AIGENCY V4", "Position vs frontier"],
value=make_leaderboard(),
interactive=False,
wrap=True,
)
with gr.Tab("About"):
        gr.Markdown("""
### About AIGENCY V4
AIGENCY V4 is the multimodal successor to AIGENCY V3, developed by
**eCloud Yazılım Teknolojileri**. Released to production in Q2 2026.
- **Architecture**: 120B sovereign decoder transformer + 8B vision encoder
- **Context**: 278K tokens (HBM 3-tier with TG-Decay)
- **Languages**: Turkish (primary), English
- **Licence**: API-only commercial – see https://aigency.dev/license
**Resources**
- 📄 Whitepaper (EN): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-EN.pdf
- 📄 Whitepaper (TR): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-TR.pdf
- 🤗 Model card: https://huggingface.co/aigencydev/AIGENCY-V4
- 📊 Evaluation dataset: https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation
- 🔧 Benchmark code: https://github.com/ecloud-bh/aigency-benchmarks
- 🔗 Production API: https://aigency.dev
**Contact**
info@e-cloud.web.tr · ai@aigency.dev · © 2026 eCloud Yazılım Teknolojileri
""")
if __name__ == "__main__":
demo.queue(max_size=8).launch()