"""AIGENCY V4 – Interactive demo Space.
This Gradio app proxies user prompts to the eCloud production AIGENCY V4 API.
Setup (when deploying to HuggingFace Spaces):
1. Add `AIGENCY_API_TOKEN` as a Space secret (Space settings → Variables and secrets).
2. Optionally add `AIGENCY_API_BASE` (default https://aigency.dev/api/v2),
   `AIGENCY_ASSISTANT_ID` (default 277) and
   `AIGENCY_ASSISTANT_SLUG` (default alparslan-v4).
The Space supports text and image-with-text input (one image per request,
≤ 30 MB, image/* MIME).
"""
from __future__ import annotations
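# Local-run sketch (illustrative; the token value is a placeholder, never a real
# credential – on Spaces the token is supplied as a secret instead):
#   AIGENCY_API_TOKEN=<your-token> python app.py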
import io
import os
import time
import threading
from collections import deque
from datetime import datetime, timezone
import gradio as gr
import requests
from PIL import Image
# ── Config ─────────────────────────────────────────────────────────
API_BASE = os.environ.get("AIGENCY_API_BASE", "https://aigency.dev/api/v2")
API_TOKEN = os.environ.get("AIGENCY_API_TOKEN", "") # set via Space secret
ASSISTANT_ID = int(os.environ.get("AIGENCY_ASSISTANT_ID", "277"))
ASSISTANT_SLUG = os.environ.get("AIGENCY_ASSISTANT_SLUG", "alparslan-v4")
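# Note: ASSISTANT_SLUG is read for completeness; the request helpers below
# address the assistant by its numeric id only.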
TIMEOUT = 60 # seconds
DEMO_BANNER = (
    "AIGENCY V4 – sovereign, multimodal, Turkish-first AI · "
    "128B parameters · 278K context · KVKK-resident"
)
# ── Abuse mitigation knobs ─────────────────────────────────────────
RATE_PER_MIN_PER_SESSION = int(os.environ.get("RATE_PER_MIN", "10"))
MAX_PER_SESSION = int(os.environ.get("MAX_PER_SESSION", "50"))
MAX_PROMPT_CHARS = int(os.environ.get("MAX_PROMPT_CHARS", "2000"))
DAILY_CAP_GLOBAL = int(os.environ.get("DAILY_CAP", "5000"))
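# The four knobs above are read from Space variables (RATE_PER_MIN,
# MAX_PER_SESSION, MAX_PROMPT_CHARS, DAILY_CAP), so a deployment can be retuned
# without code changes; e.g. RATE_PER_MIN=20 would allow 20 msg/min
# (illustrative value, not a recommendation).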
# ── Global daily counter (thread-safe) ─────────────────────────────
_global_lock = threading.Lock()
_global_counter = {"date": "", "count": 0}
def _utc_today() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%d")
def _check_and_inc_daily() -> tuple[bool, int]:
"""Increment the global daily counter; return (allowed, current_count)."""
today = _utc_today()
with _global_lock:
if _global_counter["date"] != today:
_global_counter["date"] = today
_global_counter["count"] = 0
if _global_counter["count"] >= DAILY_CAP_GLOBAL:
return False, _global_counter["count"]
_global_counter["count"] += 1
return True, _global_counter["count"]
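# Behaviour sketch for _check_and_inc_daily (illustrative values):
#   first call of a UTC day            -> (True, 1)
#   any call once the cap is exhausted -> (False, DAILY_CAP_GLOBAL)
# The counter lives in process memory only, so it also resets whenever the
# Space restarts.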
PLACEHOLDER_MSG = (
    "🔒 The interactive chat is being activated.\n\n"
    "While the demo is finalised, you can already:\n"
    " · Browse the **Benchmark Leaderboard** tab – 22 benchmarks, 13,344 calls\n"
    " · Read the [model card](https://huggingface.co/aigencydev/AIGENCY-V4)\n"
    " · Inspect the [evaluation dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation)\n"
    " · Read the whitepaper (32 pages, EN/TR) on [GitHub](https://github.com/ecloud-bh/aigency-v4-whitepaper)\n\n"
    "For production access: **info@e-cloud.web.tr · ai@aigency.dev**"
)
# ── Bench numbers (static, mirrored from the model card) ───────────
BENCH = [
    ("Belebele-TR (TR reading)", 0.8733, "Turkish · #1 globally"),
    ("ARC-Challenge", 0.9488, "Frontier-tied"),
    ("GSM8K", 0.9462, "Frontier-tied"),
    ("HellaSwag", 0.8860, "≈6pp behind frontier"),
    ("MBPP", 0.8482, "Upper-mid frontier"),
    ("HumanEval", 0.8415, "Upper-mid frontier"),
    ("TQuAD (TR QA)", 0.8240, "Turkish · #1 globally"),
    ("IFEval (strict)", 0.8022, "Frontier lower-mid"),
    ("MMLU", 0.8010, "Frontier lower-mid"),
    ("HumanEval+", 0.7988, "Upper-mid frontier"),
    ("DocVQA (≥0.5 ANLS)", 0.7917, "Multimodal (first-gen)"),
    ("TR Grammar", 0.7900, "Turkish · #1 globally"),
    ("MBPP+", 0.7804, "Upper-mid frontier"),
    ("TruthfulQA MC1", 0.7638, "Frontier-tied"),
    ("WinoGrande", 0.7466, "≈11pp behind frontier"),
    ("XNLI-TR", 0.7340, "Turkish · #1 globally"),
    ("TR-MMLU", 0.7080, "Turkish · #1 globally"),
    ("ChartQA (relaxed)", 0.6768, "Multimodal (first-gen)"),
    ("MMMU (val)", 0.5333, "Multimodal (first-gen)"),
    ("MMLU-Pro", 0.5020, "Development area"),
    ("GPQA Diamond", 0.3788, "Development area"),
    ("MathVista (testmini)", 0.3413, "Multimodal (first-gen)"),
]
# ── Two-step API protocol (token in URL path) ─────────────────────
def new_chat(message: str) -> tuple[str, str]:
"""Open a chat. Returns (chat_id, first_response)."""
url = f"{API_BASE}/newChat/{ASSISTANT_ID}/{API_TOKEN}"
r = requests.post(url, json={"message": message}, timeout=TIMEOUT)
r.raise_for_status()
body = r.json()
return body.get("chat_id", ""), body.get("message", "")
def send_message(chat_id: str, message: str) -> str:
url = f"{API_BASE}/sendMessage/{API_TOKEN}"
files = {
"chat_id": (None, str(chat_id)),
"message": (None, message),
}
r = requests.post(url, files=files, timeout=TIMEOUT)
r.raise_for_status()
return r.json().get("message", "")
def send_with_image(chat_id: str, message: str, image: Image.Image) -> str:
buf = io.BytesIO()
image.save(buf, format="PNG")
buf.seek(0)
url = f"{API_BASE}/sendMessage/{API_TOKEN}"
    files = {
        "chat_id": (None, str(chat_id)),
        "message": (None, message),
        # "attachements" (sic): the upstream API expects this misspelled field name
        "attachements": ("image.png", buf.getvalue(), "image/png"),
    }
r = requests.post(url, files=files, timeout=TIMEOUT)
r.raise_for_status()
return r.json().get("message", "")
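# Usage sketch for the two-step protocol (assumes a valid AIGENCY_API_TOKEN is
# configured; the prompts and the `img` variable are illustrative):
#   cid, first_reply = new_chat("Merhaba")             # opens a chat, returns its id
#   follow_up = send_message(cid, "Devam et")           # continues the same chat
#   described = send_with_image(cid, "Bunu anlat", img)  # img is a PIL.Image.Image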
# ── Gradio handler with session-scoped chat_id + rate limiting ────
def chat(prompt, image, history, chat_id_state, ts_log_state, count_state):
"""
    ts_log_state: deque of recent timestamps (sliding 60 s window) used for the per-minute rate limit
count_state: total messages in this session
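    Returns (history, cleared prompt box, chat_id, ts_log, count) in every
    branch, matching the order of chat_outputs in the UI section below.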
"""
history = history or []
chat_id = chat_id_state or ""
ts_log = ts_log_state or deque()
count = count_state or 0
if not prompt.strip():
return history, "", chat_id, ts_log, count
# 1) Prompt length limit
if len(prompt) > MAX_PROMPT_CHARS:
history.append((prompt[:200] + " […]", (
f"⚠️ Prompt too long ({len(prompt)} chars). "
f"Max {MAX_PROMPT_CHARS} chars per message in this demo. "
f"For longer contexts, use the production API at aigency.dev."
)))
return history, "", chat_id, ts_log, count
# 2) Per-session total
if count >= MAX_PER_SESSION:
        history.append((prompt, (
            f"⚠️ Session limit reached ({MAX_PER_SESSION} messages). "
            f"Refresh the page to start a new session, "
            f"or contact info@e-cloud.web.tr · ai@aigency.dev for production access."
        )))
return history, "", chat_id, ts_log, count
# 3) Per-minute rate (sliding window)
now = time.time()
while ts_log and now - ts_log[0] > 60:
ts_log.popleft()
if len(ts_log) >= RATE_PER_MIN_PER_SESSION:
wait = int(60 - (now - ts_log[0]))
        history.append((prompt, (
            f"⚠️ Slow down – max {RATE_PER_MIN_PER_SESSION} messages/minute. "
            f"Try again in {wait}s."
        )))
return history, "", chat_id, ts_log, count
# 4) Global daily cap
allowed, daily_count = _check_and_inc_daily()
if not allowed:
        history.append((prompt, (
            f"⚠️ The demo has reached today's global limit ({DAILY_CAP_GLOBAL} requests). "
            f"It resets at 00:00 UTC. For uninterrupted access, contact "
            f"info@e-cloud.web.tr · ai@aigency.dev."
        )))
return history, "", chat_id, ts_log, count
if not API_TOKEN:
history.append((prompt, PLACEHOLDER_MSG))
return history, "", chat_id, ts_log, count
# 5) Actual API call
try:
if not chat_id:
if image is None:
cid, answer = new_chat(prompt)
chat_id = cid
else:
                cid, _ = new_chat("Bir görsel inceleyeceksin.")
chat_id = cid
answer = send_with_image(chat_id, prompt, image)
else:
if image is None:
answer = send_message(chat_id, prompt)
else:
answer = send_with_image(chat_id, prompt, image)
except Exception as e:
answer = f"Error: {e}"
ts_log.append(now)
history.append((prompt, answer))
return history, "", chat_id, ts_log, count + 1
def reset_session():
return [], "", "", deque(), 0
def make_leaderboard():
return [(name, f"{val:.2%}", note) for name, val, note in BENCH]
# ── UI ─────────────────────────────────────────────────────────────
TR_INTRO = """
### 🇹🇷 AIGENCY V4 Demo
**128 milyar parametreli yerli yapay zekâ.** Türkçe okuma anlamada dünya
lideri, fen muhakemesi ve grade-school matematikte frontier seviyesinde.
KVKK-yerel, tam bağımsız mimari.
Aşağıdaki sohbet kutusuna Türkçe veya İngilizce bir istem yazın; isteğe
bağlı olarak bir görsel ekleyin. Sonuçlar canlı API'den gelir.
"""
EN_INTRO = """
### 🇬🇧 AIGENCY V4 Demo
**128B-parameter sovereign AI**, a world leader in Turkish reading
comprehension and frontier-level on grade-school math and scientific
reasoning. KVKK-resident, fully sovereign architecture.
Enter a Turkish or English prompt below and optionally attach an image.
Responses are served live by the production API.
"""
with gr.Blocks(title="AIGENCY V4 Demo", theme=gr.themes.Soft()) as demo:
gr.Markdown(f"# AIGENCY V4\n\n*{DEMO_BANNER}*")
chat_id_state = gr.State("")
ts_log_state = gr.State(lambda: deque())
count_state = gr.State(0)
with gr.Tab("Chat"):
with gr.Row():
with gr.Column(scale=1):
gr.Markdown(EN_INTRO)
with gr.Column(scale=1):
gr.Markdown(TR_INTRO)
chatbot = gr.Chatbot(height=420, label="Conversation", type="tuples")
with gr.Row():
with gr.Column(scale=4):
msg = gr.Textbox(
label="Prompt",
placeholder="Type a question in Turkish or English…",
lines=2,
)
with gr.Column(scale=1):
img = gr.Image(label="Optional image", type="pil", height=200)
with gr.Row():
send = gr.Button("Send", variant="primary")
clear = gr.Button("New conversation")
        gr.Markdown(
            f"*Demo limits: ≤ {MAX_PROMPT_CHARS} chars/message · "
            f"{RATE_PER_MIN_PER_SESSION} msg/min · "
            f"{MAX_PER_SESSION} msg/session · "
            f"{DAILY_CAP_GLOBAL} requests/day globally. "
            f"For unlimited production access: info@e-cloud.web.tr · ai@aigency.dev*"
        )
chat_inputs = [msg, img, chatbot, chat_id_state, ts_log_state, count_state]
chat_outputs = [chatbot, msg, chat_id_state, ts_log_state, count_state]
send.click(chat, chat_inputs, chat_outputs)
msg.submit(chat, chat_inputs, chat_outputs)
clear.click(reset_session, [], [chatbot, msg, chat_id_state, ts_log_state, count_state])
with gr.Tab("Benchmark Leaderboard"):
        gr.Markdown(
            "## 22 benchmarks · 13,344 real API calls · Wilson 95% CI\n"
            "Methodology: temperature 0.0, deterministic seed=42, single "
            "session on 27 April 2026.\n\n"
            "*See [the dataset](https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation) "
            "for the full reproducibility capsule.*"
        )
gr.Dataframe(
headers=["Benchmark", "AIGENCY V4", "Position vs frontier"],
value=make_leaderboard(),
interactive=False,
wrap=True,
)
with gr.Tab("About"):
        gr.Markdown("""
### About AIGENCY V4
AIGENCY V4 is the multimodal successor to AIGENCY V3, developed by
**eCloud Yazılım Teknolojileri**. Released to production in Q2 2026.
- **Architecture**: 120B sovereign decoder transformer + 8B vision encoder
- **Context**: 278K tokens (HBM 3-tier with TG-Decay)
- **Languages**: Turkish (primary), English
- **Licence**: API-only commercial – see https://aigency.dev/license
**Resources**
- 📄 Whitepaper (EN): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-EN.pdf
- 📄 Whitepaper (TR): https://github.com/ecloud-bh/aigency-v4-whitepaper/blob/main/AIGENCY-V4-Whitepaper-TR.pdf
- 🤗 Model card: https://huggingface.co/aigencydev/AIGENCY-V4
- 📊 Evaluation dataset: https://huggingface.co/datasets/aigencydev/aigency-v4-evaluation
- 🔧 Benchmark code: https://github.com/ecloud-bh/aigency-benchmarks
- 🔗 Production API: https://aigency.dev
**Contact**
info@e-cloud.web.tr · ai@aigency.dev · © 2026 eCloud Yazılım Teknolojileri
""")
if __name__ == "__main__":
demo.queue(max_size=8).launch()