ModuleMind / app.py
Quazim0t0's picture
Update app.py
281ec36 verified
Raw
History Blame Contribute Delete
30.8 kB
"""
app.py -- Modular Mind: Boss Fight (HuggingFace Space entry point).
A 2D Dark-Souls-style duel. The boss (Demon Slime) is driven by a tiny Modular
Mind: six specialist networks emit latents that a RecursiveLink merges into one
shared latent, and a coordinator reads it to pick the boss's next move. The brain
was trained by self-play reinforcement learning (see train.py / duel_sim.py).
The browser renders the fight at 60fps; at each decision point it calls the Python
brain through this app's /decide endpoint and shows the Modular Mind deciding live.
"""
import json
import os
import sys
from urllib.parse import quote
import gradio as gr
import modular_mind
import online
# Directory containing this file; bundled sub-packages and assets are located
# relative to it.
HERE = os.path.dirname(os.path.abspath(__file__))

# The MoE-experts experiment lives in ./agents (rendered at the bottom of the
# page). It is optional: if the import fails for any reason the game still runs
# and `build_moe_panel` is left as None.
sys.path.insert(0, os.path.join(HERE, "agents"))
try:
    from panel import build_moe_panel
except Exception as _panel_err:
    build_moe_panel = None
    print(f"[app] MoE experiment panel unavailable ({_panel_err})")

# The self-playing piano (a Modular Mind trained on a song) lives in ./piano.
sys.path.insert(0, os.path.join(HERE, "piano"))

# Lazy-singleton state for the piano player: "tried" records that one load
# attempt has been made, so a failed import is not retried.
_PIANO = {"player": None, "tried": False}
def _get_piano():
    """Return the shared piano player, loading it on the first call.

    The load (importing ``poly_mind`` and constructing ``PolyPlayer``) is
    attempted exactly once. If it fails, the failure is printed and every call
    returns ``None``.
    """
    if _PIANO["tried"]:
        return _PIANO["player"]
    # Mark the attempt before loading so a failure is never retried.
    _PIANO["tried"] = True
    try:
        from poly_mind import PolyPlayer
        _PIANO["player"] = PolyPlayer()
    except Exception as e:
        print(f"[app] piano Modular Mind unavailable ({e})")
    return _PIANO["player"]
# Keyboard range and frame rate of the piano model, read from
# piano/poly_notes.json. Any failure (missing file, bad JSON, missing key)
# falls back to the built-in defaults so the page still renders.
try:
    # Context manager so the file handle is closed deterministically;
    # JSON text is UTF-8, so do not depend on the locale encoding.
    with open(os.path.join(HERE, "piano", "poly_notes.json"), encoding="utf-8") as _meta_file:
        _pmeta = json.load(_meta_file)
    PIANO_LO, PIANO_HI, PIANO_FPS = _pmeta["midi_lo"], _pmeta["midi_hi"], _pmeta.get("fps", 8)
except Exception:
    PIANO_LO, PIANO_HI, PIANO_FPS = 56, 86, 8
# the performance is restyled live into A minor with the bass lifted away (see
# piano/poly_mind.py stylize_midi), so the on-screen keyboard starts at middle C
PIANO_LO = max(PIANO_LO, 60)
# Warm the piano Modular Mind at app startup (so the first play is instant).
# _get_piano() remembers its single load attempt, so later calls reuse the
# result of this one.
_get_piano()
def _read(path):
    """Return the full contents of the UTF-8 text file at *path*."""
    with open(path, mode="r", encoding="utf-8") as src:
        text = src.read()
    return text
# Front-end assets, loaded once at import time and later inlined into the page
# <head> / body. Read in the same order as before: stylesheet, game engine,
# sprite atlases, page markup.
CSS, GAME_JS, ASSETS_JS, INDEX_HTML = (
    _read(os.path.join(HERE, *rel_parts))
    for rel_parts in (
        ("web", "game.css"),
        ("web", "game.js"),
        ("assets_data.js",),
        ("web", "index.html"),
    )
)
# Music/sfx are served as static files by Gradio (the directory is listed in
# allowed_paths at launch); the game builds its audio URLs from this base.
AUDIO_DIR = os.path.join(HERE, "audio")
# The absolute path is URL-encoded because it may contain spaces; "/" and the
# drive ":" are kept literal.
AUDIO_BASE_URL = f"/gradio_api/file={quote(AUDIO_DIR.replace(os.sep, '/'), safe='/:')}/"
# real acoustic-grand-piano note samples (served static; the piano plays the nearest
# sample pitch-shifted to each note, for a real piano sound instead of an oscillator).
PIANO_SAMPLES_DIR = os.path.join(HERE, "piano", "samples")
try:
    _sample_files = os.listdir(PIANO_SAMPLES_DIR)
except OSError:
    # No samples directory (or it is unreadable): the browser falls back to its
    # oscillator voice.
    _sample_files = []
# Samples are named "<midi>.mp3". Files that do not match are skipped one by
# one, so a single stray file (e.g. "readme.mp3") cannot make int() raise and
# wipe out the whole sample list.
PIANO_SAMPLE_MIDIS = sorted(
    int(_stem)
    for _stem, _ext in map(os.path.splitext, _sample_files)
    if _ext == ".mp3" and _stem.isascii() and _stem.isdigit()
)
PIANO_SAMPLE_BASE = "/gradio_api/file=" + quote(PIANO_SAMPLES_DIR.replace(os.sep, "/"), safe="/:") + "/"
# Warm the default brain: request the "hard" mind once at import time.
# NOTE(review): presumably get_mind() caches what it loads so the first /decide
# call does not pay the load cost — confirm in modular_mind.
modular_mind.get_mind("hard")
def decide(state_json: str) -> str:
    """Choose the boss's next move for the browser (the /decide endpoint).

    Args:
        state_json: JSON-encoded game state (includes a "difficulty" tier).

    Returns:
        JSON text of whatever ``modular_mind.decide`` returns (chosen action +
        telemetry).

    Anything that is not a JSON object is treated as an empty state. This
    covers unparseable text as well as valid JSON of the wrong shape (``[]``,
    ``null``, a bare number), which previously slipped past the fallback.
    """
    try:
        state = json.loads(state_json)
    except Exception:
        # Deliberately broad: the input comes straight from the browser and the
        # endpoint is best-effort.
        state = {}
    if not isinstance(state, dict):
        state = {}
    return json.dumps(modular_mind.decide(state))
def learn(traj_json: str) -> str:
    """Receive a finished fight from the browser (the /learn endpoint).

    Args:
        traj_json: JSON-encoded decision trajectory + outcome of one fight.

    Returns:
        JSON text: ``{"error": "bad json"}`` if the payload cannot be parsed,
        otherwise whatever ``online.record_fight`` returns (per the original
        note, that call buffers the fight and periodically finetunes the HARD
        brain with REINFORCE).
    """
    try:
        trajectory = json.loads(traj_json)
    except Exception:
        reply = {"error": "bad json"}
    else:
        reply = online.record_fight(trajectory)
    return json.dumps(reply)
def piano(payload_json: str) -> str:
    """Generate the next frames for the browser's self-playing piano (/piano).

    Args:
        payload_json: JSON object ``{"history": [frame, ...], "n": int}``.
            ``history`` is the recent token frames kept client-side; ``n`` is
            the number of frames to generate (clamped to 1..64, default 32).

    Returns:
        JSON text with:
          * ``frames``  -- one list of MIDI note numbers per generated frame,
          * ``telem``   -- the per-frame telemetry returned by the player,
          * ``history`` -- the last ``player.K`` token frames, to send back on
            the next call.
        If the piano model is unavailable the reply has empty ``frames`` /
        ``telem`` (plus the legacy empty ``notes`` key), the received history,
        and an ``error`` message.

    Malformed input never raises: a payload that is not a JSON object, a
    ``history`` that is not a list, or an ``n`` that is not a number all fall
    back to the defaults.
    """
    try:
        req = json.loads(payload_json)
    except Exception:
        req = {}
    if not isinstance(req, dict):
        req = {}

    raw_history = req.get("history")
    hist = list(raw_history) if isinstance(raw_history, list) else []

    try:
        n = int(req.get("n", 32))
    except (TypeError, ValueError, OverflowError):
        n = 32
    n = max(1, min(64, n))

    player = _get_piano()
    if player is None:
        # Same shape as a normal reply so the client can read `frames`
        # unconditionally; `notes` is kept for backward compatibility.
        return json.dumps({"notes": [], "frames": [], "telem": [],
                           "history": hist, "error": "piano unavailable"})

    if not hist:
        # First request of a session: start from the model's own seed frames.
        hist = [list(f) for f in player.seed]

    frames, telem = [], []
    for _ in range(n):
        toks, midis, tl = player.next_frame(hist)
        hist.append(toks)  # autoregressive: each new frame extends the context
        frames.append([int(x) for x in midis])
        telem.append(tl)
    return json.dumps({"frames": frames, "telem": telem,
                       "history": [list(map(int, f)) for f in hist[-player.K:]]})
# Bootstrap (runs in the browser): wire window.MM_DECIDE to this app's /decide
# endpoint via Gradio's REST API (no external CDN), then boot the game once the
# gr.HTML canvas is in the DOM.
#
# Each call is two requests: a POST to /gradio_api/call/<name> that returns an
# event_id, then a GET on /gradio_api/call/<name>/<event_id> whose response text
# is scanned for its last "data:" line, which holds the JSON result array.
# MM_LEARN is fire-and-forget and swallows every error; MM_DECIDE lets errors
# propagate to its caller.
#
# NOTE: this is a normal (non-raw) Python string, so '\\n' below reaches the
# browser as the JavaScript escape '\n'.
BOOTSTRAP_JS = """
(function () {
// route each boss decision to the Python Modular Mind through /gradio_api/call
window.MM_DECIDE = async (state) => {
const post = await fetch('/gradio_api/call/decide', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({data: [JSON.stringify(state)]}),
});
const j = await post.json();
const res = await fetch('/gradio_api/call/decide/' + j.event_id);
const text = await res.text();
const line = text.split('\\n').filter(l => l.startsWith('data:')).pop();
const arr = JSON.parse(line.slice(5).trim());
return JSON.parse(arr[0]);
};
// send a finished fight's trajectory to the online learner (fire-and-forget)
window.MM_LEARN = async (traj) => {
try {
const post = await fetch('/gradio_api/call/learn', {
method: 'POST', headers: {'Content-Type': 'application/json'},
body: JSON.stringify({data: [JSON.stringify(traj)]}),
});
const j = await post.json();
await fetch('/gradio_api/call/learn/' + j.event_id);
} catch (e) { /* learning is best-effort */ }
};
const tryBoot = () => {
if (document.getElementById('mm-canvas') && window.__mmBoot) window.__mmBoot();
else setTimeout(tryBoot, 80);
};
if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', tryBoot);
else tryBoot();
})();
"""
# Force Gradio dark mode (matches the dark game) regardless of the visitor's browser
# setting, by ensuring the ?__theme=dark URL param is present before the app renders.
# If the param is missing or different, the script sets it and replaces the current
# location (same path, other query params and the #hash preserved). Any error is
# swallowed so a failure here never blocks the page.
FORCE_DARK_JS = """
(function () {
try {
var p = new URLSearchParams(window.location.search);
if (p.get('__theme') !== 'dark') {
p.set('__theme', 'dark');
window.location.replace(window.location.pathname + '?' + p.toString() + window.location.hash);
}
} catch (e) {}
})();
"""
# ---- self-playing piano (a Modular Mind trained on the song) -----------------
# Stylesheet for the piano widget. The ids match the elements declared in
# PIANO_HTML (#mm-piano-wrap, -stage, -roll, #mm-piano, -ctrl, -btn, -note,
# -lbl, -specs, -latent); the classes are applied by the piano script:
# .pk/.white/.black/.on for keys, .psp (+ .nm/.ow/.bar/.fill) for specialist
# cards, and .lc for the shared-latent bars.
PIANO_CSS = """
#mm-piano-wrap{max-width:920px;margin:6px auto 2px;font-family:system-ui,sans-serif}
#mm-piano-stage{position:relative;background:radial-gradient(ellipse at 50% 110%,#1b1430 0%,#0c0c14 62%,#08080e 100%);
border:1px solid #2a2a35;border-radius:10px;padding:0 8px 12px;overflow-x:auto;overflow-y:hidden}
#mm-piano-roll{display:block;width:100%;height:150px}
#mm-piano{display:flex;align-items:flex-end;justify-content:center;gap:2px;height:112px}
.pk{box-sizing:border-box;border:1px solid #05050a;border-radius:0 0 4px 4px;flex:0 0 auto;
transition:background .07s ease,box-shadow .07s ease}
.pk.white{width:20px;height:100px;background:linear-gradient(180deg,#f4f4f8 0%,#d6d6e0 88%,#b9b9c6 100%)}
.pk.black{width:14px;height:62px;background:linear-gradient(180deg,#3a3a46 0%,#1b1b23 100%)}
.pk.on{transform:translateY(1px)}
#mm-piano-ctrl{display:flex;gap:14px;align-items:center;justify-content:center;margin:8px auto 4px}
#mm-piano-btn{cursor:pointer;background:#2a9d6a;color:#fff;border:none;border-radius:6px;
padding:9px 18px;font-weight:700;font-size:14px}
#mm-piano-btn:hover{background:#33b87c}
#mm-piano-note{color:#9bd;font-size:13px;min-width:96px;text-align:left}
#mm-piano-specs{display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin:8px auto 2px;max-width:800px}
.psp{width:110px;background:#16161e;border:1px solid #2a2a35;border-radius:6px;padding:6px 9px}
.psp .nm{font-weight:700;font-size:12px}
.psp .ow{opacity:.55;font-size:10px;color:#aaa;margin-top:1px}
.psp .bar{height:7px;background:#2a2a33;border-radius:4px;margin-top:6px;overflow:hidden}
.psp .fill{height:100%;width:4%;border-radius:4px;transition:width .12s ease}
#mm-piano-lbl{text-align:center;color:#888;font-size:11px;margin-top:8px}
#mm-piano-latent{display:flex;gap:3px;justify-content:center;align-items:flex-end;height:22px;margin:5px auto}
#mm-piano-latent .lc{width:8px;height:3px;border-radius:2px;background:#3a3a48;transition:height .12s ease}
"""
# Page-level constants handed to the piano engine as window.MM_PIANO_* globals.
# Every value goes through json.dumps so it is always a valid JavaScript
# literal: integers render exactly as before, while a missing value becomes
# `null` (absorbed by the engine's `||` defaults) instead of the bare word
# `None`, which would be a ReferenceError in the browser.
PIANO_GLOBALS = (f"window.MM_PIANO_LO={json.dumps(PIANO_LO)};window.MM_PIANO_HI={json.dumps(PIANO_HI)};"
                 f"window.MM_PIANO_FPS={json.dumps(PIANO_FPS)};"
                 f"window.MM_PIANO_SAMPLE_BASE={json.dumps(PIANO_SAMPLE_BASE)};"
                 f"window.MM_PIANO_SAMPLE_MIDIS={json.dumps(PIANO_SAMPLE_MIDIS)};")
PIANO_JS = r"""
(function(){
var SPC={Bass:'#4da6ff',Tenor:'#2ecc71',Soprano:'#ff6b9d',Sustain:'#1abc9c',Rest:'#95a5a6',Onset:'#e67e22',Phrase:'#9b59b6'};
window.__pianoBoot = function(){
var wrap=document.getElementById('mm-piano');
if(!wrap || wrap.dataset.built) return; wrap.dataset.built='1';
var LO=window.MM_PIANO_LO||56, HI=window.MM_PIANO_HI||86, BLACK={1:1,3:1,6:1,8:1,10:1};
for(var m=LO;m<=HI;m++){var k=document.createElement('div');
k.className='pk '+(BLACK[m%12]?'black':'white'); k.id='pk-'+m; wrap.appendChild(k);}
var audio=null, playing=false, queue=[], history=[], fetching=false, timer=null, voices={};
var specFills={}, built=false, buffers={}, loaded=false;
var PLAY_MS=Math.round(1000/(window.MM_PIANO_FPS||8))+58; // a touch slower = calmer feel
var noteEl=document.getElementById('mm-piano-note'), btn=document.getElementById('mm-piano-btn');
var specBox=document.getElementById('mm-piano-specs'), latBox=document.getElementById('mm-piano-latent');
var NN=['C','C#','D','D#','E','F','F#','G','G#','A','A#','B'];
function name(m){return NN[m%12]+(Math.floor(m/12)-1);}
// ---- light show: glowing note trails rise off the keys while they sound ----
var roll=document.getElementById('mm-piano-roll');
var rctx=roll?roll.getContext('2d'):null, trails=[], sparks=[], keyTrail={}, rafOn=false;
function hue(m){return ((m%12)*30+200)%360;} // pitch class -> color wheel
function ensureRaf(){ if(rctx && !rafOn){ rafOn=true; requestAnimationFrame(draw); } }
function draw(){
var w=roll.clientWidth, hgt=roll.clientHeight, now=performance.now(), v=0.05;
if(roll.width!==w) roll.width=w; if(roll.height!==hgt) roll.height=hgt;
rctx.clearRect(0,0,w,hgt);
for(var i=trails.length-1;i>=0;i--){ var tr=trails[i];
var top=hgt-(now-tr.t0)*v, bot=hgt-(tr.t1?(now-tr.t1)*v:0);
if(bot<-30){ trails.splice(i,1); continue; }
top=Math.max(top,-30);
var g=rctx.createLinearGradient(0,top,0,bot);
g.addColorStop(0,'hsla('+tr.h+',85%,62%,0)');
g.addColorStop(1,'hsla('+tr.h+',85%,62%,0.9)');
rctx.shadowColor='hsl('+tr.h+',85%,60%)'; rctx.shadowBlur=10;
rctx.fillStyle=g; rctx.fillRect(tr.x,top,tr.w,Math.max(2,bot-top));
}
rctx.shadowBlur=0;
for(var j=sparks.length-1;j>=0;j--){ var s=sparks[j], a=1-(now-s.t0)/650;
if(a<=0){ sparks.splice(j,1); continue; }
s.x+=s.vx; s.y+=s.vy;
rctx.fillStyle='hsla('+s.h+',95%,72%,'+a.toFixed(2)+')';
rctx.fillRect(s.x,s.y,2.2,2.2);
}
if(!playing && !trails.length && !sparks.length){ rafOn=false; rctx.clearRect(0,0,w,hgt); return; }
requestAnimationFrame(draw);
}
function strikeFx(m,el){
if(!rctx||!el) return;
var x=el.offsetLeft-roll.offsetLeft, wd=el.offsetWidth, hh=hue(m), now=performance.now();
var tr={x:x,w:wd,h:hh,t0:now,t1:null};
trails.push(tr); keyTrail[m]=tr;
for(var i=0;i<6;i++) sparks.push({x:x+wd/2,y:roll.clientHeight-3,
vx:(Math.random()-0.5)*1.6, vy:-(0.6+Math.random()*1.4), h:hh, t0:now});
ensureRaf();
}
function endTrail(m){ if(keyTrail[m]){ keyTrail[m].t1=performance.now(); delete keyTrail[m]; } }
function lightKey(m,on){
var el=document.getElementById('pk-'+m); if(!el) return el;
if(on){ el.classList.add('on'); var hh=hue(m);
el.style.background='hsl('+hh+',82%,'+(BLACK[m%12]?'46%':'68%')+')';
el.style.boxShadow='0 0 18px hsl('+hh+',85%,60%)';
} else { el.classList.remove('on'); el.style.background=''; el.style.boxShadow=''; }
return el;
}
function buildSpecs(telem){
if(built || !specBox || !telem) return; built=true;
telem.spec.forEach(function(s){
var c=SPC[s.name]||'#888', card=document.createElement('div'); card.className='psp';
card.innerHTML='<div class="nm" style="color:'+c+'">'+s.name+'</div>'+
'<div class="ow">'+(s.owns?('owns '+s.owns):'modulator')+'</div>'+
'<div class="bar"><div class="fill" style="background:'+c+'"></div></div>';
specBox.appendChild(card); specFills[s.name]=card.querySelector('.fill');
});
if(latBox){ for(var i=0;i<8;i++){var lc=document.createElement('div'); lc.className='lc'; latBox.appendChild(lc);} }
}
function updateSpecs(telem){
if(!telem) return; buildSpecs(telem);
telem.spec.forEach(function(s){
var h;
if(s.owns!=null && s.drive!=null){ h=Math.abs(s.drive)/4.0*100; } // owners: by drive
else { h=(s.act-16.0)/10.0*100; } // modulators: by latent pulse
h=Math.max(4,Math.min(100,h));
if(specFills[s.name]) specFills[s.name].style.width=h+'%';
});
if(latBox && telem.shared){ var lc=latBox.children;
for(var i=0;i<lc.length && i<telem.shared.length;i++){
lc[i].style.height=Math.max(2,Math.min(20,Math.abs(telem.shared[i])*9))+'px';
lc[i].style.background=telem.shared[i]>=0?'#5bbcdf':'#df7a5b';
} }
}
// Ask the server for the next phrase (32 frames) and append it to the play queue.
// Best-effort: a failed or empty reply is logged and further requests are held
// off for RETRY_MS. tick() calls this whenever the queue runs low, so without
// the hold-off an unavailable piano would be re-requested several times a second.
async function fetchPhrase(){
  var RETRY_MS=2000;
  if(fetching || Date.now()<(fetchPhrase.retryAt||0)) return;
  fetching=true;
  try{
    var post=await fetch('/gradio_api/call/piano',{method:'POST',
      headers:{'Content-Type':'application/json'},
      body:JSON.stringify({data:[JSON.stringify({history:history,n:32})]})});
    if(!post.ok) throw new Error('piano call failed: HTTP '+post.status);
    var j=await post.json();
    var res=await fetch('/gradio_api/call/piano/'+j.event_id);
    var text=await res.text();
    // the reply is an event stream; its last "data:" line carries the JSON result array
    var line=text.split('\n').filter(function(l){return l.indexOf('data:')===0;}).pop();
    var out=JSON.parse(JSON.parse(line.slice(5).trim())[0]);
    history=out.history||history;
    var fr=out.frames||[];
    for(var i=0;i<fr.length;i++) queue.push({f:fr[i], t:(out.telem&&out.telem[i])||null});
    if(!fr.length) fetchPhrase.retryAt=Date.now()+RETRY_MS;   // e.g. "piano unavailable"
  }catch(e){
    fetchPhrase.retryAt=Date.now()+RETRY_MS;
    if(window.console && console.warn) console.warn('[piano] phrase fetch failed', e);
  }finally{
    fetching=false;
  }
}
function loadSamples(){
if(loaded || !audio) return; loaded=true; // background load; play() upgrades to samples as they arrive
var ms=window.MM_PIANO_SAMPLE_MIDIS||[], base=window.MM_PIANO_SAMPLE_BASE||'';
ms.forEach(function(sm){
var ctl=('AbortController' in window)?new AbortController():null;
var to=ctl?setTimeout(function(){ctl.abort();},8000):0;
fetch(base+sm+'.mp3', ctl?{signal:ctl.signal}:{}).then(function(r){return r.arrayBuffer();})
.then(function(ab){audio.decodeAudioData(ab,function(buf){buffers[sm]=buf;},function(){});})
.catch(function(){}).finally(function(){if(to)clearTimeout(to);});
});
}
function nearest(m){ var ks=Object.keys(buffers); if(!ks.length) return null;
return ks.map(Number).reduce(function(a,b){return Math.abs(b-m)<Math.abs(a-m)?b:a;}); }
function voice(m, vol){ // real sample if it's loaded, else an oscillator -> ALWAYS audible
if(!audio) return null;
var sm=nearest(m), t=audio.currentTime;
if(sm!=null && buffers[sm]){
var src=audio.createBufferSource(); src.buffer=buffers[sm];
src.playbackRate.value=Math.pow(2,(m-sm)/12);
var g=audio.createGain(); g.gain.value=vol;
src.connect(g); g.connect(audio.destination); src.start(t);
return {src:src, gain:g};
}
var f=440*Math.pow(2,(m-69)/12);
var o1=audio.createOscillator(); o1.type='triangle'; o1.frequency.value=f;
var o2=audio.createOscillator(); o2.type='sine'; o2.frequency.value=f*2;
var g2=audio.createGain(); g2.gain.value=0.18;
var lp=audio.createBiquadFilter(); lp.type='lowpass'; lp.frequency.value=2600;
var g=audio.createGain();
g.gain.setValueAtTime(0.0001,t); g.gain.exponentialRampToValueAtTime(vol,t+0.014);
g.gain.exponentialRampToValueAtTime(Math.max(0.0001,vol*0.3),t+1.6);
o1.connect(lp); o2.connect(g2); g2.connect(lp); lp.connect(g); g.connect(audio.destination);
o1.start(t); o2.start(t);
return {oscs:[o1,o2], gain:g};
}
function releaseNode(nd){
if(!nd || !audio) return; var t=audio.currentTime;
try{ nd.gain.gain.cancelScheduledValues(t);
nd.gain.gain.setValueAtTime(Math.max(nd.gain.gain.value,0.0001),t);
nd.gain.gain.linearRampToValueAtTime(0.0001,t+0.10);
if(nd.src) nd.src.stop(t+0.13); if(nd.oscs) nd.oscs.forEach(function(o){o.stop(t+0.13);});
}catch(e){}
}
function releaseAll(){
for(var mk in voices){ releaseNode(voices[mk]); lightKey(+mk,false); endTrail(+mk); }
voices={};
}
// Play one polyphonic frame: release the voices that dropped out, leave the
// sustained ones sounding, strike the newly added notes, then update the
// note readout. (The readout glyph is the eighth-note sign; it had been mis-encoded.)
function playFrame(midis){
  var nw={};                                              // MIDI numbers sounding in this frame
  (midis||[]).forEach(function(m){ if(m>0) nw[m]=1; });   // entries <= 0 are ignored
  for(var mk in voices){
    if(!nw[mk]){ releaseNode(voices[mk]); lightKey(+mk,false); endTrail(+mk); delete voices[mk]; }
  }
  var on=Object.keys(nw), vol=on.length>2?0.5:0.65;       // more than two notes: each one a little quieter
  on.forEach(function(ms){
    var m=+ms;
    if(!voices[m]){ var v=voice(m,vol); if(v) voices[m]=v; strikeFx(m, lightKey(m,true)); }
  });
  if(noteEl){
    noteEl.textContent= on.length ? ('♪ '+on.map(function(ms){return name(+ms);}).join(' ')) : '♪ (rest)';
  }
}
function tick(){ if(!playing) return; if(queue.length<10 && !fetching) fetchPhrase();
if(queue.length>0){ var it=queue.shift(); playFrame(it.f); updateSpecs(it.t); } }
function start(){
if(!audio) audio=new (window.AudioContext||window.webkitAudioContext)();
if(audio.state==='suspended'){ try{audio.resume();}catch(e){} }
loadSamples(); // real piano loads in background; oscillator plays until then
playing=true; btn.textContent='⏸ Pause'; ensureRaf();
if(queue.length===0) fetchPhrase();
if(!timer) timer=setInterval(tick, PLAY_MS);
}
// Pause playback: restore the button label (play-triangle glyph repaired) and
// release every sounding voice. The tick timer keeps running; tick() returns
// immediately while `playing` is false.
function stop(){ playing=false; btn.textContent='▶ Let the Modular Mind play'; releaseAll(); }
btn.onclick=function(){ playing?stop():start(); };
};
var t=function(){ if(document.getElementById('mm-piano')) window.__pianoBoot(); else setTimeout(t,120); };
if(document.readyState==='loading') document.addEventListener('DOMContentLoaded',t); else t();
})();
"""
PIANO_HTML = """
<div id="mm-piano-wrap">
<div id="mm-piano-stage">
<canvas id="mm-piano-roll"></canvas>
<div id="mm-piano"></div>
</div>
<div id="mm-piano-ctrl">
<button id="mm-piano-btn">▶ Let the Modular Mind play</button>
<span id="mm-piano-note">♪</span>
</div>
<div id="mm-piano-lbl">restyled live into <b>A minor</b> — every note is lifted out of the bass and snapped to the minor scale · Bass / Tenor / Soprano own a register; Sustain / Onset / Phrase are modulators that only write to the shared latent</div>
<div id="mm-piano-specs"></div>
<div id="mm-piano-latent" title="RecursiveLink shared latent"></div>
</div>
"""
# Injected verbatim into the page <head>, in this order: dark-mode forcer, the two
# stylesheets, embedded sprite atlases, the audio base URL, the piano globals, the
# game engine, the piano engine, and the bootstrap. (Inline <script> in <head>
# runs reliably; gr.HTML's innerHTML scripts do not.)
HEAD = "".join(
    f"<{tag}>{payload}</{tag}>\n"
    for tag, payload in (
        ("script", FORCE_DARK_JS),
        ("style", CSS),
        ("style", PIANO_CSS),
        ("script", ASSETS_JS),
        ("script", f"window.MM_AUDIO_BASE = {json.dumps(AUDIO_BASE_URL)};"),
        ("script", PIANO_GLOBALS),
        ("script", GAME_JS),
        ("script", PIANO_JS),
        ("script", BOOTSTRAP_JS),
    )
)
INTRO = """
# 🍄 Modular Mind
A mini **Dark-Souls-style** duel where the boss is controlled by a **Modular Mind** — six tiny
specialist networks that communicate through a **shared latent** (RecursiveLink) and a coordinator
that picks each move. The brain was **trained by self-play reinforcement learning**, not scripted.
Watch the right-hand panel: every boss decision shows which specialists fired and how the modulators
steer the fight through the latent. **Click *Enter the Fog* and click the game once to focus, then play.**
"""
# ---- placeholder repo link (replace REPO_URL with your real GitHub URL) ------
REPO_URL = "#"  # TODO: replace with the real repo
# Markdown rendered below the piano widget. It is currently just a newline and
# does not reference REPO_URL yet.
REPO_MD = "\n"
TECH_MD = r"""
## 🧠 How the model works (technical breakdown)
The boss brain is a faithful, **specialist-scale** implementation of the Modular Mind
architecture — small enough (~**4,500 parameters**, pure-NumPy inference) to decide in
well under a millisecond on a free CPU, yet structurally identical to the big idea:
**many small domain specialists that communicate through one shared latent.**
### The pieces
- **7 specialists** (tiny 2-layer MLPs). Five *own an action* and two *modulators* own none:
| Specialist | Owns | Role |
|---|---|---|
| **Aggressor** | `CLEAVE` | attack when in range |
| **Stalker** | `APPROACH` | close the distance |
| **Survivor** | `RETREAT` | reset spacing when it can't swing |
| **Baiter** | `IDLE` | wait / bait a whiff |
| **Defender** | `BLOCK` | guard the player's melee when it can't punish |
| **Punisher** | — *(modulator)* | detects "the player is open / recovering" |
| **Enrage** | — *(modulator)* | detects "we're low on HP → go berserk" |
- **`RecursiveLink`** — a ReGLU + residual block that merges the seven latents into **one shared latent** (the "bridge").
- **Coordinator** — a linear read-out of the shared latent that nudges every action's score.
### What every specialist is doing *at one moment* (a single decision tick)
A souls boss commits to one move at a time, so the brain only fires when the boss is free
(~2–4 times/second). In that one forward pass, **all seven specialists run in parallel**:
1. **Perceive** — the live game state is compressed to a **10-D feature vector** (distance, in-range?, boss HP, player HP, cooldown ready?, is the player attacking / recovering / blocking?).
2. **Specialise** — each specialist computes `h = tanh(W₁·features)` and emits a **latent vector** `zᵢ` (its "opinion"); the five action-owners also emit a scalar **drive** for their move.
3. **Communicate** — the seven latents are summed and pushed through the **`RecursiveLink`** to form the **shared latent** `s`. This is the only channel the **modulators** have: *Punisher* writes "player is open" and *Enrage* writes "HP is low" into `s` — they cast no direct vote.
4. **Coordinate** — the **coordinator** reads `s` and produces a **modulation** added to each action's score. So `score(action) = (owner's drive) + (coordinator modulation)`. This is where "the player is open" turns *Aggressor's* CLEAVE up, or "we're low HP" makes the boss commit harder.
5. **Act** — the boss takes the top-scoring legal action (CLEAVE is masked while on cooldown). A small per-difficulty *mistake rate* adds the easy/normal/hard feel.
That whole loop is the **4-bar specialist panel + shared-latent strip** you see updating in the game — a live X-ray of the model thinking.
### How it learned
Trained by **self-play REINFORCE** (policy gradient + value baseline) in a headless duel
simulator: reward = *damage dealt − damage taken*, plus shaping that rewards pressuring in
range and punishes stalling. Over ~700 batches the win-rate climbed against a near-optimal
dodging opponent and the tactics — spacing, punishing recovery frames, blocking your punish,
enraging at low HP — **emerged**; none of it is hand-scripted. The **difficulty tiers are the
same trained brain at different decision-noise levels** (Easy makes more exploitable mistakes,
Hard plays sharp ≈0.95 win vs the dodger).
### Why the structure matters
- **Modular** — you can retrain or swap one specialist without touching the others (e.g. the **Defender/BLOCK** specialist was added later and the rest were untouched).
- **Explainable** — at any instant you can read *which* specialist drove the decision and how the modulators bent it.
- **Cheap** — specialists are small and run in parallel; the latent bridge is one tiny matmul.
### It finetunes from *your* fights (online learning)
Because the model is tiny, a gradient step is microseconds — so the boss can learn
from real play **on this CPU**. Every HARD-tier fight is logged (state, action, HP per
decision) and sent to a `/learn` endpoint; we rebuild the per-decision rewards (damage
dealt − taken, + kill / − death), compute REINFORCE returns, and take **one Adam step**
that nudges the HARD brain toward what worked against real humans — the backprop is
hand-written in numpy and verified against PyTorch to ~1e-8. A frozen copy of the
sim-trained weights is an **anchor** (gentle pull-back) so it can't drift into nonsense,
and with a `HF_TOKEN` + `MM_DATASET_REPO` secret the adapted weights persist to a
HuggingFace Dataset across Space restarts. (Only HARD fights train, so the adaptation data
stays on-policy.)
"""
USES_MD = r"""
## 🌍 Three real-world applications of this architecture
The reusable idea isn't "a boss" — it's **small, independently-trainable specialists that
coordinate through a shared latent instead of through brittle hand-written rules or one giant
monolithic model.** That pattern transfers well beyond games:
**1. On-device / edge robotics & IoT control.**
A drone, robot arm, or wearable can't run a huge policy. Give it a handful of tiny specialists
— *balance*, *obstacle-avoidance*, *navigation*, *battery/thermal management* — each cheap
enough for a microcontroller, coordinating through one shared latent. You can **add or replace
a specialist** (e.g., a new sensor) without retraining the whole stack, and the latent bridge
fuses their context in a single cheap step — exactly what this boss does at 2–4 Hz on a CPU.
**2. Explainable, designer-tunable AI for games & simulations.**
Studios want NPC/boss/crowd AI that's *steerable and inspectable*, not a black box. With this
pattern a designer can tune or hot-swap one behavior specialist (more aggressive, more cautious)
and **see exactly which specialist fired** for any decision — the same live panel shown here.
That makes balancing, debugging, and difficulty tuning tractable in ways a single end-to-end
policy isn't.
**3. Modular AI agents / mixture-of-specialists that talk in latent space.**
The original Modular Mind motivation: instead of an "agent chain" that re-serializes everything
to **text** at every hop (lossy, slow), let domain specialists — *math*, *code*, *retrieval*,
*safety/policy* — communicate through a **latent bridge** (`RecursiveLink` + a residual highway
for deep chains). A small language model can consult a math or tool specialist **without
flattening to tokens**, each specialist is trained/upgraded independently, and the system stays
auditable. Useful for cost-sensitive assistants, industrial decisioning (risk + liquidity +
fraud specialists), or clinical triage (modular diagnostic experts) where you must know *why*.
"""
PIANO_INTRO = """
### 🎹 This may be bad: a self-playing piano — same Modular Mind method, trained on a song
Under the boss fight, the *same architecture* (tiny specialists → `RecursiveLink` → a coordinator)
applied to **playing piano in chords**. It was trained by **multi-note next-frame prediction** on a
*polyphonic* transcription of a song: six specialists (Bass / Tenor / Soprano registers + Sustain /
Onset / Phrase modulators) emit latents, the bridge merges them, and the coordinator picks the **set
of notes** to play next. It plays itself with **real recorded acoustic-piano samples**, and the
performance is **restyled live into A minor** — every note is lifted out of the bass register and
snapped to the minor scale before it reaches the keys. Press **play** and watch each note send a
glowing trail of light off the keyboard.
<sub>Rough by design — one song, a tiny model, crude polyphonic transcription — the *method carrying over* is the point.</sub>
"""
# Page layout plus the three hidden API endpoints the browser calls. The page
# title's mushroom emoji had been mis-encoded; it is restored here.
with gr.Blocks(title="Quazim0t0's 🍄 Thousand Token Wood Entry") as demo:
    # Visible page, top to bottom: intro, game canvas, piano intro, piano
    # widget, repo link placeholder, then the two collapsed write-ups.
    gr.Markdown(INTRO)
    gr.HTML(INDEX_HTML)
    gr.Markdown(PIANO_INTRO)
    gr.HTML(PIANO_HTML)
    gr.Markdown(REPO_MD)
    with gr.Accordion("🧠 How the Modular Mind works (technical breakdown)", open=False):
        gr.Markdown(TECH_MD)
    with gr.Accordion("🌍 Three real-world applications", open=False):
        gr.Markdown(USES_MD)
    # the third application, made real: a live mixture-of-experts at the bottom
    if build_moe_panel is not None:
        build_moe_panel()
    # hidden API plumbing: the browser calls /decide via the Gradio REST API.
    # Each endpoint is an invisible button click wired from one invisible
    # textbox (JSON in) to another (JSON out); api_name sets the route name.
    inp = gr.Textbox(visible=False)
    out = gr.Textbox(visible=False)
    trigger = gr.Button(visible=False)
    trigger.click(decide, inp, out, api_name="decide")
    linp = gr.Textbox(visible=False)
    lout = gr.Textbox(visible=False)
    ltrigger = gr.Button(visible=False)
    ltrigger.click(learn, linp, lout, api_name="learn")
    pinp = gr.Textbox(visible=False)
    pout = gr.Textbox(visible=False)
    ptrigger = gr.Button(visible=False)
    ptrigger.click(piano, pinp, pout, api_name="piano")
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", "7860"))
    app = demo.queue(default_concurrency_limit=8)
    app.launch(
        server_name="0.0.0.0",
        server_port=port,
        # directories Gradio may serve as static files (game audio, piano samples)
        allowed_paths=[AUDIO_DIR, PIANO_SAMPLES_DIR],
        # Gradio 6 moved these from the Blocks constructor to launch()
        theme=gr.themes.Base(),
        head=HEAD,
    )