# LilyScript / app.py
# k-l-lambda: "sampler mask from blacklist added." (4252956)
"""LilyScript — a symbolic-music AIGC app built on the LilyletNotaGen model.
Left column:
(1) generation parameter panel (2) streaming run log
(3) .lyl file list (session outputs + built-in examples) (4) editable lyl editor
Right column:
sheet-music panel (placeholder for now; later a Lilylet music score renderer
reusing the lilylet-live-editor pipeline).
Generation streams patch-by-patch: raw decoded text (with `[r:x/y]` stream
markers) goes to the run log, while the measure-segmented postprocessed text
fills the editor. The backend is the int8 + two-level KV-cache ONNX generator
(see lilyscript/generator.py); weights are pulled from the HF model repo
`k-l-lambda/LilyNota` on first use (override with LILYSCRIPT_MODEL_DIR locally).
"""
import os
import re
import time
import json
import random
import logging
from collections import deque
import gradio as gr
from lilyscript.generator import StreamingLilyletGenerator
from lilyscript.postprocess import postprocess
from lilyscript.mask_monitor import MaskMonitor, load_blacklist
# Directory containing this file; every bundled-resource path below hangs off it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Model weights are pulled from the HuggingFace model repo `k-l-lambda/LilyNota`
# at first use (the int8 + KV-cache ONNX bundle lives under its `onnx/` dir).
# For local development, point LILYSCRIPT_MODEL_DIR at a local onnx dir to skip
# the download. LILYSCRIPT_MODEL_REPO overrides the repo id itself.
HF_MODEL_REPO = os.environ.get('LILYSCRIPT_MODEL_REPO', 'k-l-lambda/LilyNota')
HF_MODEL_SUBDIR = 'onnx'  # weights + geometry + tokenizer live here in the repo
MODEL_DIR = os.environ.get('LILYSCRIPT_MODEL_DIR')  # set -> use this local dir instead of the hub
ASSET_DIR = os.path.join(HERE, 'assets')  # styles.json + syntax blacklist; also handed to the generator
EXAMPLES_DIR = os.path.join(HERE, 'examples')  # built-in example .lyl files
OUTPUT_DIR = os.path.join(HERE, 'outputs')  # generated .lyl files are written (and re-read) here
WEB_DIR = os.path.join(HERE, 'web')  # vendored browser libs + score player (gitignored bundles)
# File-list label prefixes that tell built-in examples apart from generated outputs.
EXAMPLE_PREFIX = '\U0001F4C4 '  # 📄 examples
OUTPUT_PREFIX = '✨ '  # ✨ session outputs
# Suggested metadata values (editable — the dropdowns allow custom input), loaded
# from assets/styles.json. Drawn from the NotaGenX period/instrumentation
# vocabulary + values seen in examples.
# Read inside a `with` block so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open(os.path.join(ASSET_DIR, 'styles.json'), encoding='utf-8') as _styles_file:
    _STYLES = json.load(_styles_file)
COMPOSERS = _STYLES['composers']
GENRES = _STYLES['genres']
INSTRUMENTS = _STYLES['instruments']
# Process-wide generator singleton; stays None until get_generator() builds it.
_GEN = None
# Syntax-blacklist mask: discovered 2-grams whose forbidden next-tokens are masked
# during sampling so the model can't emit those locally-illegal continuations.
# Loaded once at startup; a fresh (stateful) MaskMonitor is built per generation.
# Empty/missing file -> no masking (behavior unchanged): run_generation only
# builds a MaskMonitor when this mapping is non-empty.
# NOTE(review): the "missing file" half relies on load_blacklist returning an
# empty mapping in that case — confirm in lilyscript/mask_monitor.py.
BLACKLIST_PATH = os.path.join(ASSET_DIR, 'lilylet-blacklist.json')
_BLACKLIST = load_blacklist(BLACKLIST_PATH)
# ---- system log capture ----------------------------------------------------
# A ring buffer that mirrors Python logging (lifecycle messages, warnings, and
# errors) into the Logs panel, alongside the streamed generation text.
LOG = logging.getLogger('lilyscript')


class _RingBufferHandler (logging.Handler):
    '''Keep the most recent N log records as formatted strings.

    capacity: maximum number of formatted records retained; once full, the
    oldest entry is dropped for each new one (a bounded deque).
    '''

    def __init__ (self, capacity=400):
        super().__init__()
        self.records = deque(maxlen=capacity)

    def emit (self, record):
        '''Format `record` and append it to the buffer.

        A record that cannot be formatted (e.g. mismatched %-args) is not
        stored. The failure is reported through `Handler.handleError` — the
        standard logging hook — rather than silently discarded; it never
        propagates to the code that logged.
        '''
        try:
            self.records.append(self.format(record))
        except Exception:
            self.handleError(record)

    def text (self):
        '''Return the buffered records, oldest first, joined by newlines.'''
        return '\n'.join(self.records)
# The single shared ring-buffer handler. `_init_logging` attaches it to the root
# logger and `_log_panel` reads it back; lines look like
# "HH:MM:SS LEVEL logger-name: message".
_LOG_BUFFER = _RingBufferHandler()
_LOG_BUFFER.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s', datefmt='%H:%M:%S'))
def _init_logging ():
    '''Wire logging for the app: capture Python warnings, attach the ring buffer
    plus a stderr echo handler to the root logger (skipped when the ring buffer
    is already attached), and set both the root and the app logger to INFO.'''
    logging.captureWarnings(True)
    root_logger = logging.getLogger()
    if _LOG_BUFFER not in root_logger.handlers:
        # terminal copy, formatted like the ring-buffer lines
        echo = logging.StreamHandler()
        echo.setFormatter(logging.Formatter(
            '%(asctime)s %(levelname)s %(name)s: %(message)s', datefmt='%H:%M:%S'))
        for handler in (_LOG_BUFFER, echo):
            root_logger.addHandler(handler)
    for logger in (root_logger, LOG):
        logger.setLevel(logging.INFO)


_init_logging()
def resolve_model_dir ():
    '''Return the directory holding the ONNX weights.

    With LILYSCRIPT_MODEL_DIR set, that path is returned untouched (local dev).
    Otherwise the weight files under the repo's `onnx/` dir are downloaded from
    the HF model repo and the matching path inside the local snapshot is
    returned. Only the weight/geometry files are requested — the tokenizer is
    read from the app's own assets/ dir and is not fetched.'''
    if MODEL_DIR:
        return MODEL_DIR
    # imported here so the dependency is only needed when a download is needed
    from huggingface_hub import snapshot_download
    LOG.info('downloading model weights from hf:%s (%s/) ...', HF_MODEL_REPO, HF_MODEL_SUBDIR)
    wanted = ('patch_kv_int8.onnx', 'token_kv_int8.onnx', 'wte.npy', 'geometry.json')
    snapshot_root = snapshot_download(
        repo_id=HF_MODEL_REPO,
        allow_patterns=[f'{HF_MODEL_SUBDIR}/{fname}' for fname in wanted],
    )
    return os.path.join(snapshot_root, HF_MODEL_SUBDIR)
def get_generator ():
    '''Return the shared ONNX generator, constructing it (slow) on the first call
    and reusing the cached instance afterwards.'''
    global _GEN
    if _GEN is not None:
        return _GEN
    weights_dir = resolve_model_dir()
    LOG.info('loading ONNX generator from %s ...', weights_dir)
    started = time.perf_counter()
    _GEN = StreamingLilyletGenerator(weights_dir, ASSET_DIR)
    LOG.info('generator ready (%.1fs)', time.perf_counter() - started)
    # report the size of the loaded syntax blacklist next to the load message
    n_contexts = len(_BLACKLIST)
    n_pairs = sum(len(forbidden) for forbidden in _BLACKLIST.values())
    LOG.info('syntax blacklist: %d contexts / %d forbidden pairs from %s',
             n_contexts, n_pairs, BLACKLIST_PATH)
    return _GEN
def load_examples ():
    '''Collect the built-in example .lyl files as a {label: text} dict.

    Labels are EXAMPLE_PREFIX + the file name (extension included), in sorted
    file-name order. Every file is passed through `postprocess`, since examples
    may be stored raw (inline `[r:x/y]` stream markers, run-together header
    lines) while the editor and score renderer need clean Lilylet; already-clean
    text comes back unchanged. A missing examples dir yields an empty dict.'''
    library = {}
    if not os.path.isdir(EXAMPLES_DIR):
        return library
    for fname in sorted(os.listdir(EXAMPLES_DIR)):
        if not fname.endswith('.lyl'):
            continue
        with open(os.path.join(EXAMPLES_DIR, fname), encoding='utf-8') as fh:
            library[EXAMPLE_PREFIX + fname] = postprocess(fh.read())
    return library
def load_outputs ():
    '''Collect previously generated .lyl files from the outputs dir as a
    {label: text} dict so earlier outputs are listed again after a restart.

    Labels are OUTPUT_PREFIX + the file name without its `.lyl` extension, in
    sorted file-name order; the text is used as stored. A missing outputs dir
    yields an empty dict.'''
    library = {}
    if not os.path.isdir(OUTPUT_DIR):
        return library
    suffix = '.lyl'
    for fname in sorted(os.listdir(OUTPUT_DIR)):
        if not fname.endswith(suffix):
            continue
        with open(os.path.join(OUTPUT_DIR, fname), encoding='utf-8') as fh:
            library[OUTPUT_PREFIX + fname[:-len(suffix)]] = fh.read()
    return library
def load_library ():
    '''Build the initial file list: built-in examples first, then any persisted
    outputs (an output wins if a label ever coincides with an example's).'''
    library = load_examples()
    library.update(load_outputs())
    return library
# Matches one of the three dropdown-managed metadata lines, e.g. `[genre "..."]`.
_STYLE_LINE_RE = re.compile(r'^\[(composer|genre|instrument)\s+".*"\]\s*$')


def sync_prompt (composer, genre, instrument, current):
    '''Rebuild the metadata-prompt text after a style dropdown changed.

    A fresh `[composer/genre/instrument "..."]` line is emitted, in that order,
    for every dropdown holding a non-blank value. Below them come the lines of
    `current` that are neither blank nor one of those three managed lines (e.g.
    a hand-typed `[key "..."]`), in their original order and with their original
    whitespace. Returns the new text joined with newlines.
    '''
    chosen = (('composer', composer), ('genre', genre), ('instrument', instrument))
    cleaned = ((field, (value or '').strip()) for field, value in chosen)
    managed = [f'[{field} "{value}"]' for field, value in cleaned if value]
    # user lines survive unless they are blank or an old managed style line
    kept = [
        ln for ln in (current or '').splitlines()
        if ln.strip() and not _STYLE_LINE_RE.match(ln.strip())
    ]
    return '\n'.join(managed + kept)
# Marker line written to the log buffer at the moment generation output begins.
# `_log_panel` replaces this marker with the live generation text, so the streamed
# output appears in true chronological position — after the "requested"/"ready"
# lines and before the later "timing"/"done" lines.
_GEN_MARKER = '⁣__GENERATION_OUTPUT__⁣'
def _log_panel (raw=''):
    '''Render the Logs panel in chronological order: the captured system log,
    with the generation-output marker (if present) expanded to the live text.

    raw: the generation text streamed so far ('' when there is none).

    The ring buffer can still hold markers from earlier generations. Only the
    most recent marker is expanded to `raw`; older ones are reduced to the bare
    header line, so the live text appears exactly once, at the current run's
    position, instead of being repeated at every past run's marker.
    Returns the panel text.'''
    header = '--- generation output ---'
    sys_log = _LOG_BUFFER.text()
    if _GEN_MARKER in sys_log:
        live = header + '\n' + raw if raw else header
        # split at the LAST marker: everything before it belongs to earlier runs
        earlier, _, later = sys_log.rpartition(_GEN_MARKER)
        return earlier.replace(_GEN_MARKER, header) + live + later
    # no marker yet (e.g. before generation): fall back to appending raw
    if raw:
        return (sys_log + '\n' if sys_log else '') + header + '\n' + raw
    return sys_log
def run_generation (prompt, measures, temperature, max_patches, seed, store, top_k=0, top_p=0.9):
    '''Streaming generate callback. Yields updates for (log, editor, file_list, store, seed, gen_btn).

    store: {label: lyl_text} dict held in gr.State; the produced document is added
    to it under a timestamped label once generation finishes.
    top_k / top_p have fixed defaults (no UI controls); pass them explicitly to override.

    Progress is shown on the Generate button itself: its label becomes
    "Generating… M/N" (by completed measures when a measure count is requested, else
    by patches out of max_patches) during the run, and reverts to "Generate" at the
    end. (Gradio's native progress bar can't render on a Button, so we drive the
    label directly.)

    The output file is named with the seed used for THIS generation; on completion
    the seed slider is randomized (final yield) so the next click uses a fresh seed.

    Failure handling: an exception raised while generating is logged and answered
    with one last yield that restores the button. A failure to write the output
    file is logged too, but the document still lands in `store` and the final
    yield still happens, so the UI never stays stuck on "Generating…".
    '''
    meas = int(measures) if measures and int(measures) > 0 else None
    store = dict(store or {})
    LOG.info('generation requested: measures=%s temperature=%s max_patches=%s seed=%s',
             meas, temperature, max_patches, seed)
    raw = pretty = ''
    n_yields = 0
    mp = int(max_patches)
    t0 = time.perf_counter()
    try:
        gen = get_generator()
        # build a fresh per-run mask monitor (stateful: tracks the running 2-gram).
        # None when no blacklist is loaded -> sampling path unchanged.
        monitor = MaskMonitor(gen, _BLACKLIST) if _BLACKLIST else None
        # drop a marker in the log timeline; _log_panel expands it to the live output,
        # so subsequent log lines (timing/done) land *after* the generation text.
        LOG.info(_GEN_MARKER)
        for raw, pretty, done in gen.generate_stream(
                prompt_text=prompt or '', max_patches=mp, temperature=float(temperature),
                top_k=int(top_k), top_p=float(top_p), measures=meas, seed=int(seed), monitor=monitor):
            if not done:
                n_yields += 1
            # progress on the Generate button label: by measures (completed `|`
            # separators vs target) when a measure count was requested, else by patches.
            if meas:
                btn_label = 'Generating… %d/%d' % (min(raw.count('|'), meas), meas)
            else:
                btn_label = 'Generating… %d/%d' % (max(0, n_yields - 1), mp)
            # The log streams every patch (raw text). The editor, however, must stay
            # syntactically valid: only sync it at a measure boundary — i.e. when the
            # accumulated text ends with the measure separator `|` (so it never shows a
            # half-generated, incomplete measure). `done` forces a final sync.
            at_boundary = raw.rstrip().endswith('|')
            editor_update = pretty if (at_boundary or done) else gr.update()
            yield _log_panel(raw), editor_update, gr.update(), store, gr.update(), gr.update(value=btn_label)
    except Exception as e:
        LOG.exception('generation failed: %s', e)
        yield _log_panel(raw), pretty, gr.update(), store, gr.update(), gr.update(value='Generate')
        return
    # timing: the stream yields once for prefill + once per generated patch, so the
    # patch count is the non-done yields minus that initial prefill yield.
    elapsed = time.perf_counter() - t0
    n_patches = max(0, n_yields - 1)
    per_patch = (elapsed / n_patches) if n_patches else 0.0
    LOG.info('timing: %.2fs total, %d patches, %.3fs/patch (%.1f patches/s)',
             elapsed, n_patches, per_patch, (n_patches / elapsed if elapsed else 0.0))
    # finished: persist the document, refresh the file list, select the new entry.
    # The label and the file name share one stem (timestamp, measures, seed).
    stem = time.strftime('%Y%m%d_%H%M%S') + ('_m%d' % meas if meas else '') + '_s%d' % int(seed)
    label = OUTPUT_PREFIX + stem
    store[label] = pretty
    out_path = os.path.join(OUTPUT_DIR, stem + '.lyl')
    try:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(pretty)
    except OSError as e:
        # keep going: the document is still available from `store` for this
        # session, and the final yield below must run to reset the button.
        LOG.exception('generation done, but saving %s failed: %s', os.path.basename(out_path), e)
    else:
        LOG.info('generation done: %d chars -> %s', len(pretty), os.path.basename(out_path))
    # randomize the seed slider for the next run (the file above already used the
    # seed this generation ran with, so naming is unaffected)
    next_seed = random.randint(0, 2147483647)
    yield _log_panel(raw), pretty, gr.update(choices=list(store.keys()), value=label), store, gr.update(value=next_seed), gr.update(value='Generate')
def load_file (label, store):
    '''File-list selection handler: return the document stored under `label`
    for the editor, or '' when the label is unknown or the store is empty.'''
    documents = store if store else {}
    return documents.get(label, '')
# Initial markup of the sheet-music panel: the `#ls-score` mount point (looked up
# by id in the page-load js and handed to `LilyScore.mount`), pre-filled with a
# dashed-border "Loading score renderer…" notice.
SHEET_PLACEHOLDER = '''
<div id="ls-score" class="ls-score-mount" style="height:100%;min-height:600px;">
<div style="display:flex;align-items:center;justify-content:center;height:100%;
min-height:600px;border:1px dashed #c9c9c9;border-radius:8px;color:#999;
font-family:sans-serif;text-align:center;">
<div>
<div style="font-size:42px;margin-bottom:8px;">&#127932;</div>
<div>Loading score renderer…</div>
</div>
</div>
</div>
'''
# Static-file URL prefix Gradio serves allowed_paths under (verified at 6.18.0).
def _file_url (path):
    '''Return the browser URL for local file `path`: the static-file prefix
    followed by the path, verbatim (no quoting or normalisation).'''
    return ''.join(('/gradio_api/file=', path))
def build_head ():
    '''Build the <head> injection string, one tag per line.

    Order: an inline script pointing the soundfont loader at the vendored
    `web/soundfont/` copy, the score-player stylesheet, the vendored browser
    libs (lilylet bundle, Verovio WASM, music-widgets) and finally the score
    player itself. Gradio delivers this via its client config and injects it on
    the frontend, so absolute `/gradio_api/file=` URLs resolve correctly.'''
    vendor_dir = os.path.join(WEB_DIR, 'vendor')
    script_paths = [os.path.join(vendor_dir, fname) for fname in (
        'lilylet.bundle.js', 'verovio.bundle.js', 'musicWidgetsBrowser.umd.min.js')]
    # the player goes last, after the libraries listed above
    script_paths.append(os.path.join(WEB_DIR, 'score-player.js'))
    soundfont_url = _file_url(os.path.join(WEB_DIR, 'soundfont')) + '/'
    stylesheet_url = _file_url(os.path.join(WEB_DIR, 'score-player.css'))
    head_tags = [
        '<script>window.__LILYSCRIPT_SOUNDFONT_URL=%r;</script>' % soundfont_url,
        '<link rel="stylesheet" href="%s">' % stylesheet_url,
    ]
    head_tags.extend('<script src="%s"></script>' % _file_url(p) for p in script_paths)
    return '\n'.join(head_tags)
# ---- client-side glue (Gradio `js=` handlers) -------------------------------
# These run in the browser. They wait for LilyScore (score-player.js) to load,
# then mount it into #ls-score and drive render / generation-gating.
#
# _JS_HELPERS is the page-load handler (wired through `demo.load`): it tries to
# mount the player right away and otherwise retries every 200 ms, giving up
# after 20 s.
_JS_HELPERS = '''
function () {
// poll until the score player + its #ls-score mount exist, then mount once.
const tryMount = () => {
const root = document.getElementById('ls-score');
if (window.LilyScore && root) { window.LilyScore.mount(root); return true; }
return false;
};
if (!tryMount()) {
const iv = setInterval(() => { if (tryMount()) clearInterval(iv); }, 200);
setTimeout(() => clearInterval(iv), 20000);
}
}
'''
# Render the editor text to SVG. The text is passed in by Gradio as the event's
# input value — we must NOT scrape it from the DOM: gr.Code is a CodeMirror editor
# that virtualises long documents (only the rows near the viewport exist in the
# DOM), so `.cm-content`.innerText is truncated for long scores and the render
# comes out incomplete. Taking the value as an argument gives the full text.
# The handler returns an empty array: the event it is wired to has no outputs.
# It does nothing when LilyScore has not loaded yet.
_JS_RENDER = '''
function (text) {
if (window.LilyScore) window.LilyScore.render(text || '');
return [];
}
'''
# Generation gate, start side: flags the score player as generating and adds the
# `ls-generating` class to the Generate and Stop buttons (styled in CUSTOM_CSS).
# NB: when js= runs before a backend fn, Gradio passes the event's input values
# to the js function and uses its RETURN value as the fn's inputs. So the gate
# must return its args unchanged — returning nothing makes Gradio send null for
# every input (which breaks e.g. the temperature Slider's preprocessor).
_JS_GEN_START = '''
function (...args) {
if (window.LilyScore) window.LilyScore.setGenerating(true);
// turn Generate yellow + Stop red while running
['gen-btn', 'stop-btn'].forEach(function (idv) {
const b = document.getElementById(idv);
if (b) b.classList.add('ls-generating');
});
return args;
}
'''
# Generation gate, end side (counterpart of _JS_GEN_START): clears the player's
# generating flag and removes the `ls-generating` class from both buttons. Wired
# after the generate event and after a Stop click; it takes no inputs and
# returns nothing.
_JS_GEN_END = '''
function () {
if (window.LilyScore) window.LilyScore.setGenerating(false);
['gen-btn', 'stop-btn'].forEach(function (idv) {
const b = document.getElementById(idv);
if (b) b.classList.remove('ls-generating');
});
}
'''
# App stylesheet, passed to `launch(css=...)`. Two concerns: the Score List
# (single-line ellipsis, full name on hover, fixed-height scrolling) and the
# generating-state colors of the Generate / Stop buttons, keyed on the
# `ls-generating` class toggled by the gate js.
# NOTE(review): the in-CSS comment cites ≈266px for the 18-line editor while
# `.score-list` uses max-height: 324px — confirm which value is intended.
CUSTOM_CSS = '''
/* Score List: truncate long file names to a single line with an ellipsis. */
.score-list label {
max-width: 100%;
}
.score-list label > span {
display: inline-block;
max-width: 100%;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
vertical-align: middle;
}
/* on hover, reveal the full (otherwise ellipsised) file name */
.score-list label:hover > span {
overflow: visible;
white-space: normal;
word-break: break-all;
}
/* Score List: fixed height with an auto scrollbar, matching the editor's
18-line viewport (gr.Code lines=18 ≈ 266px). */
.score-list {
max-height: 324px;
overflow-y: auto;
}
/* Generate button turns yellow while a generation is running (the .ls-generating
class is toggled by the generation-gate js). !important beats the primary variant. */
#gen-btn.ls-generating {
background: #f5c518 !important;
background-image: none !important;
border-color: #d4a800 !important;
color: #3a2f00 !important;
}
/* Stop button: grey by default (theme look), solid red only while generating
(the .ls-generating class is toggled by the generation-gate js). */
#stop-btn.ls-generating {
background: #e23b3b !important;
background-image: none !important;
border-color: #c42b2b !important;
color: #fff !important;
}
#stop-btn.ls-generating:hover {
background: #cf2e2e !important;
}
'''
# Build the Gradio Blocks app: the layout (compose panel, logs, score list and
# editor on the left; sheet-music mount on the right) followed by the event
# wiring. Returns the Blocks object; launching is left to the caller.
def build_ui ():
# initial {label: text} library: built-in examples + persisted outputs
examples = load_library()
with gr.Blocks(title='LilyScript') as demo:
gr.Markdown('## 🎼 LilyScript — symbolic music generation with Lilylet')
# the library is held in gr.State; run_generation returns an updated copy
store = gr.State(examples)
with gr.Row(equal_height=True):
# ---------------- LEFT ----------------
with gr.Column(scale=5):
# (1) compose params, with (2) the collapsible run log stacked below
with gr.Group():
gr.Markdown('## Compose')
with gr.Group():
gr.Markdown('- Style Options')
with gr.Row():
composer = gr.Dropdown(label='composer', choices=COMPOSERS, value='',
allow_custom_value=True)
genre = gr.Dropdown(label='genre', choices=GENRES, value='',
allow_custom_value=True)
instrument = gr.Dropdown(label='instrument', choices=INSTRUMENTS, value='',
allow_custom_value=True)
prompt = gr.Textbox(label='Metadata prompt', lines=3, value='',
placeholder='extra metadata lines, e.g.\n[key "C major"]\n(optional)')
gr.Markdown('- Length')
with gr.Row():
measures = gr.Number(label='Measures (0 = let model decide)', value=0, precision=0)
max_patches = gr.Number(label='max patches', value=1024, precision=0)
gr.Markdown('- Sampler')
with gr.Row():
temperature = gr.Slider(0.0, 2.0, value=1.0, step=0.05, label='temperature')
seed = gr.Slider(0, 2147483647, value=42, step=1, label='seed')
with gr.Row():
gen_btn = gr.Button('Generate', variant='primary', elem_id='gen-btn')
stop_btn = gr.Button('Stop', variant='stop', elem_id='stop-btn')
with gr.Accordion('Logs', open=True):
log = gr.Textbox(show_label=False, lines=10, max_lines=10,
autoscroll=True, interactive=False, container=False)
# bottom row: (3) file list | (4) editor
with gr.Row(equal_height=True):
with gr.Column(scale=2, min_width=160):
with gr.Group():
gr.Markdown('## Score List')
file_list = gr.Radio(show_label=False, choices=list(examples.keys()),
value=None, interactive=True, container=False,
elem_classes=['score-list'])
with gr.Column(scale=5):
with gr.Group():
gr.Markdown('## Lilylet editor')
editor = gr.Code(show_label=False, language=None, lines=18,
max_lines=18, interactive=True, elem_id='ls-editor')
# ---------------- RIGHT ----------------
with gr.Column(scale=6):
with gr.Group():
gr.Markdown('## Sheet music')
gr.HTML(SHEET_PLACEHOLDER)
# ---- wiring ----
# mount the score player once the page (and LilyScore) is ready
demo.load(None, None, None, js=_JS_HELPERS)
# style dropdowns -> keep the metadata-prompt text box in sync
for field in (composer, genre, instrument):
field.change(sync_prompt, inputs=[composer, genre, instrument, prompt], outputs=[prompt])
# Generate: a single click dep that runs the gate js first (SVG-only, player
# hidden) and then the streaming model fn. The gate returns its arguments
# unchanged, because Gradio uses the js return value as the fn's inputs (a
# gate returning nothing would send null for every input). A trailing
# `.then` js lifts the gate + reveals the player when it finishes.
gen_event = gen_btn.click(
run_generation,
inputs=[prompt, measures, temperature, max_patches, seed, store],
outputs=[log, editor, file_list, store, seed, gen_btn],
js=_JS_GEN_START,
# progress is shown on the Generate button's own label ("Generating… M/N"),
# driven by run_generation. Hide Gradio's native progress overlay (it can't
# render on a Button and otherwise covers the Logs/editor outputs).
show_progress='hidden',
)
gen_event.then(None, None, None, js=_JS_GEN_END)
# every editor change (streaming syncs + manual edits + file loads) -> re-render
# SVG. Pass the editor value as input so the js receives the FULL text (gr.Code
# virtualises long docs in the DOM, so scraping it client-side truncates).
editor.change(None, inputs=[editor], outputs=None, js=_JS_RENDER)
# Stop: cancel generation, reset the Generate button label (the cancelled
# run never reaches its final yield, so it'd otherwise stay "Generating…"),
# then lift the gate (js) so the player returns + button colors revert.
stop_btn.click(
lambda: gr.update(value='Generate'), None, outputs=[gen_btn], cancels=[gen_event],
).then(None, None, None, js=_JS_GEN_END)
# picking an entry in the Score List loads that document into the editor
file_list.select(load_file, inputs=[file_list, store], outputs=[editor])
return demo
if __name__ == '__main__':
    # Script entry point: build the UI, enable the queue, then serve. The theme,
    # stylesheet and <head> injection are supplied at launch, and WEB_DIR is
    # whitelisted so the vendored browser files can be served.
    demo = build_ui()
    queued = demo.queue()
    queued.launch(
        allowed_paths=[WEB_DIR],
        head=build_head(),
        css=CUSTOM_CSS,
        theme=gr.themes.Soft(),
    )