Spaces:
Running
Per-turn telemetry + RAG few-shot on phrasebook miss
Telemetry (src/engine/turn_logger.py)
Thread-safe JSONL append logger writing one self-contained row per turn
to data/field_turns.jsonl (overridable via FIELD_TURNS_PATH). Captures
ts, tab, langs, transcript/user_text, phrasebook hit details, llm model
+ ms, reply, tts ms, total ms, error. Foundation for field-test review,
phrasebook hit-rate measurement, and Stage-4 training-data curation.
Never raises — telemetry must not break the user-facing pipeline.
RAG few-shot (src/llm/phrasebook.py + src/llm/minimal_client.py)
- phrasebook.top_k(text, lang, k=3): nearest-K phrasebook entries
regardless of threshold, sorted by descending fuzzy score.
- MinimalClient.chat() now accepts extra_examples; _build_system_prompt
appends them as a second 'Additional reference phrases relevant to the
current user input' section after the 30-pair anchor list.
app_minimal.py
- Both run_pipeline (voice) and run_text_pipeline (text) now share a
single _resolve_reply() helper: phrasebook short-circuit first, on
miss inject top-3 nearest entries into the LLM call, log the turn.
- Stage timings (transcribe_ms, llm_ms, tts_ms, total_ms) captured per
turn and emitted via TurnLogger.
- app_minimal.py +134 -42
- src/engine/turn_logger.py +80 -0
- src/llm/minimal_client.py +36 -4
- src/llm/phrasebook.py +32 -0
|
@@ -36,9 +36,10 @@ except ImportError:
|
|
| 36 |
# Local imports — the four modules the baseline-rebuild plan authorizes.
|
| 37 |
# Everything else in src/ is intentionally unused here.
|
| 38 |
from src.data.bam_normalize import normalize as bam_normalize
|
|
|
|
| 39 |
from src.engine.whisper_base import WhisperBackbone
|
| 40 |
from src.llm.minimal_client import MinimalClient
|
| 41 |
-
from src.llm.phrasebook import lookup as phrasebook_lookup
|
| 42 |
from src.tts.mms_tts import MMSTTSEngine
|
| 43 |
|
| 44 |
logging.basicConfig(
|
|
@@ -71,9 +72,10 @@ LANG_TO_WHISPER_HINT = {
|
|
| 71 |
|
| 72 |
|
| 73 |
# ── Service singletons (lazy-loaded) ────────────────────────────────────────
|
| 74 |
-
_backbone:
|
| 75 |
-
_llm:
|
| 76 |
-
_tts:
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def _resolve_device() -> str:
|
|
@@ -180,6 +182,8 @@ def run_pipeline(
|
|
| 180 |
Returns (transcript, reply_text, reply_audio). Graceful degradation: any
|
| 181 |
stage failure yields a readable string and None audio instead of raising.
|
| 182 |
"""
|
|
|
|
|
|
|
| 183 |
if audio is None:
|
| 184 |
return "", "(no audio received)", None
|
| 185 |
|
|
@@ -187,44 +191,75 @@ def run_pipeline(
|
|
| 187 |
if audio_np.size == 0:
|
| 188 |
return "", "(empty audio)", None
|
| 189 |
|
|
|
|
|
|
|
| 190 |
try:
|
| 191 |
transcript = transcribe(audio_np, sample_rate, input_lang)
|
| 192 |
except Exception as exc: # pragma: no cover — field-safety
|
| 193 |
logger.exception("Transcription failed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
return "", f"(STT error: {exc})", None
|
|
|
|
| 195 |
|
| 196 |
if not transcript:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
return "", "(no speech detected)", None
|
| 198 |
|
| 199 |
-
# ── Phrasebook
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
hit
|
|
|
|
|
|
|
|
|
|
| 207 |
)
|
| 208 |
-
|
| 209 |
-
else:
|
| 210 |
-
try:
|
| 211 |
-
# Dialect-anchored plain-string reply (see MinimalClient).
|
| 212 |
-
reply_text = get_llm().chat(transcript, target_lang=output_lang)
|
| 213 |
-
except Exception as exc: # pragma: no cover
|
| 214 |
-
logger.exception("LLM call failed")
|
| 215 |
-
return transcript, f"(LLM error: {exc})", None
|
| 216 |
-
|
| 217 |
-
reply_text = reply_text or "(empty reply)"
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
try:
|
| 220 |
wav, sr = get_tts().synthesize(
|
| 221 |
reply_text, language=output_lang, device=_resolve_device()
|
| 222 |
)
|
|
|
|
|
|
|
| 223 |
except Exception as exc:
|
| 224 |
logger.exception("TTS failed")
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
|
| 230 |
def run_text_pipeline(
|
|
@@ -241,36 +276,93 @@ def run_text_pipeline(
|
|
| 241 |
reads it as-is and replies in `output_lang`. Skips Whisper entirely; this
|
| 242 |
is the fast dev-loop path.
|
| 243 |
"""
|
|
|
|
|
|
|
| 244 |
text = (text or "").strip()
|
| 245 |
if not text:
|
| 246 |
return "(no text entered)", None
|
| 247 |
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
hit
|
|
|
|
|
|
|
|
|
|
| 254 |
)
|
| 255 |
-
|
| 256 |
-
else:
|
| 257 |
-
try:
|
| 258 |
-
reply_text = get_llm().chat(text, target_lang=output_lang)
|
| 259 |
-
except Exception as exc: # pragma: no cover
|
| 260 |
-
logger.exception("LLM call failed")
|
| 261 |
-
return f"(LLM error: {exc})", None
|
| 262 |
-
|
| 263 |
-
reply_text = reply_text or "(empty reply)"
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
try:
|
| 266 |
wav, sr = get_tts().synthesize(
|
| 267 |
reply_text, language=output_lang, device=_resolve_device()
|
| 268 |
)
|
|
|
|
|
|
|
| 269 |
except Exception as exc:
|
| 270 |
logger.exception("TTS failed")
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
|
| 276 |
# ── Gradio UI ────────────────────────────────────────────────────────────────
|
|
|
|
| 36 |
# Local imports — the four modules the baseline-rebuild plan authorizes.
|
| 37 |
# Everything else in src/ is intentionally unused here.
|
| 38 |
from src.data.bam_normalize import normalize as bam_normalize
|
| 39 |
+
from src.engine.turn_logger import TurnLogger
|
| 40 |
from src.engine.whisper_base import WhisperBackbone
|
| 41 |
from src.llm.minimal_client import MinimalClient
|
| 42 |
+
from src.llm.phrasebook import lookup as phrasebook_lookup, top_k as phrasebook_top_k
|
| 43 |
from src.tts.mms_tts import MMSTTSEngine
|
| 44 |
|
| 45 |
logging.basicConfig(
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
# ── Service singletons (lazy-loaded) ────────────────────────────────────────
|
| 75 |
+
_backbone: Optional[WhisperBackbone] = None
|
| 76 |
+
_llm: Optional[MinimalClient] = None
|
| 77 |
+
_tts: Optional[MMSTTSEngine] = None
|
| 78 |
+
_turn_logger: TurnLogger = TurnLogger()
|
| 79 |
|
| 80 |
|
| 81 |
def _resolve_device() -> str:
|
|
|
|
| 182 |
Returns (transcript, reply_text, reply_audio). Graceful degradation: any
|
| 183 |
stage failure yields a readable string and None audio instead of raising.
|
| 184 |
"""
|
| 185 |
+
import time
|
| 186 |
+
t0 = time.perf_counter()
|
| 187 |
if audio is None:
|
| 188 |
return "", "(no audio received)", None
|
| 189 |
|
|
|
|
| 191 |
if audio_np.size == 0:
|
| 192 |
return "", "(empty audio)", None
|
| 193 |
|
| 194 |
+
# ── 1. Transcribe ─────────────────────────────────────────────────────
|
| 195 |
+
t_stt = time.perf_counter()
|
| 196 |
try:
|
| 197 |
transcript = transcribe(audio_np, sample_rate, input_lang)
|
| 198 |
except Exception as exc: # pragma: no cover — field-safety
|
| 199 |
logger.exception("Transcription failed")
|
| 200 |
+
_turn_logger.log(
|
| 201 |
+
tab="voice", input_lang=input_lang, output_lang=output_lang,
|
| 202 |
+
user_text=None, transcript=None, transcribe_ms=None,
|
| 203 |
+
phrasebook=None, llm_model=None, llm_ms=None,
|
| 204 |
+
reply_text=None, tts_ms=None,
|
| 205 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 206 |
+
error=f"stt: {exc}",
|
| 207 |
+
)
|
| 208 |
return "", f"(STT error: {exc})", None
|
| 209 |
+
transcribe_ms = int((time.perf_counter() - t_stt) * 1000)
|
| 210 |
|
| 211 |
if not transcript:
|
| 212 |
+
_turn_logger.log(
|
| 213 |
+
tab="voice", input_lang=input_lang, output_lang=output_lang,
|
| 214 |
+
user_text=None, transcript="", transcribe_ms=transcribe_ms,
|
| 215 |
+
phrasebook=None, llm_model=None, llm_ms=None,
|
| 216 |
+
reply_text=None, tts_ms=None,
|
| 217 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 218 |
+
error="no_speech",
|
| 219 |
+
)
|
| 220 |
return "", "(no speech detected)", None
|
| 221 |
|
| 222 |
+
# ── 2. Phrasebook → LLM (with RAG few-shot on miss) → reply ──────────
|
| 223 |
+
reply_text, hit, llm_ms = _resolve_reply(transcript, output_lang)
|
| 224 |
+
if reply_text is None:
|
| 225 |
+
_turn_logger.log(
|
| 226 |
+
tab="voice", input_lang=input_lang, output_lang=output_lang,
|
| 227 |
+
user_text=transcript, transcript=transcript,
|
| 228 |
+
transcribe_ms=transcribe_ms,
|
| 229 |
+
phrasebook=hit, llm_model=LLM_MODEL_ID, llm_ms=llm_ms,
|
| 230 |
+
reply_text=None, tts_ms=None,
|
| 231 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 232 |
+
error="llm_failed",
|
| 233 |
)
|
| 234 |
+
return transcript, "(LLM error)", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
+
# ── 3. TTS ────────────────────────────────────────────────────────────
|
| 237 |
+
t_tts = time.perf_counter()
|
| 238 |
+
tts_ms: Optional[int] = None
|
| 239 |
+
audio_out: Optional[Tuple[int, np.ndarray]] = None
|
| 240 |
+
tts_error: Optional[str] = None
|
| 241 |
try:
|
| 242 |
wav, sr = get_tts().synthesize(
|
| 243 |
reply_text, language=output_lang, device=_resolve_device()
|
| 244 |
)
|
| 245 |
+
audio_out = (sr, wav)
|
| 246 |
+
tts_ms = int((time.perf_counter() - t_tts) * 1000)
|
| 247 |
except Exception as exc:
|
| 248 |
logger.exception("TTS failed")
|
| 249 |
+
tts_error = f"tts: {exc}"
|
| 250 |
+
|
| 251 |
+
_turn_logger.log(
|
| 252 |
+
tab="voice", input_lang=input_lang, output_lang=output_lang,
|
| 253 |
+
user_text=transcript, transcript=transcript,
|
| 254 |
+
transcribe_ms=transcribe_ms,
|
| 255 |
+
phrasebook=hit,
|
| 256 |
+
llm_model=None if hit else LLM_MODEL_ID,
|
| 257 |
+
llm_ms=llm_ms,
|
| 258 |
+
reply_text=reply_text, tts_ms=tts_ms,
|
| 259 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 260 |
+
error=tts_error,
|
| 261 |
+
)
|
| 262 |
+
return transcript, reply_text, audio_out
|
| 263 |
|
| 264 |
|
| 265 |
def run_text_pipeline(
|
|
|
|
| 276 |
reads it as-is and replies in `output_lang`. Skips Whisper entirely; this
|
| 277 |
is the fast dev-loop path.
|
| 278 |
"""
|
| 279 |
+
import time
|
| 280 |
+
t0 = time.perf_counter()
|
| 281 |
text = (text or "").strip()
|
| 282 |
if not text:
|
| 283 |
return "(no text entered)", None
|
| 284 |
|
| 285 |
+
reply_text, hit, llm_ms = _resolve_reply(text, output_lang)
|
| 286 |
+
if reply_text is None:
|
| 287 |
+
_turn_logger.log(
|
| 288 |
+
tab="text", input_lang=None, output_lang=output_lang,
|
| 289 |
+
user_text=text, transcript=None, transcribe_ms=None,
|
| 290 |
+
phrasebook=hit, llm_model=LLM_MODEL_ID, llm_ms=llm_ms,
|
| 291 |
+
reply_text=None, tts_ms=None,
|
| 292 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 293 |
+
error="llm_failed",
|
| 294 |
)
|
| 295 |
+
return "(LLM error)", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
+
t_tts = time.perf_counter()
|
| 298 |
+
tts_ms: Optional[int] = None
|
| 299 |
+
audio_out: Optional[Tuple[int, np.ndarray]] = None
|
| 300 |
+
tts_error: Optional[str] = None
|
| 301 |
try:
|
| 302 |
wav, sr = get_tts().synthesize(
|
| 303 |
reply_text, language=output_lang, device=_resolve_device()
|
| 304 |
)
|
| 305 |
+
audio_out = (sr, wav)
|
| 306 |
+
tts_ms = int((time.perf_counter() - t_tts) * 1000)
|
| 307 |
except Exception as exc:
|
| 308 |
logger.exception("TTS failed")
|
| 309 |
+
tts_error = f"tts: {exc}"
|
| 310 |
+
|
| 311 |
+
_turn_logger.log(
|
| 312 |
+
tab="text", input_lang=None, output_lang=output_lang,
|
| 313 |
+
user_text=text, transcript=None, transcribe_ms=None,
|
| 314 |
+
phrasebook=hit,
|
| 315 |
+
llm_model=None if hit else LLM_MODEL_ID,
|
| 316 |
+
llm_ms=llm_ms,
|
| 317 |
+
reply_text=reply_text, tts_ms=tts_ms,
|
| 318 |
+
total_ms=int((time.perf_counter() - t0) * 1000),
|
| 319 |
+
error=tts_error,
|
| 320 |
+
)
|
| 321 |
+
return reply_text, audio_out
|
| 322 |
+
|
| 323 |
|
| 324 |
+
def _resolve_reply(
    user_text: str,
    output_lang: str,
) -> Tuple[Optional[str], Optional[dict], Optional[int]]:
    """Resolve one turn's reply: strict phrasebook first, then LLM.

    Shared by the voice and text tabs. Returns a
    (reply_text, phrasebook_hit_or_None, llm_ms_or_None) triple. Only an
    LLM failure yields reply_text=None; every other path hands the caller
    a usable string (falling back to the "(empty reply)" sentinel).

    When the strict lookup misses and the target is bam/ful, the nearest
    top-3 curated pairs are handed to the LLM as extra dynamic few-shot
    anchors (RAG-style). Misses on en/fr targets reach the LLM without
    extras — the curated phrasebooks only cover bam/ful.
    """
    import time

    match = phrasebook_lookup(user_text, output_lang)
    if match:
        logger.info(
            "Phrasebook hit (%s, score=%.2f): %r → %r [cat=%s]",
            match["match"],
            match["score"],
            user_text,
            match["target"],
            match["category"],
        )
        return (match["target"] or "(empty reply)"), match, None

    nearest = phrasebook_top_k(user_text, output_lang, k=3) or None
    if nearest:
        logger.info(
            "Phrasebook miss; RAG-injecting top-%d nearest (top score=%.2f)",
            len(nearest),
            nearest[0]["score"],
        )

    started = time.perf_counter()
    try:
        answer = get_llm().chat(
            user_text, target_lang=output_lang, extra_examples=nearest,
        )
    except Exception:  # pragma: no cover
        logger.exception("LLM call failed")
        return None, None, int((time.perf_counter() - started) * 1000)
    elapsed_ms = int((time.perf_counter() - started) * 1000)
    return (answer or "(empty reply)"), None, elapsed_ms
|
| 366 |
|
| 367 |
|
| 368 |
# ── Gradio UI ────────────────────────────────────────────────────────────────
|
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""TurnLogger — per-turn JSONL telemetry for the minimal baseline.
|
| 2 |
+
|
| 3 |
+
Every voice or text turn writes one self-contained line to
|
| 4 |
+
`data/field_turns.jsonl` (path overridable via FIELD_TURNS_PATH).
|
| 5 |
+
|
| 6 |
+
This is the foundation for:
|
| 7 |
+
- field-test review (read the JSONL after a session)
|
| 8 |
+
- phrasebook hit-rate measurement
|
| 9 |
+
- LLM A/B comparisons
|
| 10 |
+
- eventually, Stage-4 LoRA training-data curation
|
| 11 |
+
(every line already pairs an English/French input with a vetted
|
| 12 |
+
Bambara/Pular reply; we'll filter on phrasebook-hit + user-confirmed
|
| 13 |
+
turns later).
|
| 14 |
+
|
| 15 |
+
Schema (one JSON object per line):
|
| 16 |
+
{
|
| 17 |
+
"ts": "<ISO-8601 UTC>",
|
| 18 |
+
"tab": "voice" | "text",
|
| 19 |
+
"input_lang": "bam" | "ful" | "fr" | "en" | null,
|
| 20 |
+
"output_lang": "bam" | "ful" | "fr" | "en",
|
| 21 |
+
"user_text": "<raw input from text tab, or transcript for voice tab>",
|
| 22 |
+
"transcript": "<whisper output, voice tab only>" | null,
|
| 23 |
+
"transcribe_ms": <int> | null,
|
| 24 |
+
"phrasebook": { match, score, category, source, target } | null,
|
| 25 |
+
"llm_model": "<model id>" | null,
|
| 26 |
+
"llm_ms": <int> | null,
|
| 27 |
+
"reply_text": "<final reply that fed TTS>",
|
| 28 |
+
"tts_ms": <int> | null,
|
| 29 |
+
"total_ms": <int>,
|
| 30 |
+
"error": "<short error string>" | null
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
Notes:
|
| 34 |
+
- File path is gitignored (data/ is excluded by .gitignore).
|
| 35 |
+
- Append mode + line-buffered + lock — safe for the single-process Gradio
|
| 36 |
+
server. Not designed for multi-worker writes.
|
| 37 |
+
"""
|
| 38 |
+
from __future__ import annotations
|
| 39 |
+
|
| 40 |
+
import json
|
| 41 |
+
import logging
|
| 42 |
+
import os
|
| 43 |
+
import threading
|
| 44 |
+
from datetime import datetime, timezone
|
| 45 |
+
from pathlib import Path
|
| 46 |
+
from typing import Any, Optional
|
| 47 |
+
|
| 48 |
+
logger = logging.getLogger(__name__)
|
| 49 |
+
|
| 50 |
+
_DEFAULT_PATH = (
|
| 51 |
+
Path(__file__).resolve().parent.parent.parent / "data" / "field_turns.jsonl"
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _utcnow_iso() -> str:
|
| 56 |
+
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class TurnLogger:
|
| 60 |
+
"""Append-only JSONL logger. Thread-safe for one process."""
|
| 61 |
+
|
| 62 |
+
def __init__(self, path: Optional[str] = None) -> None:
|
| 63 |
+
env_path = os.environ.get("FIELD_TURNS_PATH")
|
| 64 |
+
self.path = Path(path or env_path or _DEFAULT_PATH)
|
| 65 |
+
self.path.parent.mkdir(parents=True, exist_ok=True)
|
| 66 |
+
self._lock = threading.Lock()
|
| 67 |
+
logger.info("TurnLogger writing to %s", self.path)
|
| 68 |
+
|
| 69 |
+
def log(self, **fields: Any) -> None:
|
| 70 |
+
"""Write one row. Always sets ts; leaves the rest to the caller.
|
| 71 |
+
|
| 72 |
+
Never raises — telemetry must not break the user-facing pipeline.
|
| 73 |
+
"""
|
| 74 |
+
row = {"ts": _utcnow_iso(), **fields}
|
| 75 |
+
try:
|
| 76 |
+
line = json.dumps(row, ensure_ascii=False)
|
| 77 |
+
with self._lock, self.path.open("a", encoding="utf-8") as fh:
|
| 78 |
+
fh.write(line + "\n")
|
| 79 |
+
except Exception as exc: # pragma: no cover
|
| 80 |
+
logger.warning("TurnLogger.log failed: %s", exc)
|
|
@@ -76,8 +76,17 @@ def _load_anchors(lang: str) -> list[dict]:
|
|
| 76 |
return data.get("pairs", [])
|
| 77 |
|
| 78 |
|
| 79 |
-
def _build_system_prompt(
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
full = LANG_FULL_NAME.get(target_lang, "English")
|
| 82 |
forbidden = FORBIDDEN_DRIFT.get(target_lang, "")
|
| 83 |
anchors = _load_anchors(target_lang)
|
|
@@ -114,6 +123,19 @@ def _build_system_prompt(target_lang: str) -> str:
|
|
| 114 |
if src and tgt:
|
| 115 |
lines.append(f"- {src} → {tgt}")
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
lines += [
|
| 118 |
"",
|
| 119 |
f"Always reply in {full}, even if the user writes to you in English, "
|
|
@@ -146,13 +168,23 @@ class MinimalClient:
|
|
| 146 |
self._client = InferenceClient(token=self.hf_token)
|
| 147 |
return self._client
|
| 148 |
|
| 149 |
-
def chat(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
"""Return a plain-text reply in `target_lang`.
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
On any error returns a short parenthetical error string so the caller
|
| 153 |
can still feed something into TTS / display.
|
| 154 |
"""
|
| 155 |
-
system_prompt = _build_system_prompt(target_lang)
|
| 156 |
try:
|
| 157 |
client = self._get_client()
|
| 158 |
completion = client.chat_completion(
|
|
|
|
| 76 |
return data.get("pairs", [])
|
| 77 |
|
| 78 |
|
| 79 |
+
def _build_system_prompt(
|
| 80 |
+
target_lang: str,
|
| 81 |
+
extra_examples: Optional[list[dict]] = None,
|
| 82 |
+
) -> str:
|
| 83 |
+
"""Assemble the per-call system prompt for a target output language.
|
| 84 |
+
|
| 85 |
+
`extra_examples`, when supplied, are appended after the curated 30-pair
|
| 86 |
+
gold list as additional dynamic few-shot anchoring — used by app_minimal
|
| 87 |
+
to inject the top-K nearest phrasebook entries when the strict short-
|
| 88 |
+
circuit misses.
|
| 89 |
+
"""
|
| 90 |
full = LANG_FULL_NAME.get(target_lang, "English")
|
| 91 |
forbidden = FORBIDDEN_DRIFT.get(target_lang, "")
|
| 92 |
anchors = _load_anchors(target_lang)
|
|
|
|
| 123 |
if src and tgt:
|
| 124 |
lines.append(f"- {src} → {tgt}")
|
| 125 |
|
| 126 |
+
if extra_examples:
|
| 127 |
+
lines += [
|
| 128 |
+
"",
|
| 129 |
+
"Additional reference phrases relevant to the current user input "
|
| 130 |
+
f"(curated gold {full} translations — use the same orthography and "
|
| 131 |
+
"style):",
|
| 132 |
+
]
|
| 133 |
+
for item in extra_examples:
|
| 134 |
+
src = (item.get("source") or "").strip()
|
| 135 |
+
tgt = (item.get("target") or "").strip()
|
| 136 |
+
if src and tgt:
|
| 137 |
+
lines.append(f"- {src} → {tgt}")
|
| 138 |
+
|
| 139 |
lines += [
|
| 140 |
"",
|
| 141 |
f"Always reply in {full}, even if the user writes to you in English, "
|
|
|
|
| 168 |
self._client = InferenceClient(token=self.hf_token)
|
| 169 |
return self._client
|
| 170 |
|
| 171 |
+
def chat(
|
| 172 |
+
self,
|
| 173 |
+
user_text: str,
|
| 174 |
+
target_lang: str = "bam",
|
| 175 |
+
extra_examples: Optional[list[dict]] = None,
|
| 176 |
+
) -> str:
|
| 177 |
"""Return a plain-text reply in `target_lang`.
|
| 178 |
|
| 179 |
+
`extra_examples` (optional) — list of {source, target} dicts that get
|
| 180 |
+
appended to the system prompt as additional dynamic few-shot. Used by
|
| 181 |
+
app_minimal to RAG-inject the top-K nearest phrasebook entries when
|
| 182 |
+
the strict phrasebook short-circuit misses.
|
| 183 |
+
|
| 184 |
On any error returns a short parenthetical error string so the caller
|
| 185 |
can still feed something into TTS / display.
|
| 186 |
"""
|
| 187 |
+
system_prompt = _build_system_prompt(target_lang, extra_examples)
|
| 188 |
try:
|
| 189 |
client = self._get_client()
|
| 190 |
completion = client.chat_completion(
|
|
@@ -121,3 +121,35 @@ def lookup(
|
|
| 121 |
"match": "fuzzy",
|
| 122 |
}
|
| 123 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
"match": "fuzzy",
|
| 122 |
}
|
| 123 |
return None
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def top_k(user_text: str, target_lang: str, k: int = 3) -> list[dict]:
    """Return the k closest phrasebook entries to `user_text` regardless of threshold.

    When the strict `lookup()` short-circuit misses, these entries serve as
    RAG-style few-shot context so the LLM is still anchored to locally
    relevant gold pairs. Results come back sorted by descending fuzzy score;
    never raises.
    """
    entries = _load_phrasebook(target_lang)
    if not entries:
        return []
    query = _normalize(user_text)
    if not query:
        return []

    def _similarity(norm_src: str) -> float:
        # Exact normalized match short-circuits to a perfect score.
        if norm_src == query:
            return 1.0
        return SequenceMatcher(None, query, norm_src).ratio()

    ranked = sorted(
        ((_similarity(entry["_norm"]), entry)
         for entry in entries
         if entry.get("_norm")),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return [
        {
            "source": entry.get("source"),
            "target": entry.get("target"),
            "category": entry.get("category"),
            "score": round(sim, 3),
        }
        for sim, entry in ranked[:k]
    ]
|