Spaces:
Paused
Paused
MacBook pro
committed on
Commit
·
fc4f80f
1
Parent(s):
cbbb792
feat(voice): add voice processor skeleton and integrate timing into audio metrics
Browse files- app.py +10 -2
- voice_processor.py +90 -0
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import traceback
|
|
| 6 |
import time
|
| 7 |
from metrics import metrics as _metrics_singleton, Metrics
|
| 8 |
from config import config
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI(title="Mirage Phase 1+2 Scaffold")
|
| 11 |
|
|
@@ -49,11 +50,18 @@ async def _echo_websocket(websocket: WebSocket, kind: str):
|
|
| 49 |
interval = None
|
| 50 |
if last_ts is not None:
|
| 51 |
interval = now - last_ts
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
last_ts = now
|
| 54 |
elif kind == "video":
|
| 55 |
metrics.record_video_frame(size_bytes=size)
|
| 56 |
-
# Echo straight back
|
| 57 |
await websocket.send_bytes(data)
|
| 58 |
except WebSocketDisconnect:
|
| 59 |
# Silent disconnect
|
|
|
|
| 6 |
import time
|
| 7 |
from metrics import metrics as _metrics_singleton, Metrics
|
| 8 |
from config import config
|
| 9 |
+
from voice_processor import voice_processor
|
| 10 |
|
| 11 |
app = FastAPI(title="Mirage Phase 1+2 Scaffold")
|
| 12 |
|
|
|
|
| 50 |
interval = None
|
| 51 |
if last_ts is not None:
|
| 52 |
interval = now - last_ts
|
| 53 |
+
|
| 54 |
+
infer_ms = None
|
| 55 |
+
if config.voice_enable:
|
| 56 |
+
# Run through voice processor (pass-through currently)
|
| 57 |
+
processed_view, infer_ms = voice_processor.process_pcm_int16(data, sample_rate=16000)
|
| 58 |
+
# Use processed bytes for echo (still original length)
|
| 59 |
+
data = processed_view.tobytes()
|
| 60 |
+
metrics.record_audio_chunk(size_bytes=size, loop_interval_ms=interval, infer_time_ms=infer_ms)
|
| 61 |
last_ts = now
|
| 62 |
elif kind == "video":
|
| 63 |
metrics.record_video_frame(size_bytes=size)
|
| 64 |
+
# Echo straight back (audio maybe processed)
|
| 65 |
await websocket.send_bytes(data)
|
| 66 |
except WebSocketDisconnect:
|
| 67 |
# Silent disconnect
|
voice_processor.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Voice Processor Skeleton.
|
| 2 |
+
|
| 3 |
+
Phase: B3
|
| 4 |
+
|
| 5 |
+
Provides a minimal singleton VoiceProcessor with a lazy load() and a
|
| 6 |
+
process_pcm_int16 method. For now it only measures timing and returns
|
| 7 |
+
pass-through audio.
|
| 8 |
+
|
| 9 |
+
Future expansion hooks:
|
| 10 |
+
- VAD / segmentation
|
| 11 |
+
- Feature extraction (MFCCs, log-mel)
|
| 12 |
+
- Model inference (ASR, voice conversion, TTS, etc.)
|
| 13 |
+
- Streaming state management
|
| 14 |
+
|
| 15 |
+
The design keeps the API intentionally small so upstream code can remain
|
| 16 |
+
stable while internals evolve.
|
| 17 |
+
"""
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import threading
|
| 21 |
+
import time
|
| 22 |
+
from dataclasses import dataclass
|
| 23 |
+
from typing import Optional
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class VoiceResult:
    """Output bundle produced by one voice-processing pass.

    The audio is currently echoed through unchanged; richer fields
    (tokens, transcript text, extracted features) may be added later.
    """

    # Zero-copy view over the processed int16 PCM samples.
    pcm: memoryview
    # Sample rate of ``pcm`` in Hz.
    sample_rate: int
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class VoiceProcessor:
    """Singleton voice-processing pipeline (pass-through for Phase B3).

    Exposes a tiny, stable API so upstream code does not change while
    internals grow (VAD, feature extraction, model inference are
    planned hooks — see module docstring).
    """

    _instance: Optional["VoiceProcessor"] = None
    _instance_lock = threading.Lock()

    def __init__(self) -> None:
        self._loaded = False
        self._load_lock = threading.Lock()
        # Placeholder flag for model / pipeline objects to come.
        self._models_ready = False

    # ------------- Singleton Access -------------
    @classmethod
    def get(cls) -> "VoiceProcessor":
        """Return the process-wide instance, creating it on first call.

        Uses double-checked locking so the common (already-created)
        path takes no lock.
        """
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:  # double-checked
                    cls._instance = cls()
        return cls._instance

    # ------------- Lifecycle -------------
    def load(self) -> None:
        """Lazily initialize models / resources (idempotent, thread-safe).

        Keep it extremely fast right now; real setup work arrives in a
        later phase.
        """
        if self._loaded:
            return
        with self._load_lock:
            if self._loaded:
                return
            # Simulate minimal setup work (no sleep to keep fast).
            self._models_ready = True
            self._loaded = True

    # ------------- Processing -------------
    def process_pcm_int16(
        self, pcm: bytes | bytearray | memoryview, sample_rate: int
    ) -> tuple[memoryview, float]:
        """Process an int16 PCM chunk (currently pass-through).

        Args:
            pcm: raw int16 PCM samples.
            sample_rate: sample rate in Hz (unused for now; kept for the
                future signal chain).

        Returns:
            Tuple of (processed_pcm_memoryview, elapsed_ms). The view
            aliases the caller's buffer — zero-copy, no bytes duplicated.
        """
        if not self._loaded:
            self.load()
        # FIX: use the monotonic perf_counter() for elapsed timing.
        # time.time() is wall-clock and can jump (NTP adjustments),
        # which could report negative or wildly wrong infer_ms values.
        start = time.perf_counter()
        # Pass-through: prefer a zero-copy memoryview over copying.
        mv = memoryview(pcm)
        # Placeholder for future signal chain (VAD, features, inference).
        elapsed_ms = (time.perf_counter() - start) * 1000.0
        return mv, elapsed_ms
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# Export singleton accessor
# Module-level convenience handle: callers can simply
# `from voice_processor import voice_processor` rather than invoking
# VoiceProcessor.get() themselves. Created (but not loaded) at import
# time; load() is deferred until the first process_pcm_int16 call.
voice_processor = VoiceProcessor.get()
|