Spaces:

SalexAI
/

api

Sleeping

App Files Files Community

SalexAI committed on Feb 12

Commit

dccec3c

verified ·

1 Parent(s): 0a9dfed

Update app/main.py

Browse files

Files changed (1) hide show

app/main.py +42 -79

app/main.py CHANGED Viewed

@@ -1,26 +1,21 @@
 import asyncio
 import base64
 import json
-import os
 import uuid
-from typing import AsyncGenerator, Literal, Optional
 import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
-from fastapi.responses import JSONResponse, StreamingResponse
-from dotenv import load_dotenv
-from fastrtc import AdditionalOutputs, AsyncStreamHandler, Stream, wait_for_item
-# ---- Gemini (optional for later; right now we keep your echo handler working) ----
-# You can plug Gemini back in once bridge works.
-load_dotenv()
 app = FastAPI()
 # ---------------------------
-# Minimal VAD echo handler (server is already booting with this)
 # ---------------------------
 class EchoHandler(AsyncStreamHandler):
     def __init__(self, expected_layout: Literal["mono"] = "mono", output_sample_rate: int = 24000):
@@ -38,22 +33,24 @@ class EchoHandler(AsyncStreamHandler):
         if audio.dtype != np.int16:
             audio = audio.astype(np.int16)
-        # Echo back immediately as "audio"
         self.out_q.put_nowait((sr, audio.reshape(1, -1)))
     async def emit(self):
         return await wait_for_item(self.out_q)
 stream = Stream(
     handler=EchoHandler(),
     modality="audio",
     mode="send-receive",
-    additional_inputs=["voice_name"],  # placeholder for later
 )
 stream.mount(app)
 # ---------------------------
 # Helpers
 # ---------------------------
@@ -67,78 +64,56 @@ def int16_to_b64(audio: np.ndarray) -> str:
     return base64.b64encode(audio.tobytes()).decode("utf-8")
-# ---------------------------
-# Basic endpoints
-# ---------------------------
 @app.get("/")
 async def root():
-    return {"ok": True, "message": "FastRTC mounted. Use the mounted endpoints for WebRTC/WebSocket."}
 @app.get("/health")
 async def health():
     return {"ok": True}
-@app.get("/webrtc/new")
-async def webrtc_new():
-    """
-    Mint a webrtc_id to use with /outputs or /ws bridge.
-    """
-    webrtc_id = str(uuid.uuid4())
-    # Initialize internal connection state so output_stream has something to bind to later
-    # (FastRTC will create it lazily when first used, but we create a stable id for the client.)
-    return {"webrtc_id": webrtc_id}
-@app.get("/outputs")
-async def outputs(webrtc_id: str):
-    async def event_stream():
-        async for out in stream.output_stream(webrtc_id):
-            payload = json.dumps(out.args[0] if out.args else None)
-            yield f"event: output\ndata: {payload}\n\n"
-    return StreamingResponse(event_stream(), media_type="text/event-stream")
 # ---------------------------
-# Scratch-friendly WebSocket bridge
 # ---------------------------
 @app.websocket("/ws")
 async def ws_bridge(ws: WebSocket):
     await ws.accept()
-    webrtc_id: Optional[str] = None
-    out_task: Optional[asyncio.Task] = None
-    async def send_outputs_loop():
-        # Stream AdditionalOutputs + audio coming out of FastRTC
         try:
-            async for item in stream.output_stream(webrtc_id):
-                # item is AdditionalOutputs; forward as JSON
-                msg = item.args[0] if item.args else None
-                await ws.send_text(json.dumps({"type": "output", "data": msg}))
-        except Exception:
-            pass
-    async def send_audio_loop():
-        # Also poll the "audio" output if your handler emits raw audio tuples.
-        # FastRTC output_stream yields AdditionalOutputs only.
-        # So for audio we use stream.fetch_output(...) style by calling internal generator:
-        try:
-            async for out in stream.stream_output(webrtc_id):
-                # out can be (sr, np.ndarray) or AdditionalOutputs
                 if isinstance(out, AdditionalOutputs):
                     continue
                 sr, audio = out
                 audio = np.asarray(audio)
                 if audio.ndim == 2:
                     audio = audio.squeeze()
                 if audio.dtype != np.int16:
                     audio = audio.astype(np.int16)
                 await ws.send_text(json.dumps({
                     "type": "audio_delta",
                     "rate": int(sr),
-                    "data": int16_to_b64(audio)
                 }))
         except Exception:
-            pass
     try:
         while True:
@@ -147,39 +122,27 @@ async def ws_bridge(ws: WebSocket):
             t = msg.get("type")
             if t == "connect":
-                # create or use provided webrtc_id
-                webrtc_id = msg.get("webrtc_id") or str(uuid.uuid4())
-                # optionally set voice / other inputs (stored for handler)
-                voice = msg.get("voice") or "Puck"
-                try:
-                    await stream.set_input(webrtc_id, voice)
-                except Exception:
-                    # if set_input isn't supported in your exact FastRTC build, ignore
-                    pass
-                # start output loops once
-                if out_task is None:
-                    out_task = asyncio.gather(send_audio_loop(), send_outputs_loop())
-                await ws.send_text(json.dumps({"type": "ready", "webrtc_id": webrtc_id}))
                 continue
             if t == "audio":
-                if not webrtc_id:
-                    await ws.send_text(json.dumps({"type": "error", "message": "Not connected. Send {type:'connect'} first."}))
                     continue
                 b64 = msg.get("data")
                 rate = int(msg.get("rate") or 16000)
                 if not isinstance(b64, str) or not b64:
                     continue
                 audio = b64_to_int16(b64)
-                # FastRTC expects (sample_rate, np.ndarray)
-                await stream.send_input(webrtc_id, (rate, audio.reshape(1, -1)))
                 continue
             if t == "close":
@@ -192,7 +155,7 @@ async def ws_bridge(ws: WebSocket):
         pass
     finally:
         try:
-            if out_task:
-                out_task.cancel()
         except Exception:
             pass

 import asyncio
 import base64
 import json
 import uuid
+from typing import Optional, Literal
 import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import JSONResponse
+from fastrtc import Stream, AsyncStreamHandler, wait_for_item, AdditionalOutputs
 app = FastAPI()
 # ---------------------------
+# A tiny headless audio handler (echo) to validate the pipe.
+# Swap this out later for Gemini / other realtime models.
 # ---------------------------
 class EchoHandler(AsyncStreamHandler):
     def __init__(self, expected_layout: Literal["mono"] = "mono", output_sample_rate: int = 24000):
         if audio.dtype != np.int16:
             audio = audio.astype(np.int16)
+        # Echo straight back
         self.out_q.put_nowait((sr, audio.reshape(1, -1)))
     async def emit(self):
         return await wait_for_item(self.out_q)
+# IMPORTANT: do not pass additional_inputs here (string entries crash FastRTC 0.0.34).
 stream = Stream(
     handler=EchoHandler(),
     modality="audio",
     mode="send-receive",
 )
+# This mounts FastRTC’s internal routes; we’re also adding /ws below.
 stream.mount(app)
 # ---------------------------
 # Helpers
 # ---------------------------
     return base64.b64encode(audio.tobytes()).decode("utf-8")
 @app.get("/")
 async def root():
+    return {"ok": True, "message": "FastRTC mounted. Headless mode. Use /ws for Scratch."}
 @app.get("/health")
 async def health():
     return {"ok": True}
 # ---------------------------
+# Scratch-friendly WS bridge (no WebRTC needed client-side)
 # ---------------------------
 @app.websocket("/ws")
 async def ws_bridge(ws: WebSocket):
     await ws.accept()
+    session_id: Optional[str] = None
+    pump_task: Optional[asyncio.Task] = None
+    async def pump_outputs():
+        """
+        Pull audio outputs from FastRTC and forward to client.
+        NOTE: FastRTC 0.0.34 uses fetch_output() polling style.
+        """
         try:
+            while True:
+                out = await stream.fetch_output(session_id)
+                if out is None:
+                    await asyncio.sleep(0.01)
+                    continue
                 if isinstance(out, AdditionalOutputs):
+                    payload = out.args[0] if out.args else None
+                    await ws.send_text(json.dumps({"type": "output", "data": payload}))
                     continue
                 sr, audio = out
                 audio = np.asarray(audio)
                 if audio.ndim == 2:
                     audio = audio.squeeze()
                 if audio.dtype != np.int16:
                     audio = audio.astype(np.int16)
                 await ws.send_text(json.dumps({
                     "type": "audio_delta",
                     "rate": int(sr),
+                    "data": int16_to_b64(audio),
                 }))
         except Exception:
+            return
     try:
         while True:
             t = msg.get("type")
             if t == "connect":
+                session_id = msg.get("session_id") or str(uuid.uuid4())
+                # start pump once
+                if pump_task is None:
+                    pump_task = asyncio.create_task(pump_outputs())
+                await ws.send_text(json.dumps({"type": "ready", "session_id": session_id}))
                 continue
             if t == "audio":
+                if not session_id:
+                    await ws.send_text(json.dumps({"type": "error", "message": "Send {type:'connect'} first."}))
                     continue
                 b64 = msg.get("data")
                 rate = int(msg.get("rate") or 16000)
                 if not isinstance(b64, str) or not b64:
                     continue
                 audio = b64_to_int16(b64)
+                await stream.send_input(session_id, (rate, audio.reshape(1, -1)))
                 continue
             if t == "close":
         pass
     finally:
         try:
+            if pump_task:
+                pump_task.cancel()
         except Exception:
             pass