Spaces:
Running
Running
Add French ASR model
Browse files
- api/endpoints.py +23 -5
- app.py +29 -11
- config.py +65 -24
- core/asr_service.py +21 -2
- frontend/index.html +4 -0
- frontend/js/main.js +63 -2
api/endpoints.py
CHANGED
|
@@ -10,7 +10,9 @@ from fastapi.responses import FileResponse, JSONResponse
|
|
| 10 |
from core.connection_manager import ConnectionManager
|
| 11 |
from core.asr_service import ASRService
|
| 12 |
from core.audio_streamer import AudioStreamer
|
| 13 |
-
from config import RADIO_URLS
|
|
|
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
router = APIRouter()
|
|
@@ -20,6 +22,7 @@ manager = ConnectionManager()
|
|
| 20 |
PCM_FOR_ASR_QUEUE = asyncio.Queue(maxsize=100)
|
| 21 |
BACKGROUND_TASKS: dict[str, asyncio.Task | None] = {"audio": None, "asr": None}
|
| 22 |
CURRENT_RADIO_URL = None
|
|
|
|
| 23 |
|
| 24 |
@router.get("/")
|
| 25 |
async def get_root():
|
|
@@ -31,10 +34,11 @@ async def get_stations():
|
|
| 31 |
"""Returns the list of available radio stations."""
|
| 32 |
return JSONResponse(content=RADIO_URLS)
|
| 33 |
|
|
|
|
| 34 |
@router.websocket("/ws")
|
| 35 |
async def websocket_endpoint(websocket: WebSocket, station: str = Query(None)):
|
| 36 |
"""Handles the WebSocket connection for a single client."""
|
| 37 |
-
global CURRENT_RADIO_URL, BACKGROUND_TASKS
|
| 38 |
|
| 39 |
# Determine the radio URL to use
|
| 40 |
radio_url = RADIO_URLS.get(station) if station else None
|
|
@@ -46,8 +50,16 @@ async def websocket_endpoint(websocket: WebSocket, station: str = Query(None)):
|
|
| 46 |
await websocket.close(code=1011, reason="No radio stations configured")
|
| 47 |
return
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# Stop existing tasks if running
|
| 52 |
if BACKGROUND_TASKS["audio"]:
|
| 53 |
BACKGROUND_TASKS["audio"].cancel()
|
|
@@ -59,9 +71,15 @@ async def websocket_endpoint(websocket: WebSocket, station: str = Query(None)):
|
|
| 59 |
|
| 60 |
await manager.connect(websocket)
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Start background tasks if this is the first client or if they were stopped
|
| 63 |
if not BACKGROUND_TASKS["audio"] and not BACKGROUND_TASKS["asr"]:
|
| 64 |
-
logger.info(f"Starting background tasks for station: {station or 'default'}")
|
| 65 |
audio_streamer = AudioStreamer(PCM_FOR_ASR_QUEUE, manager, radio_url)
|
| 66 |
asr_service = ASRService(PCM_FOR_ASR_QUEUE, manager)
|
| 67 |
BACKGROUND_TASKS["audio"] = asyncio.create_task(audio_streamer.run_fetching_loop())
|
|
|
|
| 10 |
from core.connection_manager import ConnectionManager
|
| 11 |
from core.asr_service import ASRService
|
| 12 |
from core.audio_streamer import AudioStreamer
|
| 13 |
+
from config import RADIO_URLS, STATION_LANGUAGES, CURRENT_MODEL
|
| 14 |
+
# Add to the imports
|
| 15 |
+
from config import STATION_LANGUAGES
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
router = APIRouter()
|
|
|
|
| 22 |
PCM_FOR_ASR_QUEUE = asyncio.Queue(maxsize=100)
|
| 23 |
BACKGROUND_TASKS: dict[str, asyncio.Task | None] = {"audio": None, "asr": None}
|
| 24 |
CURRENT_RADIO_URL = None
|
| 25 |
+
CURRENT_LANGUAGE = "en"
|
| 26 |
|
| 27 |
@router.get("/")
|
| 28 |
async def get_root():
|
|
|
|
| 34 |
"""Returns the list of available radio stations."""
|
| 35 |
return JSONResponse(content=RADIO_URLS)
|
| 36 |
|
| 37 |
+
# Update the websocket_endpoint function to send language info
|
| 38 |
@router.websocket("/ws")
|
| 39 |
async def websocket_endpoint(websocket: WebSocket, station: str = Query(None)):
|
| 40 |
"""Handles the WebSocket connection for a single client."""
|
| 41 |
+
global CURRENT_RADIO_URL, BACKGROUND_TASKS, CURRENT_LANGUAGE
|
| 42 |
|
| 43 |
# Determine the radio URL to use
|
| 44 |
radio_url = RADIO_URLS.get(station) if station else None
|
|
|
|
| 50 |
await websocket.close(code=1011, reason="No radio stations configured")
|
| 51 |
return
|
| 52 |
|
| 53 |
+
# Determine the language for this station
|
| 54 |
+
station_language = STATION_LANGUAGES.get(station, "en")
|
| 55 |
+
|
| 56 |
+
# Check if we need to switch stations or languages
|
| 57 |
+
if CURRENT_RADIO_URL != radio_url or CURRENT_LANGUAGE != station_language:
|
| 58 |
+
# Set config.CURRENT_MODEL to this station's language so the matching ASR model directory is selected
|
| 59 |
+
import config
|
| 60 |
+
config.CURRENT_MODEL = station_language
|
| 61 |
+
CURRENT_LANGUAGE = station_language
|
| 62 |
+
|
| 63 |
# Stop existing tasks if running
|
| 64 |
if BACKGROUND_TASKS["audio"]:
|
| 65 |
BACKGROUND_TASKS["audio"].cancel()
|
|
|
|
| 71 |
|
| 72 |
await manager.connect(websocket)
|
| 73 |
|
| 74 |
+
# Send initial language info to the client
|
| 75 |
+
await websocket.send_json({
|
| 76 |
+
"type": "language",
|
| 77 |
+
"payload": {"language": station_language, "station": station}
|
| 78 |
+
})
|
| 79 |
+
|
| 80 |
# Start background tasks if this is the first client or if they were stopped
|
| 81 |
if not BACKGROUND_TASKS["audio"] and not BACKGROUND_TASKS["asr"]:
|
| 82 |
+
logger.info(f"Starting background tasks for station: {station or 'default'} (language: {station_language})")
|
| 83 |
audio_streamer = AudioStreamer(PCM_FOR_ASR_QUEUE, manager, radio_url)
|
| 84 |
asr_service = ASRService(PCM_FOR_ASR_QUEUE, manager)
|
| 85 |
BACKGROUND_TASKS["audio"] = asyncio.create_task(audio_streamer.run_fetching_loop())
|
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
"""
|
| 2 |
Main application file to initialize and run the FastAPI server.
|
| 3 |
"""
|
|
@@ -8,7 +9,10 @@ from fastapi.staticfiles import StaticFiles
|
|
| 8 |
from huggingface_hub import snapshot_download
|
| 9 |
|
| 10 |
from api import endpoints
|
| 11 |
-
from config import
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Setup basic logging
|
| 14 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
@@ -18,19 +22,34 @@ app = FastAPI(title="Live Radio Karaoke")
|
|
| 18 |
|
| 19 |
@app.on_event("startup")
|
| 20 |
async def download_model_if_needed():
|
| 21 |
-
"""Check for and download the ASR
|
| 22 |
-
logger.info("Checking for ASR
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
| 26 |
try:
|
| 27 |
-
snapshot_download(repo_id=
|
| 28 |
-
logger.info("
|
| 29 |
except Exception as e:
|
| 30 |
-
logger.error(f"Failed to download model: {e}")
|
| 31 |
raise
|
| 32 |
else:
|
| 33 |
-
logger.info("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Include the API router
|
| 36 |
app.include_router(endpoints.router)
|
|
@@ -38,7 +57,6 @@ app.include_router(endpoints.router)
|
|
| 38 |
# Mount the frontend directory to serve static files (HTML, CSS, JS)
|
| 39 |
app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
|
| 40 |
|
| 41 |
-
|
| 42 |
if __name__ == "__main__":
|
| 43 |
import uvicorn
|
| 44 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
"""
|
| 3 |
Main application file to initialize and run the FastAPI server.
|
| 4 |
"""
|
|
|
|
| 9 |
from huggingface_hub import snapshot_download
|
| 10 |
|
| 11 |
from api import endpoints
|
| 12 |
+
from config import (
|
| 13 |
+
MODEL_DIR_EN, REPO_ID_EN,
|
| 14 |
+
MODEL_DIR_FR, REPO_ID_FR
|
| 15 |
+
)
|
| 16 |
|
| 17 |
# Setup basic logging
|
| 18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
|
|
| 22 |
|
| 23 |
@app.on_event("startup")
|
| 24 |
async def download_model_if_needed():
|
| 25 |
+
"""Check for and download the ASR models on startup."""
|
| 26 |
+
logger.info("Checking for ASR models...")
|
| 27 |
+
|
| 28 |
+
# Check and download English model
|
| 29 |
+
tokens_path_en = os.path.join(MODEL_DIR_EN, "tokens.txt")
|
| 30 |
+
if not os.path.exists(tokens_path_en):
|
| 31 |
+
logger.warning(f"English model not found in {MODEL_DIR_EN}. Downloading from Hugging Face Hub...")
|
| 32 |
try:
|
| 33 |
+
snapshot_download(repo_id=REPO_ID_EN, local_dir=MODEL_DIR_EN, local_dir_use_symlinks=False)
|
| 34 |
+
logger.info("English model download complete.")
|
| 35 |
except Exception as e:
|
| 36 |
+
logger.error(f"Failed to download English model: {e}")
|
| 37 |
raise
|
| 38 |
else:
|
| 39 |
+
logger.info("English model found locally.")
|
| 40 |
+
|
| 41 |
+
# Check and download French model
|
| 42 |
+
tokens_path_fr = os.path.join(MODEL_DIR_FR, "tokens.txt")
|
| 43 |
+
if not os.path.exists(tokens_path_fr):
|
| 44 |
+
logger.warning(f"French model not found in {MODEL_DIR_FR}. Downloading from Hugging Face Hub...")
|
| 45 |
+
try:
|
| 46 |
+
snapshot_download(repo_id=REPO_ID_FR, local_dir=MODEL_DIR_FR, local_dir_use_symlinks=False)
|
| 47 |
+
logger.info("French model download complete.")
|
| 48 |
+
except Exception as e:
|
| 49 |
+
logger.error(f"Failed to download French model: {e}")
|
| 50 |
+
raise
|
| 51 |
+
else:
|
| 52 |
+
logger.info("French model found locally.")
|
| 53 |
|
| 54 |
# Include the API router
|
| 55 |
app.include_router(endpoints.router)
|
|
|
|
| 57 |
# Mount the frontend directory to serve static files (HTML, CSS, JS)
|
| 58 |
app.mount("/frontend", StaticFiles(directory="frontend"), name="frontend")
|
| 59 |
|
|
|
|
| 60 |
if __name__ == "__main__":
|
| 61 |
import uvicorn
|
| 62 |
uvicorn.run(app, host="0.0.0.0", port=8000)
|
config.py
CHANGED
|
@@ -5,76 +5,117 @@ Centralized configuration for the Live Radio Karaoke application.
|
|
| 5 |
import os
|
| 6 |
|
| 7 |
# ASR Model Configuration
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Radio Station Configuration
|
| 12 |
RADIO_URLS = {
|
| 13 |
## English
|
| 14 |
-
|
| 15 |
-
# Existing English stations
|
| 16 |
"KEXP (Seattle, 64 kbps)": "https://kexp.streamguys1.com/kexp64.aac",
|
| 17 |
"KEXP (Seattle, 160 kbps)": "https://kexp.streamguys1.com/kexp160.aac",
|
| 18 |
"NPR": "https://npr-ice.streamguys1.com/live.mp3",
|
| 19 |
"WYPR 88.1 FM (Baltimore)": "https://wtmd-ice.streamguys1.com/wypr-1-mp3",
|
| 20 |
"WAMU 88.5 FM (Washington DC)": "https://wamu.cdnstream1.com/wamu.mp3",
|
| 21 |
"BBC World Service": "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service",
|
| 22 |
-
|
| 23 |
-
# BBC UK talk & news
|
| 24 |
"BBC Radio 4 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_fourfm",
|
| 25 |
"BBC Radio 5 Live (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_five_live_online_nonuk",
|
| 26 |
"BBC Radio 2 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_two",
|
| 27 |
-
|
| 28 |
-
# US public radio / talk
|
| 29 |
"KQED NPR (San Francisco)": "https://streams.kqed.org/kqedradio",
|
| 30 |
"WNYC 93.9 FM (New York)": "http://stream.wnyc.org/wnycfm",
|
| 31 |
"WBUR 90.9 FM (Boston)": "http://icecast.wbur.org/wbur",
|
| 32 |
"KPCC 89.3 FM (Los Angeles)": "http://kpcclive.streamguys1.com/kpcc64.aac",
|
| 33 |
"WHYY 90.9 FM (Philadelphia)": "http://whyy.streamguys1.com/whyy-mp3",
|
| 34 |
-
|
| 35 |
-
# International English news
|
| 36 |
"ABC News Radio (Australia)": "http://live-radio01.mediahubaustralia.com/PBW/mp3/",
|
| 37 |
"CBC Radio One (Toronto)": "http://cbc_r1_tor.akacast.akamaistream.net/7/15/451661/v1/rc.akacast.akamaistream.net/cbc_r1_tor",
|
| 38 |
"Voice of America (VOA News Now)": "https://voa-18.akacast.akamaistream.net/7/983/437752/v1/ibb.akacast.akamaistream.net/voa-18",
|
| 39 |
-
"Al Jazeera English (Audio)": "https://live-hls-web-aje.getaj.net/AJE/01.m3u8",
|
| 40 |
-
|
| 41 |
-
# Extra talk / variety
|
| 42 |
"PRI The World": "http://stream.pri.org:8000/pri.mp3",
|
| 43 |
-
"Radio Paradise (USA, Mix)": "http://stream.radioparadise.com/mp3-128",
|
| 44 |
"KCRW 89.9 FM (Santa Monica)": "http://kcrw.streamguys1.com/kcrw_192",
|
| 45 |
|
| 46 |
## French
|
| 47 |
-
|
| 48 |
-
"France
|
|
|
|
|
|
|
|
|
|
| 49 |
}
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
# Default station
|
| 54 |
DEFAULT_RADIO_URL = RADIO_URLS["NPR"]
|
| 55 |
|
| 56 |
# Audio Processing Configuration
|
| 57 |
-
# 6400 bytes = 3200 samples (s16le) = 0.2 seconds of audio
|
| 58 |
CHUNK_SIZE = 6400
|
| 59 |
SAMPLE_RATE = 16000
|
| 60 |
BYTES_PER_SAMPLE = 2 # s16le format is 16-bit -> 2 bytes
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def get_asr_config() -> dict:
|
| 63 |
"""
|
| 64 |
Returns the configuration dictionary for the sherpa-onnx ASR model.
|
| 65 |
Checks for the existence of model files.
|
| 66 |
"""
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
raise FileNotFoundError(
|
| 69 |
-
f"ASR model not found in {
|
| 70 |
"Please run the download script or ensure the path is correct."
|
| 71 |
)
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
return {
|
| 74 |
-
"tokens": os.path.join(
|
| 75 |
-
"encoder":
|
| 76 |
-
"decoder":
|
| 77 |
-
"joiner":
|
| 78 |
"enable_endpoint_detection": True,
|
| 79 |
"num_threads": os.cpu_count() or 1,
|
| 80 |
"rule3_min_utterance_length": 300,
|
|
|
|
| 5 |
import os
|
| 6 |
|
| 7 |
# ASR Model Configuration
|
| 8 |
+
MODEL_DIR_EN = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
|
| 9 |
+
REPO_ID_EN = "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
|
| 10 |
+
|
| 11 |
+
MODEL_DIR_FR = "./sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
|
| 12 |
+
REPO_ID_FR = "csukuangfj/sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
|
| 13 |
+
|
| 14 |
+
# Current model in use
|
| 15 |
+
CURRENT_MODEL = "en" # Default to English
|
| 16 |
+
MODEL_DIRS = {
|
| 17 |
+
"en": MODEL_DIR_EN,
|
| 18 |
+
"fr": MODEL_DIR_FR
|
| 19 |
+
}
|
| 20 |
+
REPO_IDS = {
|
| 21 |
+
"en": REPO_ID_EN,
|
| 22 |
+
"fr": REPO_ID_FR
|
| 23 |
+
}
|
| 24 |
|
| 25 |
# Radio Station Configuration
|
| 26 |
RADIO_URLS = {
|
| 27 |
## English
|
|
|
|
|
|
|
| 28 |
"KEXP (Seattle, 64 kbps)": "https://kexp.streamguys1.com/kexp64.aac",
|
| 29 |
"KEXP (Seattle, 160 kbps)": "https://kexp.streamguys1.com/kexp160.aac",
|
| 30 |
"NPR": "https://npr-ice.streamguys1.com/live.mp3",
|
| 31 |
"WYPR 88.1 FM (Baltimore)": "https://wtmd-ice.streamguys1.com/wypr-1-mp3",
|
| 32 |
"WAMU 88.5 FM (Washington DC)": "https://wamu.cdnstream1.com/wamu.mp3",
|
| 33 |
"BBC World Service": "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service",
|
|
|
|
|
|
|
| 34 |
"BBC Radio 4 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_fourfm",
|
| 35 |
"BBC Radio 5 Live (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_five_live_online_nonuk",
|
| 36 |
"BBC Radio 2 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_two",
|
|
|
|
|
|
|
| 37 |
"KQED NPR (San Francisco)": "https://streams.kqed.org/kqedradio",
|
| 38 |
"WNYC 93.9 FM (New York)": "http://stream.wnyc.org/wnycfm",
|
| 39 |
"WBUR 90.9 FM (Boston)": "http://icecast.wbur.org/wbur",
|
| 40 |
"KPCC 89.3 FM (Los Angeles)": "http://kpcclive.streamguys1.com/kpcc64.aac",
|
| 41 |
"WHYY 90.9 FM (Philadelphia)": "http://whyy.streamguys1.com/whyy-mp3",
|
|
|
|
|
|
|
| 42 |
"ABC News Radio (Australia)": "http://live-radio01.mediahubaustralia.com/PBW/mp3/",
|
| 43 |
"CBC Radio One (Toronto)": "http://cbc_r1_tor.akacast.akamaistream.net/7/15/451661/v1/rc.akacast.akamaistream.net/cbc_r1_tor",
|
| 44 |
"Voice of America (VOA News Now)": "https://voa-18.akacast.akamaistream.net/7/983/437752/v1/ibb.akacast.akamaistream.net/voa-18",
|
| 45 |
+
"Al Jazeera English (Audio)": "https://live-hls-web-aje.getaj.net/AJE/01.m3u8",
|
|
|
|
|
|
|
| 46 |
"PRI The World": "http://stream.pri.org:8000/pri.mp3",
|
| 47 |
+
"Radio Paradise (USA, Mix)": "http://stream.radioparadise.com/mp3-128",
|
| 48 |
"KCRW 89.9 FM (Santa Monica)": "http://kcrw.streamguys1.com/kcrw_192",
|
| 49 |
|
| 50 |
## French
|
| 51 |
+
"France Inter": "https://direct.franceinter.fr/live/franceinter-midfi.mp3",
|
| 52 |
+
"France Info": "https://direct.franceinfo.fr/live/franceinfo-midfi.mp3",
|
| 53 |
+
"France Culture": "https://direct.franceculture.fr/live/franceculture-midfi.mp3",
|
| 54 |
+
"FIP": "https://direct.fip.fr/live/fip-midfi.mp3",
|
| 55 |
+
"Radio Classique": "https://radioclassique.ice.infomaniak.ch/radioclassique-high.mp3",
|
| 56 |
}
|
| 57 |
|
| 58 |
+
# Map stations to languages
|
| 59 |
+
STATION_LANGUAGES = {
|
| 60 |
+
"France Inter": "fr",
|
| 61 |
+
"France Info": "fr",
|
| 62 |
+
"France Culture": "fr",
|
| 63 |
+
"FIP": "fr",
|
| 64 |
+
"Radio Classique": "fr",
|
| 65 |
+
}
|
| 66 |
|
| 67 |
+
# Default to English for all other stations
|
| 68 |
+
for station in RADIO_URLS:
|
| 69 |
+
if station not in STATION_LANGUAGES:
|
| 70 |
+
STATION_LANGUAGES[station] = "en"
|
| 71 |
|
| 72 |
+
# Default station
|
| 73 |
DEFAULT_RADIO_URL = RADIO_URLS["NPR"]
|
| 74 |
|
| 75 |
# Audio Processing Configuration
|
|
|
|
| 76 |
CHUNK_SIZE = 6400
|
| 77 |
SAMPLE_RATE = 16000
|
| 78 |
BYTES_PER_SAMPLE = 2 # s16le format is 16-bit -> 2 bytes
|
| 79 |
|
| 80 |
+
def get_current_model_dir():
|
| 81 |
+
"""Returns the current model directory based on CURRENT_MODEL setting."""
|
| 82 |
+
return MODEL_DIRS.get(CURRENT_MODEL, MODEL_DIR_EN)
|
| 83 |
+
|
| 84 |
+
def get_current_repo_id():
|
| 85 |
+
"""Returns the current repo ID based on CURRENT_MODEL setting."""
|
| 86 |
+
return REPO_IDS.get(CURRENT_MODEL, REPO_ID_EN)
|
| 87 |
+
|
| 88 |
def get_asr_config() -> dict:
|
| 89 |
"""
|
| 90 |
Returns the configuration dictionary for the sherpa-onnx ASR model.
|
| 91 |
Checks for the existence of model files.
|
| 92 |
"""
|
| 93 |
+
model_dir = get_current_model_dir()
|
| 94 |
+
|
| 95 |
+
if not os.path.exists(os.path.join(model_dir, "tokens.txt")):
|
| 96 |
raise FileNotFoundError(
|
| 97 |
+
f"ASR model not found in {model_dir}. "
|
| 98 |
"Please run the download script or ensure the path is correct."
|
| 99 |
)
|
| 100 |
|
| 101 |
+
# Use int8 models if available; otherwise fall back to the regular models
|
| 102 |
+
encoder_path = os.path.join(model_dir, "encoder-epoch-99-avg-1.int8.onnx")
|
| 103 |
+
decoder_path = os.path.join(model_dir, "decoder-epoch-99-avg-1.int8.onnx")
|
| 104 |
+
joiner_path = os.path.join(model_dir, "joiner-epoch-99-avg-1.int8.onnx")
|
| 105 |
+
|
| 106 |
+
# Fall back to non-int8 models if the int8 versions don't exist
|
| 107 |
+
if not os.path.exists(encoder_path):
|
| 108 |
+
encoder_path = os.path.join(model_dir, "encoder.onnx")
|
| 109 |
+
if not os.path.exists(decoder_path):
|
| 110 |
+
decoder_path = os.path.join(model_dir, "decoder.onnx")
|
| 111 |
+
if not os.path.exists(joiner_path):
|
| 112 |
+
joiner_path = os.path.join(model_dir, "joiner.onnx")
|
| 113 |
+
|
| 114 |
return {
|
| 115 |
+
"tokens": os.path.join(model_dir, "tokens.txt"),
|
| 116 |
+
"encoder": encoder_path,
|
| 117 |
+
"decoder": decoder_path,
|
| 118 |
+
"joiner": joiner_path,
|
| 119 |
"enable_endpoint_detection": True,
|
| 120 |
"num_threads": os.cpu_count() or 1,
|
| 121 |
"rule3_min_utterance_length": 300,
|
core/asr_service.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
"""
|
| 2 |
Handles the real-time speech-to-text transcription using sherpa-onnx.
|
| 3 |
"""
|
|
@@ -6,7 +7,7 @@ import logging
|
|
| 6 |
from typing import Tuple
|
| 7 |
import numpy as np
|
| 8 |
import sherpa_onnx
|
| 9 |
-
from config import get_asr_config, SAMPLE_RATE
|
| 10 |
from core.connection_manager import ConnectionManager
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
|
@@ -20,7 +21,8 @@ class ASRService:
|
|
| 20 |
self.asr_config = get_asr_config()
|
| 21 |
self.recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(**self.asr_config)
|
| 22 |
self.stream = self.recognizer.create_stream()
|
| 23 |
-
|
|
|
|
| 24 |
|
| 25 |
def _process_chunk(self, pcm_chunk: bytes) -> dict | None:
|
| 26 |
"""Processes a single PCM chunk with the ASR recognizer."""
|
|
@@ -53,6 +55,23 @@ class ASRService:
|
|
| 53 |
|
| 54 |
try:
|
| 55 |
while True:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
pcm_chunk, chunk_start_time = await self.pcm_queue.get()
|
| 57 |
|
| 58 |
if current_utterance_abs_start_time is None:
|
|
|
|
| 1 |
+
# core/asr_service.py
|
| 2 |
"""
|
| 3 |
Handles the real-time speech-to-text transcription using sherpa-onnx.
|
| 4 |
"""
|
|
|
|
| 7 |
from typing import Tuple
|
| 8 |
import numpy as np
|
| 9 |
import sherpa_onnx
|
| 10 |
+
from config import get_asr_config, SAMPLE_RATE, CURRENT_MODEL
|
| 11 |
from core.connection_manager import ConnectionManager
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
|
|
|
| 21 |
self.asr_config = get_asr_config()
|
| 22 |
self.recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(**self.asr_config)
|
| 23 |
self.stream = self.recognizer.create_stream()
|
| 24 |
+
self.current_model = CURRENT_MODEL
|
| 25 |
+
logger.info(f"ASR Service initialized with {self.current_model} model.")
|
| 26 |
|
| 27 |
def _process_chunk(self, pcm_chunk: bytes) -> dict | None:
|
| 28 |
"""Processes a single PCM chunk with the ASR recognizer."""
|
|
|
|
| 55 |
|
| 56 |
try:
|
| 57 |
while True:
|
| 58 |
+
# Check if model needs to be switched
|
| 59 |
+
from config import CURRENT_MODEL
|
| 60 |
+
if CURRENT_MODEL != self.current_model:
|
| 61 |
+
logger.info(f"Switching ASR model from {self.current_model} to {CURRENT_MODEL}")
|
| 62 |
+
# Release old model
|
| 63 |
+
if self.stream:
|
| 64 |
+
del self.stream
|
| 65 |
+
if self.recognizer:
|
| 66 |
+
del self.recognizer
|
| 67 |
+
|
| 68 |
+
# Load new model
|
| 69 |
+
self.asr_config = get_asr_config()
|
| 70 |
+
self.recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(**self.asr_config)
|
| 71 |
+
self.stream = self.recognizer.create_stream()
|
| 72 |
+
self.current_model = CURRENT_MODEL
|
| 73 |
+
logger.info(f"ASR model switched to {self.current_model}")
|
| 74 |
+
|
| 75 |
pcm_chunk, chunk_start_time = await self.pcm_queue.get()
|
| 76 |
|
| 77 |
if current_utterance_abs_start_time is None:
|
frontend/index.html
CHANGED
|
@@ -14,6 +14,10 @@
|
|
| 14 |
<option value="">Loading stations...</option>
|
| 15 |
</select>
|
| 16 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
<div class="controls">
|
| 18 |
<button id="playBtn" onclick="play()">▶️ PLAY</button>
|
| 19 |
<button id="stopBtn" onclick="stop()">⏹️ STOP</button>
|
|
|
|
| 14 |
<option value="">Loading stations...</option>
|
| 15 |
</select>
|
| 16 |
</div>
|
| 17 |
+
<div class="settings-bar">
|
| 18 |
+
<label>Language:</label>
|
| 19 |
+
<span id="language-indicator">English</span>
|
| 20 |
+
</div>
|
| 21 |
<div class="controls">
|
| 22 |
<button id="playBtn" onclick="play()">▶️ PLAY</button>
|
| 23 |
<button id="stopBtn" onclick="stop()">⏹️ STOP</button>
|
frontend/js/main.js
CHANGED
|
@@ -29,7 +29,12 @@ const syncSlider = document.getElementById('sync-offset-slider');
|
|
| 29 |
const syncValueSpan = document.getElementById('sync-offset-value');
|
| 30 |
const stationSelector = document.getElementById('station-selector');
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
// Fetch available stations from the backend
|
|
|
|
| 33 |
async function loadStations() {
|
| 34 |
try {
|
| 35 |
const response = await fetch('/api/stations');
|
|
@@ -62,6 +67,7 @@ async function loadStations() {
|
|
| 62 |
}
|
| 63 |
}
|
| 64 |
|
|
|
|
| 65 |
function play() {
|
| 66 |
if (status === 'CONNECTING' || status === 'PLAYING') return;
|
| 67 |
|
|
@@ -70,6 +76,11 @@ function play() {
|
|
| 70 |
// Get selected station
|
| 71 |
currentStation = stationSelector.value;
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
status = 'CONNECTING';
|
| 74 |
updateUI();
|
| 75 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
|
@@ -89,8 +100,17 @@ function play() {
|
|
| 89 |
handleAudio(new Uint8Array(event.data));
|
| 90 |
} else {
|
| 91 |
const msg = JSON.parse(event.data);
|
| 92 |
-
if (msg.type === 'config') {
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
}
|
| 95 |
};
|
| 96 |
|
|
@@ -177,6 +197,7 @@ function initializePlayer() {
|
|
| 177 |
mediaSource.addEventListener('sourceopen', setupBuffer, { once: true });
|
| 178 |
}
|
| 179 |
|
|
|
|
| 180 |
function handleConfig(payload) {
|
| 181 |
console.log(`MIME type received: ${payload.mime}`);
|
| 182 |
mime = payload.mime;
|
|
@@ -273,6 +294,7 @@ function renderAllUtterances() {
|
|
| 273 |
utteranceHistory.forEach(utterance => renderNewUtterance(utterance));
|
| 274 |
}
|
| 275 |
|
|
|
|
| 276 |
function handleAsr(asrPayload) {
|
| 277 |
if (!audio || audio.readyState < 1) return;
|
| 278 |
|
|
@@ -384,6 +406,29 @@ function updateUI() {
|
|
| 384 |
stationSelector.disabled = status !== 'STOPPED';
|
| 385 |
}
|
| 386 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
// Load stations when the page loads
|
| 388 |
document.addEventListener('DOMContentLoaded', loadStations);
|
| 389 |
|
|
@@ -400,4 +445,20 @@ stationSelector.addEventListener('change', () => {
|
|
| 400 |
window.addEventListener('resize', function() {
|
| 401 |
// Maintain scroll position to bottom when resizing
|
| 402 |
setTimeout(scrollToLastUtterance, 100);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
});
|
|
|
|
| 29 |
const syncValueSpan = document.getElementById('sync-offset-value');
|
| 30 |
const stationSelector = document.getElementById('station-selector');
|
| 31 |
|
| 32 |
+
// Add to the global variables section
|
| 33 |
+
let currentLanguage = 'en';
|
| 34 |
+
const languageIndicator = document.getElementById('language-indicator');
|
| 35 |
+
|
| 36 |
// Fetch available stations from the backend
|
| 37 |
+
// Update the loadStations function to include language info
|
| 38 |
async function loadStations() {
|
| 39 |
try {
|
| 40 |
const response = await fetch('/api/stations');
|
|
|
|
| 67 |
}
|
| 68 |
}
|
| 69 |
|
| 70 |
+
// Update the play function to detect language from station
|
| 71 |
function play() {
|
| 72 |
if (status === 'CONNECTING' || status === 'PLAYING') return;
|
| 73 |
|
|
|
|
| 76 |
// Get selected station
|
| 77 |
currentStation = stationSelector.value;
|
| 78 |
|
| 79 |
+
// Update language indicator
|
| 80 |
+
const stationLanguage = getStationLanguage(currentStation);
|
| 81 |
+
currentLanguage = stationLanguage;
|
| 82 |
+
updateLanguageIndicator();
|
| 83 |
+
|
| 84 |
status = 'CONNECTING';
|
| 85 |
updateUI();
|
| 86 |
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
|
|
|
| 100 |
handleAudio(new Uint8Array(event.data));
|
| 101 |
} else {
|
| 102 |
const msg = JSON.parse(event.data);
|
| 103 |
+
if (msg.type === 'config') {
|
| 104 |
+
handleConfig(msg.payload);
|
| 105 |
+
}
|
| 106 |
+
else if (msg.type === 'asr') {
|
| 107 |
+
handleAsr(msg.payload);
|
| 108 |
+
}
|
| 109 |
+
else if (msg.type === 'language') {
|
| 110 |
+
// Update language indicator when receiving language info from backend
|
| 111 |
+
currentLanguage = msg.payload.language;
|
| 112 |
+
updateLanguageIndicator();
|
| 113 |
+
}
|
| 114 |
}
|
| 115 |
};
|
| 116 |
|
|
|
|
| 197 |
mediaSource.addEventListener('sourceopen', setupBuffer, { once: true });
|
| 198 |
}
|
| 199 |
|
| 200 |
+
// Update the handleConfig function
|
| 201 |
function handleConfig(payload) {
|
| 202 |
console.log(`MIME type received: ${payload.mime}`);
|
| 203 |
mime = payload.mime;
|
|
|
|
| 294 |
utteranceHistory.forEach(utterance => renderNewUtterance(utterance));
|
| 295 |
}
|
| 296 |
|
| 297 |
+
// Update the handleAsr function to include language info
|
| 298 |
function handleAsr(asrPayload) {
|
| 299 |
if (!audio || audio.readyState < 1) return;
|
| 300 |
|
|
|
|
| 406 |
stationSelector.disabled = status !== 'STOPPED';
|
| 407 |
}
|
| 408 |
|
| 409 |
+
// Add new function to determine station language
|
| 410 |
+
function getStationLanguage(stationName) {
|
| 411 |
+
// Hard-coded copy of the French entries in the backend's STATION_LANGUAGES (config.py); this would normally come from the backend
|
| 412 |
+
const frenchStations = [
|
| 413 |
+
"France Inter", "France Info", "France Culture",
|
| 414 |
+
"FIP", "Radio Classique"
|
| 415 |
+
];
|
| 416 |
+
|
| 417 |
+
return frenchStations.includes(stationName) ? 'fr' : 'en';
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
// Add new function to update language indicator
|
| 421 |
+
function updateLanguageIndicator() {
|
| 422 |
+
const languageNames = {
|
| 423 |
+
'en': 'English',
|
| 424 |
+
'fr': 'French'
|
| 425 |
+
};
|
| 426 |
+
|
| 427 |
+
languageIndicator.textContent = languageNames[currentLanguage] || 'Unknown';
|
| 428 |
+
languageIndicator.style.color = currentLanguage === 'fr' ? '#1DB954' : '#4A90E2';
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
|
| 432 |
// Load stations when the page loads
|
| 433 |
document.addEventListener('DOMContentLoaded', loadStations);
|
| 434 |
|
|
|
|
| 445 |
window.addEventListener('resize', function() {
|
| 446 |
// Maintain scroll position to bottom when resizing
|
| 447 |
setTimeout(scrollToLastUtterance, 100);
|
| 448 |
+
});
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
// Handle station change
|
| 452 |
+
stationSelector.addEventListener('change', () => {
|
| 453 |
+
// Update language indicator immediately when station changes
|
| 454 |
+
const selectedStation = stationSelector.value;
|
| 455 |
+
const stationLanguage = getStationLanguage(selectedStation);
|
| 456 |
+
currentLanguage = stationLanguage;
|
| 457 |
+
updateLanguageIndicator();
|
| 458 |
+
|
| 459 |
+
if (status === 'PLAYING') {
|
| 460 |
+
stop();
|
| 461 |
+
// Brief delay to ensure cleanup before restarting
|
| 462 |
+
setTimeout(play, 500);
|
| 463 |
+
}
|
| 464 |
});
|