feat: add Radio Browser discovery + Discover UI, language mapping and ASR fallback

- Add api/radio_browser.py to query and normalize Radio Browser results
- Provide /api/stations/discover endpoint and enrich /api/stations with languages mapping
- Implement language detection & ASR fallback in config.py
- Add Discover UI (search, filters, tabs) in frontend/index.html and frontend/js/main.js
- Styles for discovery UI in frontend/css/style.css
- Add python-dateutil to requirements.txt

Includes debounce/dedupe client-side logic and visible request URL for debugging.

8820050
# config.py
"""
Centralized configuration for the Live Radio Karaoke application.
"""
import os

from performance_config import PERF_CONFIG

# ASR Model Configuration - Optimized for HF Spaces Free CPU (smaller models)
MODEL_DIR_EN = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
REPO_ID_EN = "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
MODEL_DIR_FR = "./sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
REPO_ID_FR = "csukuangfj/sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
MODEL_DIR_ZH = "./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
REPO_ID_ZH = "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"

# Summarizer Model Configuration (Optimized - smallest available model)
SUMMARIZER_MODEL_DIR = "./google_gemma-3-1b-it-qat-Q4_0.gguf"
SUMMARIZER_REPO_ID = "bartowski/google_gemma-3-1b-it-qat-GGUF"
SUMMARIZER_FILENAME = "google_gemma-3-1b-it-qat-Q4_0.gguf"

# Current model in use
CURRENT_MODEL = "en"  # Default to English

MODEL_DIRS = {
    "en": MODEL_DIR_EN,
    "fr": MODEL_DIR_FR,
    "zh": MODEL_DIR_ZH
}

REPO_IDS = {
    "en": REPO_ID_EN,
    "fr": REPO_ID_FR,
    "zh": REPO_ID_ZH
}

# Radio Station Configuration
RADIO_URLS = {
    ## English
    "KEXP (Seattle, 64 kbps)": "https://kexp.streamguys1.com/kexp64.aac",
    "KEXP (Seattle, 160 kbps)": "https://kexp.streamguys1.com/kexp160.aac",
    "NPR": "https://npr-ice.streamguys1.com/live.mp3",
    "WYPR 88.1 FM (Baltimore)": "https://wtmd-ice.streamguys1.com/wypr-1-mp3",
    "WAMU 88.5 FM (Washington DC)": "https://wamu.cdnstream1.com/wamu.mp3",
    "BBC World Service": "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service",
    "BBC Radio 4 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_fourfm",
    "BBC Radio 5 Live (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_five_live_online_nonuk",
    "BBC Radio 2 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_two",
    "KQED NPR (San Francisco)": "https://streams.kqed.org/kqedradio",
    "WNYC 93.9 FM (New York)": "http://stream.wnyc.org/wnycfm",
    "WBUR 90.9 FM (Boston)": "http://icecast.wbur.org/wbur",
    "KPCC 89.3 FM (Los Angeles)": "http://kpcclive.streamguys1.com/kpcc64.aac",
    "WHYY 90.9 FM (Philadelphia)": "http://whyy.streamguys1.com/whyy-mp3",
    "ABC News Radio (Australia)": "http://live-radio01.mediahubaustralia.com/PBW/mp3/",
    "CBC Radio One (Toronto)": "http://cbc_r1_tor.akacast.akamaistream.net/7/15/451661/v1/rc.akacast.akamaistream.net/cbc_r1_tor",
    "Voice of America (VOA News Now)": "https://voa-18.akacast.akamaistream.net/7/983/437752/v1/ibb.akacast.akamaistream.net/voa-18",
    "Al Jazeera English (Audio)": "https://live-hls-web-aje.getaj.net/AJE/01.m3u8",
    "PRI The World": "http://stream.pri.org:8000/pri.mp3",
    "Radio Paradise (USA, Mix)": "http://stream.radioparadise.com/mp3-128",
    "KCRW 89.9 FM (Santa Monica)": "http://kcrw.streamguys1.com/kcrw_192",
    ## French
    "France Inter": "https://direct.franceinter.fr/live/franceinter-midfi.mp3",
    "France Info": "https://direct.franceinfo.fr/live/franceinfo-midfi.mp3",
    "France Culture": "https://direct.franceculture.fr/live/franceculture-midfi.mp3",
    "FIP": "https://direct.fip.fr/live/fip-midfi.mp3",
    "Radio Classique": "https://radioclassique.ice.infomaniak.ch/radioclassique-high.mp3",
    ## Mandarin Chinese (Traditional)
    "中廣新聞網": "https://stream.rcs.revma.com/78fm9wyy2tzuv",
    "News98新聞網": "https://stream.rcs.revma.com/pntx1639ntzuv.m4a",
    "飛碟聯播網": "https://stream.rcs.revma.com/em90w4aeewzuv",
}

# Map stations to languages
STATION_LANGUAGES = {
    # French stations
    "France Inter": "fr",
    "France Info": "fr",
    "France Culture": "fr",
    "FIP": "fr",
    "Radio Classique": "fr",
    # Mandarin stations
    "中廣新聞網": "zh",
    "News98新聞網": "zh",
    "飛碟聯播網": "zh",
}

# Supported ASR languages
SUPPORTED_ASR_LANGUAGES = {"en", "fr", "zh"}

# Language fallback mapping for unsupported languages
LANGUAGE_FALLBACK = {
    # Romance languages -> French (similar phonetics)
    "es": "fr",  # Spanish
    "it": "fr",  # Italian
    "pt": "fr",  # Portuguese
    "ro": "fr",  # Romanian
    "ca": "fr",  # Catalan
    # Germanic languages -> English (similar phonetics)
    "de": "en",  # German
    "nl": "en",  # Dutch
    "da": "en",  # Danish
    "sv": "en",  # Swedish
    "no": "en",  # Norwegian
    # Other European languages -> English
    "pl": "en",  # Polish
    "hu": "en",  # Hungarian
    "cs": "en",  # Czech
    "sk": "en",  # Slovak
    "fi": "en",  # Finnish
    # Asian languages -> Chinese or English
    "ja": "zh",  # Japanese -> Chinese (better for Asian phonetics)
    "ko": "zh",  # Korean -> Chinese
    "th": "en",  # Thai -> English
    "vi": "en",  # Vietnamese -> English
    # Arabic and others -> English
    "ar": "en",  # Arabic
    "tr": "en",  # Turkish
    "ru": "en",  # Russian
}


def get_asr_language(detected_language: str) -> tuple[str, bool]:
    """
    Get the appropriate ASR language and whether it's a fallback.

    Args:
        detected_language: Detected language code (e.g., 'es', 'de', 'ja')

    Returns:
        tuple: (asr_language, is_fallback)
    """
    if detected_language in SUPPORTED_ASR_LANGUAGES:
        return detected_language, False
    fallback = LANGUAGE_FALLBACK.get(detected_language, "en")
    return fallback, True
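
# Illustrative examples (comments only, nothing executed at import time); the
# mapping is driven by LANGUAGE_FALLBACK above and unknown codes default to English:
#   get_asr_language("fr")  -> ("fr", False)  # natively supported
#   get_asr_language("es")  -> ("fr", True)   # Romance-language fallback to French
#   get_asr_language("sw")  -> ("en", True)   # unmapped code falls back to English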


def detect_station_language(station_name: str, country: str = "", metadata: str = "") -> str:
    """
    Detect language for any station based on name and metadata.

    Args:
        station_name: Name of the station
        country: Country code (optional)
        metadata: Additional metadata like description or tags

    Returns:
        Language code
    """
    # Check existing mapping first
    if station_name in STATION_LANGUAGES:
        return STATION_LANGUAGES[station_name]

    # Use radio browser detection logic
    from api.radio_browser import RadioBrowserAPI
    browser = RadioBrowserAPI()

    # Create a fake station dict for detection
    fake_station = {
        'name': station_name.lower(),
        'country': country.upper(),
        'tags': metadata.lower(),
        'language': metadata.lower()
    }
    return browser._detect_language(fake_station)
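
# Illustrative usage (hypothetical station name; the actual code returned
# depends on the heuristics inside RadioBrowserAPI._detect_language):
#   detect_station_language("Radio Nacional", country="es", metadata="noticias")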

# Default to English for all other stations
for station in RADIO_URLS:
    if station not in STATION_LANGUAGES:
        STATION_LANGUAGES[station] = "en"

# Default station
DEFAULT_RADIO_URL = RADIO_URLS["NPR"]

# Audio Processing Configuration - Dynamic based on performance mode
CHUNK_SIZE = PERF_CONFIG["chunk_size"]
SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2  # s16le format is 16-bit -> 2 bytes


def get_current_model_dir():
    """Returns the current model directory based on CURRENT_MODEL setting."""
    return MODEL_DIRS.get(CURRENT_MODEL, MODEL_DIR_EN)


def get_current_repo_id():
    """Returns the current repo ID based on CURRENT_MODEL setting."""
    return REPO_IDS.get(CURRENT_MODEL, REPO_ID_EN)
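
# Illustrative: switching the active ASR language at runtime. Callers are
# assumed to set CURRENT_MODEL before resolving paths; nothing here is
# guarded for thread safety.
#   import config
#   config.CURRENT_MODEL = "fr"
#   config.get_current_model_dir()  # -> MODEL_DIR_FR
#   config.get_current_repo_id()    # -> REPO_ID_FR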


def get_asr_config() -> dict:
    """
    Returns the configuration dictionary for the sherpa-onnx ASR model.

    Checks for the existence of model files and handles different naming conventions.
    """
    model_dir = get_current_model_dir()
    if not os.path.exists(os.path.join(model_dir, "tokens.txt")):
        raise FileNotFoundError(
            f"ASR model not found in {model_dir}. "
            "Please run the download script or ensure the path is correct."
        )

    # Try different naming conventions for model files
    def find_model_file(base_names):
        """Find the first existing file from a list of possible names."""
        for name in base_names:
            path = os.path.join(model_dir, name)
            if os.path.exists(path):
                return path
        return None

    # Look for encoder files (prefer int8 if available)
    encoder_path = find_model_file([
        "encoder-epoch-99-avg-1.int8.onnx",
        "encoder-epoch-99-avg-1.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "encoder.int8.onnx",
        "encoder.onnx"
    ])

    # Look for decoder files
    decoder_path = find_model_file([
        "decoder-epoch-99-avg-1.onnx",
        "decoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "decoder.onnx"
    ])

    # Look for joiner files (prefer int8 if available)
    joiner_path = find_model_file([
        "joiner-epoch-99-avg-1.int8.onnx",
        "joiner-epoch-99-avg-1.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
        "joiner.int8.onnx",
        "joiner.onnx"
    ])

    if not encoder_path or not decoder_path or not joiner_path:
        raise FileNotFoundError(
            f"Required model files not found in {model_dir}. "
            f"Found: encoder={encoder_path}, decoder={decoder_path}, joiner={joiner_path}"
        )

    return {
        "tokens": os.path.join(model_dir, "tokens.txt"),
        "encoder": encoder_path,
        "decoder": decoder_path,
        "joiner": joiner_path,
        "enable_endpoint_detection": True,
        "num_threads": PERF_CONFIG["asr_threads"],
        "rule3_min_utterance_length": 500,  # Increased to reduce processing frequency
    }
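

if __name__ == "__main__":
    # Minimal smoke test (an illustrative sketch, not part of the application
    # flow): resolve the active model paths and exercise the language-fallback
    # helpers without loading any ASR model.
    print("Current model dir:", get_current_model_dir())
    print("Current repo id:", get_current_repo_id())
    for code in ("en", "es", "ja", "sw"):
        lang, is_fallback = get_asr_language(code)
        print(f"{code} -> {lang} (fallback={is_fallback})")
    try:
        asr_cfg = get_asr_config()
        print("Resolved ASR files:", asr_cfg["encoder"], asr_cfg["decoder"], asr_cfg["joiner"])
    except FileNotFoundError as err:
        print("ASR model files not available yet:", err)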