# config.py
"""
Centralized configuration for the Live Radio Karaoke application.
"""
import os
from performance_config import PERF_CONFIG
# ASR Model Configuration - Optimized for HF Spaces Free CPU (smaller models)
MODEL_DIR_EN = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
REPO_ID_EN = "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
MODEL_DIR_FR = "./sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
REPO_ID_FR = "csukuangfj/sherpa-onnx-streaming-zipformer-fr-kroko-2025-08-06"
MODEL_DIR_ZH = "./sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
REPO_ID_ZH = "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
# Summarizer Model Configuration (Optimized - smallest available model)
SUMMARIZER_MODEL_DIR = "./google_gemma-3-1b-it-qat-Q4_0.gguf"
SUMMARIZER_REPO_ID = "bartowski/google_gemma-3-1b-it-qat-GGUF"
SUMMARIZER_FILENAME = "google_gemma-3-1b-it-qat-Q4_0.gguf"
# Current model in use
CURRENT_MODEL = "en" # Default to English
MODEL_DIRS = {
    "en": MODEL_DIR_EN,
    "fr": MODEL_DIR_FR,
    "zh": MODEL_DIR_ZH
}
REPO_IDS = {
    "en": REPO_ID_EN,
    "fr": REPO_ID_FR,
    "zh": REPO_ID_ZH
}
# Radio Station Configuration
RADIO_URLS = {
    ## English
    "KEXP (Seattle, 64 kbps)": "https://kexp.streamguys1.com/kexp64.aac",
    "KEXP (Seattle, 160 kbps)": "https://kexp.streamguys1.com/kexp160.aac",
    "NPR": "https://npr-ice.streamguys1.com/live.mp3",
    "WYPR 88.1 FM (Baltimore)": "https://wtmd-ice.streamguys1.com/wypr-1-mp3",
    "WAMU 88.5 FM (Washington DC)": "https://wamu.cdnstream1.com/wamu.mp3",
    "BBC World Service": "http://stream.live.vc.bbcmedia.co.uk/bbc_world_service",
    "BBC Radio 4 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_fourfm",
    "BBC Radio 5 Live (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_five_live_online_nonuk",
    "BBC Radio 2 (UK)": "http://stream.live.vc.bbcmedia.co.uk/bbc_radio_two",
    "KQED NPR (San Francisco)": "https://streams.kqed.org/kqedradio",
    "WNYC 93.9 FM (New York)": "http://stream.wnyc.org/wnycfm",
    "WBUR 90.9 FM (Boston)": "http://icecast.wbur.org/wbur",
    "KPCC 89.3 FM (Los Angeles)": "http://kpcclive.streamguys1.com/kpcc64.aac",
    "WHYY 90.9 FM (Philadelphia)": "http://whyy.streamguys1.com/whyy-mp3",
    "ABC News Radio (Australia)": "http://live-radio01.mediahubaustralia.com/PBW/mp3/",
    "CBC Radio One (Toronto)": "http://cbc_r1_tor.akacast.akamaistream.net/7/15/451661/v1/rc.akacast.akamaistream.net/cbc_r1_tor",
    "Voice of America (VOA News Now)": "https://voa-18.akacast.akamaistream.net/7/983/437752/v1/ibb.akacast.akamaistream.net/voa-18",
    "Al Jazeera English (Audio)": "https://live-hls-web-aje.getaj.net/AJE/01.m3u8",
    "PRI The World": "http://stream.pri.org:8000/pri.mp3",
    "Radio Paradise (USA, Mix)": "http://stream.radioparadise.com/mp3-128",
    "KCRW 89.9 FM (Santa Monica)": "http://kcrw.streamguys1.com/kcrw_192",
    ## French
    "France Inter": "https://direct.franceinter.fr/live/franceinter-midfi.mp3",
    "France Info": "https://direct.franceinfo.fr/live/franceinfo-midfi.mp3",
    "France Culture": "https://direct.franceculture.fr/live/franceculture-midfi.mp3",
    "FIP": "https://direct.fip.fr/live/fip-midfi.mp3",
    "Radio Classique": "https://radioclassique.ice.infomaniak.ch/radioclassique-high.mp3",
    ## Mandarin Chinese (Traditional)
    "中廣新聞網": "https://stream.rcs.revma.com/78fm9wyy2tzuv",
    "News98新聞網": "https://stream.rcs.revma.com/pntx1639ntzuv.m4a",
    "飛碟聯播網": "https://stream.rcs.revma.com/em90w4aeewzuv",
}
# Map stations to languages
STATION_LANGUAGES = {
    # French stations
    "France Inter": "fr",
    "France Info": "fr",
    "France Culture": "fr",
    "FIP": "fr",
    "Radio Classique": "fr",
    # Mandarin stations
    "中廣新聞網": "zh",
    "News98新聞網": "zh",
    "飛碟聯播網": "zh",
}
# Supported ASR languages
SUPPORTED_ASR_LANGUAGES = {"en", "fr", "zh"}
# Language fallback mapping for unsupported languages
LANGUAGE_FALLBACK = {
    # Romance languages -> French (similar phonetics)
    "es": "fr",  # Spanish
    "it": "fr",  # Italian
    "pt": "fr",  # Portuguese
    "ro": "fr",  # Romanian
    "ca": "fr",  # Catalan
    # Germanic languages -> English (similar phonetics)
    "de": "en",  # German
    "nl": "en",  # Dutch
    "da": "en",  # Danish
    "sv": "en",  # Swedish
    "no": "en",  # Norwegian
    # Other European languages -> English
    "pl": "en",  # Polish
    "hu": "en",  # Hungarian
    "cs": "en",  # Czech
    "sk": "en",  # Slovak
    "fi": "en",  # Finnish
    # Asian languages -> Chinese or English
    "ja": "zh",  # Japanese -> Chinese (better for Asian phonetics)
    "ko": "zh",  # Korean -> Chinese
    "th": "en",  # Thai -> English
    "vi": "en",  # Vietnamese -> English
    # Arabic and others -> English
    "ar": "en",  # Arabic
    "tr": "en",  # Turkish
    "ru": "en",  # Russian
}
def get_asr_language(detected_language: str) -> tuple[str, bool]:
    """
    Get the appropriate ASR language and whether it's a fallback.
    Args:
        detected_language: Detected language code (e.g., 'es', 'de', 'ja')
    Returns:
        tuple: (asr_language, is_fallback)
    """
    if detected_language in SUPPORTED_ASR_LANGUAGES:
        return detected_language, False
    fallback = LANGUAGE_FALLBACK.get(detected_language, "en")
    return fallback, True
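# Illustrative behaviour, derived from the mappings above:
#   get_asr_language("fr") -> ("fr", False)   # natively supported
#   get_asr_language("es") -> ("fr", True)    # Romance fallback to French
#   get_asr_language("xx") -> ("en", True)    # unmapped codes default to English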
def detect_station_language(station_name: str, country: str = "", metadata: str = "") -> str:
    """
    Detect the language of a station based on its name and metadata.
    Args:
        station_name: Name of the station
        country: Country code (optional)
        metadata: Additional metadata such as a description or tags
    Returns:
        Language code
    """
    # Check the explicit mapping first
    if station_name in STATION_LANGUAGES:
        return STATION_LANGUAGES[station_name]
    # Otherwise fall back to the Radio Browser detection logic
    from api.radio_browser import RadioBrowserAPI
    browser = RadioBrowserAPI()
    # Build a minimal station dict for detection
    fake_station = {
        'name': station_name.lower(),
        'country': country.upper(),
        'tags': metadata.lower(),
        'language': metadata.lower()
    }
    return browser._detect_language(fake_station)
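# Illustrative behaviour, derived from the code above: a station already listed in
# STATION_LANGUAGES is returned directly (e.g. detect_station_language("FIP") -> "fr");
# any other station is handed to RadioBrowserAPI._detect_language, whose heuristics
# live in api/radio_browser.py.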
# At import time, default every station without an explicit mapping to English
for station in RADIO_URLS:
    if station not in STATION_LANGUAGES:
        STATION_LANGUAGES[station] = "en"
# Default station
DEFAULT_RADIO_URL = RADIO_URLS["NPR"]
# Audio Processing Configuration - Dynamic based on performance mode
CHUNK_SIZE = PERF_CONFIG["chunk_size"]
SAMPLE_RATE = 16000
BYTES_PER_SAMPLE = 2 # s16le format is 16-bit -> 2 bytes
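# Derived figures (assuming mono s16le audio): one second of audio is
# SAMPLE_RATE * BYTES_PER_SAMPLE = 16000 * 2 = 32000 bytes, so a chunk of
# CHUNK_SIZE bytes covers roughly CHUNK_SIZE / 32000 seconds of the stream.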
def get_current_model_dir():
    """Returns the current model directory based on the CURRENT_MODEL setting."""
    return MODEL_DIRS.get(CURRENT_MODEL, MODEL_DIR_EN)
def get_current_repo_id():
    """Returns the current repo ID based on the CURRENT_MODEL setting."""
    return REPO_IDS.get(CURRENT_MODEL, REPO_ID_EN)
def get_asr_config() -> dict:
    """
    Returns the configuration dictionary for the sherpa-onnx ASR model.
    Checks for the existence of model files and handles different naming conventions.
    """
    model_dir = get_current_model_dir()
    if not os.path.exists(os.path.join(model_dir, "tokens.txt")):
        raise FileNotFoundError(
            f"ASR model not found in {model_dir}. "
            "Please run the download script or ensure the path is correct."
        )
    # Try different naming conventions for model files
    def find_model_file(base_names):
        """Find the first existing file from a list of possible names."""
        for name in base_names:
            path = os.path.join(model_dir, name)
            if os.path.exists(path):
                return path
        return None
    # Look for encoder files (prefer int8 if available)
    encoder_path = find_model_file([
        "encoder-epoch-99-avg-1.int8.onnx",
        "encoder-epoch-99-avg-1.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "encoder.int8.onnx",
        "encoder.onnx"
    ])
    # Look for decoder files
    decoder_path = find_model_file([
        "decoder-epoch-99-avg-1.onnx",
        "decoder-epoch-20-avg-1-chunk-16-left-128.onnx",
        "decoder.onnx"
    ])
    # Look for joiner files (prefer int8 if available)
    joiner_path = find_model_file([
        "joiner-epoch-99-avg-1.int8.onnx",
        "joiner-epoch-99-avg-1.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
        "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
        "joiner.int8.onnx",
        "joiner.onnx"
    ])
    if not encoder_path or not decoder_path or not joiner_path:
        raise FileNotFoundError(
            f"Required model files not found in {model_dir}. "
            f"Found: encoder={encoder_path}, decoder={decoder_path}, joiner={joiner_path}"
        )
    return {
        "tokens": os.path.join(model_dir, "tokens.txt"),
        "encoder": encoder_path,
        "decoder": decoder_path,
        "joiner": joiner_path,
        "enable_endpoint_detection": True,
        "num_threads": PERF_CONFIG["asr_threads"],
        "rule3_min_utterance_length": 500,  # Increased to reduce processing frequency
    }
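# Minimal standalone check (illustrative only, not used by the app): running
# `python config.py` prints the resolved ASR configuration, assuming the model
# files have already been downloaded into the directory above.
if __name__ == "__main__":
    print(f"Current ASR model: {CURRENT_MODEL} -> {get_current_model_dir()}")
    try:
        for key, value in get_asr_config().items():
            print(f"  {key}: {value}")
    except FileNotFoundError as err:
        print(f"Model files not available yet: {err}")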