Spaces:

Genius-Society
/

music_parsers

Building

admin

sync ms

7068663 about 1 month ago

4.3 kB

	import requests
	import gradio as gr
	from tqdm import tqdm
	from pydub import AudioSegment
	from datetime import datetime, timedelta
	from utils import timestamp, extract_fst_int, rm_dir, mk_dir, TMP_DIR, EN_US

	# Chinese UI text -> English UI text. _L() indexes this table with the exact
	# Chinese string when EN_US is truthy, so the keys must stay byte-identical
	# to the strings passed at the call sites.
	ZH2EN = {
	    "输入声音页 URL": "Enter the sound page URL",
	    "按格式输入声音发布日期": "Enter sound publication date in format",
	    "下载 MP3": "Download MP3",
	    "荔枝FM音频解析下载": "Lizhi FM Audio Direct URL Parsing Tool",
	    "推荐辅助工具 <a href='https://tool.lu/datecalc' target='_blank'>日期计算器</a>": "The <a href='https://tool.lu/en_US/datecalc' target='_blank'>datecalc</a> is highly recommended.",
	    "状态栏": "Status",
	}


	def _L(zh_txt: str):
	    """Return the text to display for the Chinese UI string ``zh_txt``.

	    When ``EN_US`` is falsy the Chinese text is returned unchanged; otherwise
	    its English counterpart is read from ``ZH2EN`` (a missing key raises
	    ``KeyError``).
	    """
	    if not EN_US:
	        return zh_txt
	    return ZH2EN[zh_txt]


	def get_prev_day(date_str):
	    """Return the calendar day before ``date_str``.

	    Both the input and the result use the ``YYYY/MM/DD`` layout; the result
	    is always zero-padded. Raises ``ValueError`` when ``date_str`` does not
	    parse with that layout.
	    """
	    fmt = "%Y/%m/%d"
	    one_day = timedelta(days=1)
	    return (datetime.strptime(date_str, fmt) - one_day).strftime(fmt)


	def rm_end_seconds(input_file: str, output_file="", seconds=3.1):
	    """Cut the last ``seconds`` off an audio file and export the rest as MP3.

	    Args:
	        input_file: Path of the audio file to read.
	        output_file: Path to write; when empty, ``input_file`` is overwritten.
	        seconds: Length of the tail to drop, in seconds. Zero or negative
	            values leave the audio untrimmed; a value longer than the clip
	            yields an empty clip.

	    Returns:
	        The path the trimmed audio was exported to.
	    """
	    print("移除音频水印...")
	    if not output_file:
	        output_file = input_file

	    audio = AudioSegment.from_file(input_file)
	    # Slice by an explicit, clamped "keep" length instead of a negative end
	    # bound: ``audio[:-0]`` selects nothing, and a tail longer than the clip
	    # would produce a negative position.
	    trim_ms = max(seconds * 1000, 0)
	    keep_ms = max(len(audio) - trim_ms, 0)  # len(audio) is the duration in ms
	    new_audio = audio[:keep_ms]
	    new_audio.export(output_file, format="mp3")
	    return output_file


	def download_mp3(
	    url: str,
	    local_file: str,
	    max_lookback_days: int = 365,
	    timeout: float = 30,
	):
	    """Stream the MP3 at ``url`` into ``local_file``, then trim its tail.

	    On a 404 the date segment of the URL (everything between ``/audio/`` and
	    the file name) is moved back one day with ``get_prev_day`` and the
	    request is retried, at most ``max_lookback_days`` times.

	    Args:
	        url: Direct audio URL containing ``/audio/<YYYY/MM/DD>/<file>``.
	        local_file: Path the downloaded bytes are written to.
	        max_lookback_days: Upper bound on how many earlier days are tried
	            after the first 404.
	        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

	    Returns:
	        Whatever ``rm_end_seconds(local_file)`` returns.

	    Raises:
	        ConnectionError: On any status other than 200/404, or when every
	            attempted date answered 404.
	        ValueError: From ``get_prev_day`` when the URL's date segment does
	            not parse as ``YYYY/MM/DD``.
	    """
	    for _ in range(max_lookback_days + 1):
	        # The context manager releases the streamed connection on every
	        # path, including the 404 retries.
	        with requests.get(url, stream=True, timeout=timeout) as response:
	            retcode = response.status_code
	            if retcode == 200:
	                content_length = int(response.headers.get("Content-Length", 0))
	                time_stamp = timestamp()
	                # An unknown size is passed as None so tqdm shows plain
	                # counters instead of a bar with a wrong total.
	                with tqdm(
	                    total=content_length or None,
	                    unit="B",
	                    unit_scale=True,
	                    desc=f"[{time_stamp}] {local_file}",
	                ) as progress_bar:
	                    with open(local_file, "wb") as f:
	                        for chunk in response.iter_content(chunk_size=8192):
	                            if chunk:  # skip empty chunks
	                                f.write(chunk)
	                                progress_bar.update(len(chunk))
	            elif retcode != 404:
	                raise ConnectionError(f"错误: {retcode}, {response.text}")

	        if retcode == 200:
	            return rm_end_seconds(local_file)

	        # 404: retry with the date segment moved back by one day.
	        bad_date = "/".join(url.split("/audio/")[-1].split("/")[:-1])
	        url = url.replace(bad_date, get_prev_day(bad_date))

	    raise ConnectionError(
	        f"错误: 404, 回溯 {max_lookback_days} 天后仍未找到音频"
	    )


	# Entry point handed to the UI: it must catch every exception itself and report it through the returned status
	def infer(page_url: str, date: str, cache=f"{TMP_DIR}/lizhi"):
	    """Resolve a sound page URL (or bare numeric ID) plus date to a local MP3.

	    The cache directory is removed first, the sound ID is taken from the
	    last path segment of ``page_url``, a CDN URL is built from the date and
	    ID, and the file is fetched with ``download_mp3``. No exception escapes:
	    any failure is converted to its message and returned as the status.

	    Args:
	        page_url: Sound page URL containing ``http`` and ``.lizhi``, or a
	            string of digits.
	        date: Publication date as ``YYYY-MM-DD`` or ``YYYY/MM/DD``.
	        cache: Directory that is wiped and then receives the download.

	    Returns:
	        ``(status, outpath)`` where ``status`` is ``"Success"`` or the error
	        text, and ``outpath`` is the downloaded file path or ``None``.
	    """
	    status = "Success"
	    outpath = None
	    try:
	        rm_dir(cache)
	        # Pasted links often carry surrounding whitespace or a trailing
	        # slash; either would leave the last path segment without the ID.
	        page_url = (page_url or "").strip().rstrip("/")
	        if not page_url:
	            raise ValueError("声音链接或ID为空")

	        if ("http" in page_url and ".lizhi" in page_url) or page_url.isdigit():
	            sound_id = extract_fst_int(page_url.split("/")[-1])
	        else:
	            raise ValueError("无效的声音链接或ID")

	        raw_date = (date or "").strip().replace("-", "/")
	        try:
	            # Round-trip through strptime to validate the date and zero-pad
	            # it, so "2025/4/5" requests the 5th instead of 404-ing and
	            # falling back to the 4th.
	            voice_time = datetime.strptime(raw_date, "%Y/%m/%d").strftime(
	                "%Y/%m/%d"
	            )
	        except ValueError:
	            raise ValueError(f"无效的发布日期: {date!r}, 应为 YYYY-MM-DD") from None

	        mp3_url = f"http://cdn5.lizhi.fm/audio/{voice_time}/{sound_id}_hd.mp3"
	        mk_dir(cache)
	        outpath = download_mp3(mp3_url, f"{cache}/{sound_id}.mp3")

	    except Exception as e:
	        status = f"{e}"

	    return status, outpath


	def lizhifm_parser():
	    """Build and return the Gradio interface that drives ``infer``.

	    Two text inputs (sound page URL, publication date) feed ``infer``; its
	    status string and audio path are shown in a textbox and an audio player.
	    All labels, the title and the description go through ``_L``.
	    """
	    url_box = gr.Textbox(
	        label=_L("输入声音页 URL"),
	        placeholder="https://www.lizhi.fm//",
	    )
	    date_box = gr.Textbox(
	        label=_L("按格式输入声音发布日期"),
	        placeholder="YYYY-MM-DD",
	    )
	    status_box = gr.Textbox(label=_L("状态栏"), show_copy_button=True)
	    audio_box = gr.Audio(label=_L("下载 MP3"), show_download_button=True)

	    # The same sound addressed through each accepted URL form, plus the bare ID.
	    sample_sources = (
	        "https://www.lizhi.fm/voice/3136401036767886342",
	        "https://www.lizhifm.com/voice/3136401036767886342",
	        "https://m.lizhi.fm/voice/3136401036767886342",
	        "https://m.lizhifm.com/voice/3136401036767886342",
	        "3136401036767886342",
	    )
	    sample_rows = [[source, "2025-04-05"] for source in sample_sources]

	    return gr.Interface(
	        fn=infer,
	        inputs=[url_box, date_box],
	        outputs=[status_box, audio_box],
	        flagging_mode="never",
	        title=_L("荔枝FM音频解析下载"),
	        description=_L(
	            "推荐辅助工具 <a href='https://tool.lu/datecalc' target='_blank'>日期计算器</a>"
	        ),
	        examples=sample_rows,
	        cache_examples=False,
	    )