Spaces:
Running
Running
import requests | |
import gradio as gr | |
from tqdm import tqdm | |
from pydub import AudioSegment | |
from datetime import datetime, timedelta | |
from utils import timestamp, extract_fst_int, rm_dir, mk_dir, TMP_DIR, EN_US | |
# Chinese UI text -> English UI text. Keys must match the literals passed to
# _L() character for character; values are what the UI shows when EN_US is set.
ZH2EN = {
    "输入声音页 URL": "Enter the sound page URL",
    "按格式输入声音发布日期": "Enter sound publication date in format",
    "下载 MP3": "Download MP3",
    "荔枝FM音频解析下载": "Lizhi FM Audio Direct URL Parsing Tool",
    "推荐辅助工具 <a href='https://tool.lu/datecalc' target='_blank'>日期计算器</a>": "The <a href='https://tool.lu/en_US/datecalc' target='_blank'>datecalc</a> is highly recommended.",
    "状态栏": "Status",
}
def _L(zh_txt: str) -> str:
    """Return the UI text for ``zh_txt`` in the active language.

    When ``EN_US`` is truthy the English entry from ``ZH2EN`` is returned;
    otherwise the Chinese source text is returned unchanged.

    Args:
        zh_txt: Chinese UI string, used as the lookup key in ``ZH2EN``.

    Returns:
        The translated string, or ``zh_txt`` itself when English is not
        enabled or no translation is registered for it.
    """
    if not EN_US:
        return zh_txt

    # Fall back to the source text instead of raising KeyError, so a string
    # that has not been translated yet cannot break UI construction.
    return ZH2EN.get(zh_txt, zh_txt)
def get_prev_day(date_str):
    """Return the calendar day before ``date_str``.

    Args:
        date_str: A date written as ``YYYY/MM/DD``.

    Returns:
        The preceding day, formatted the same way.

    Raises:
        ValueError: If ``date_str`` does not match ``%Y/%m/%d``.
    """
    fmt = "%Y/%m/%d"
    day_before = datetime.strptime(date_str, fmt) - timedelta(days=1)
    return day_before.strftime(fmt)
def rm_end_seconds(input_file: str, output_file="", seconds=3.1):
    """Cut the last ``seconds`` seconds off an audio file and save it as MP3.

    The progress message printed here describes this step as removing an
    audio watermark.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path to write the result to. When empty, ``input_file``
            is overwritten.
        seconds: Length of the tail to drop, in seconds.

    Returns:
        The path the trimmed audio was written to.
    """
    print("移除音频水印...")
    target = output_file or input_file
    # pydub slices are expressed in milliseconds; a negative stop counts
    # back from the end of the clip.
    trimmed = AudioSegment.from_file(input_file)[: -(seconds * 1000)]
    trimmed.export(target, format="mp3")
    return target
def download_mp3(
    url: str,
    local_file: str,
    timeout: float = 30,
    max_lookback_days: int = 365,
):
    """Download an MP3 to ``local_file`` and trim its tail with ``rm_end_seconds``.

    If the server answers 404, the date segment of the URL (the path between
    ``/audio/`` and the file name) is moved back by one day and the request
    is retried, up to ``max_lookback_days`` times.

    Args:
        url: Direct audio URL of the form ``.../audio/YYYY/MM/DD/<name>.mp3``.
        local_file: Destination path for the downloaded file.
        timeout: Seconds to wait for the server to connect/send data before
            giving up on a request.
        max_lookback_days: Maximum number of earlier days to try after a 404.

    Returns:
        The value returned by ``rm_end_seconds(local_file)``.

    Raises:
        ConnectionError: On any status other than 200/404, or when no audio
            was found within the lookback window.
        ValueError: If the URL's date segment is not ``YYYY/MM/DD`` when a
            404 fallback is attempted (raised by ``get_prev_day``).
        requests.exceptions.RequestException: On network failures/timeouts.
    """
    # Iterate instead of recursing so a permanently missing file ends with a
    # clear error rather than a RecursionError after hundreds of requests.
    for _ in range(max(max_lookback_days, 0) + 1):
        # The context manager releases the streamed connection on every
        # path, including the 404 retries.
        with requests.get(url, stream=True, timeout=timeout) as response:
            retcode = response.status_code
            if retcode == 200:
                # A missing/zero Content-Length means the size is unknown;
                # tqdm then shows a running byte count without a percentage.
                total_size = int(response.headers.get("Content-Length", 0)) or None
                with open(local_file, "wb") as f, tqdm(
                    total=total_size,
                    unit="B",
                    unit_scale=True,
                    desc=f"[{timestamp()}] {local_file}",
                ) as progress_bar:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
                            progress_bar.update(len(chunk))
                found = True
            elif retcode == 404:
                found = False
            else:
                raise ConnectionError(f"错误: {retcode}, {response.text}")

        if found:
            return rm_end_seconds(local_file)

        # Not found under this date: step the date segment back one day.
        bad_date = "/".join(url.split("/audio/")[-1].split("/")[:-1])
        url = url.replace(bad_date, get_prev_day(bad_date))

    raise ConnectionError(
        f"错误: 404, 回溯 {max_lookback_days} 天后仍未找到音频: {url}"
    )
# NOTE: download_mp3 raises on failure, so its caller must wrap the call in try/except.
def infer(page_url: str, date: str, cache=f"{TMP_DIR}/lizhi"):
    """Resolve a Lizhi FM sound page (or bare ID) to an MP3 and download it.

    Args:
        page_url: Sound page URL containing ``http`` and ``.lizhi``, or a
            purely numeric sound ID.
        date: Publication date as ``YYYY-MM-DD`` or ``YYYY/MM/DD``.
        cache: Working directory; it is removed and recreated on each call.

    Returns:
        A ``(status, outpath)`` tuple. ``status`` is ``"Success"`` or the
        text of the exception that occurred; ``outpath`` is the downloaded
        file path, or ``None`` on failure.
    """
    status = "Success"
    outpath = None
    try:
        rm_dir(cache)

        # Pasted input often carries surrounding whitespace or a trailing
        # slash; either would defeat the digit check or leave the last path
        # segment empty.
        page_url = (page_url or "").strip().rstrip("/")
        if not page_url:
            raise ValueError("声音链接或ID为空")

        is_lizhi_url = "http" in page_url and ".lizhi" in page_url
        if not (is_lizhi_url or page_url.isdigit()):
            raise ValueError("无效的声音链接或ID")

        # The sound ID is taken from the last path segment.
        # NOTE(review): extract_fst_int is defined in utils; presumably it
        # returns the first integer in the string - confirm.
        sound_id = extract_fst_int(page_url.split("/")[-1])

        # Validate the date up front and normalise it to zero-padded
        # YYYY/MM/DD, the layout used in the CDN path below.
        raw_date = (date or "").strip().replace("-", "/")
        try:
            voice_time = datetime.strptime(raw_date, "%Y/%m/%d").strftime("%Y/%m/%d")
        except ValueError:
            raise ValueError(f"无效的日期: {date!r}, 格式应为 YYYY-MM-DD") from None

        mp3_url = f"http://cdn5.lizhi.fm/audio/{voice_time}/{sound_id}_hd.mp3"
        mk_dir(cache)
        outpath = download_mp3(mp3_url, f"{cache}/{sound_id}.mp3")

    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting it propagate.
        status = f"{e}"

    return status, outpath
def lizhifm_parser():
    """Build the Gradio interface for the Lizhi FM audio downloader.

    The interface feeds a sound page URL and a publication date to ``infer``
    and shows the resulting status text and audio.

    Returns:
        The configured ``gr.Interface``.
    """
    # Inputs: the sound page URL (or ID) and its publication date.
    url_box = gr.Textbox(
        label=_L("输入声音页 URL"),
        placeholder="https://www.lizhi.fm/*/*",
    )
    date_box = gr.Textbox(
        label=_L("按格式输入声音发布日期"),
        placeholder="YYYY-MM-DD",
    )

    # Outputs: a status line and the downloaded audio.
    status_box = gr.Textbox(label=_L("状态栏"), show_copy_button=True)
    audio_box = gr.Audio(label=_L("下载 MP3"), show_download_button=True)

    page_title = _L("荔枝FM音频解析下载")
    page_description = _L(
        "推荐辅助工具 <a href='https://tool.lu/datecalc' target='_blank'>日期计算器</a>"
    )

    # The same sound addressed through each accepted URL form, plus a bare ID.
    sample_rows = [
        ["https://www.lizhi.fm/voice/3136401036767886342", "2025-04-05"],
        ["https://www.lizhifm.com/voice/3136401036767886342", "2025-04-05"],
        ["https://m.lizhi.fm/voice/3136401036767886342", "2025-04-05"],
        ["https://m.lizhifm.com/voice/3136401036767886342", "2025-04-05"],
        ["3136401036767886342", "2025-04-05"],
    ]

    return gr.Interface(
        fn=infer,
        inputs=[url_box, date_box],
        outputs=[status_box, audio_box],
        flagging_mode="never",
        title=page_title,
        description=page_description,
        examples=sample_rows,
    )