|
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
|
|
|
|
|
def combine_audio_with_time(target_audio, mixed_audio): |
|
|
|
target_audio_segment = AudioSegment.from_wav(target_audio.name) |
|
|
|
|
|
mixed_audio_segment = AudioSegment.from_wav(mixed_audio.name) |
|
|
|
|
|
target_start_time = len(mixed_audio_segment) / 1000 |
|
|
|
|
|
final_audio = mixed_audio_segment + target_audio_segment |
|
|
|
|
|
final_audio.export("final_output.wav", format="wav") |
|
|
|
return "final_output.wav", target_start_time |
|
|
|
|
|
interface = gr.Interface( |
|
fn=combine_audio_with_time, |
|
inputs=[ |
|
gr.File(label="目标说话人音频"), |
|
gr.File(label="混合音频") |
|
], |
|
outputs=[ |
|
gr.Audio(label="输出音频"), |
|
gr.Textbox(label="目标音频起始时间") |
|
], |
|
live=False |
|
) |
|
|
|
interface.launch() |
|
|