|
|
|
import gradio as gr |
|
import torch |
|
import uuid |
|
import os |
|
from pathlib import Path |
|
from pytube import YouTube |
|
from pydub import AudioSegment |
|
from TTS.api import TTS |
|
|
|
# Default reference clip; passed as the initial `value=` of the in_aud_file
# audio widget in the UI definition further down.
test_audio="./shufflin.wav"
# Generated once when the module is imported and embedded in every temp/output
# file name the functions below write ("{uid}-trim.wav", "{uid}-output.wav", ...).
# NOTE(review): because it is module-level, all requests served by this process
# share the same file names - presumably concurrent users can overwrite each
# other's files; confirm whether a per-request id is wanted.
uid = uuid.uuid4()
# Device string handed to the TTS model: GPU when CUDA is available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None):
    """Speak ``inp`` in the voice of a reference recording and save it as WAV.

    The reference ("speaker") audio is picked with this precedence:

    1. ``trim_aud`` - when set, the clip stored in ``{uid}-trim.wav`` is used.
       The value of ``trim_aud`` itself is not read; it only signals that a
       trimmed clip exists.
    2. ``in_aud`` - the source audio file.
    3. ``in_aud_mic`` - a microphone recording.

    Args:
        inp: Text to synthesize.
        in_aud: Reference audio passed straight to the TTS model, or ``None``.
        trim_aud: Trimmed-audio value, or ``None`` when no clip was trimmed.
        in_aud_mic: Microphone reference audio, or ``None``.

    Returns:
        The file name of the generated audio, ``"{uid}-output.wav"``.

    Raises:
        ValueError: If none of the three reference inputs was provided.
    """
    # Identity checks instead of ``!= None`` so that array-like values are
    # never compared element-wise.
    if trim_aud is not None:
        speaker_wav = Path(f"{uid}-trim.wav")
    elif in_aud is not None:
        speaker_wav = in_aud
    elif in_aud_mic is not None:
        speaker_wav = in_aud_mic
    else:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            "No reference audio supplied: provide an audio file, a trimmed "
            "clip, or a microphone recording."
        )

    output_file = f"{uid}-output.wav"
    # NOTE(review): the model is re-created on every call; consider loading it
    # once at start-up if request latency matters.
    tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
    tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=output_file)
    return output_file
|
|
|
def load_video_yt(vid):
    """Download a YouTube video and one of its audio-only streams.

    The video is the highest-resolution progressive mp4 stream and is saved as
    ``{uid}-tmp.mp4``; the first audio-only stream is saved as
    ``{uid}-tmp_aud.mp4``.

    Args:
        vid: URL of the YouTube video.

    Returns:
        A 3-tuple ``(video_path, audio_path, audio_file_name)`` where the first
        two are whatever ``download()`` returned and the last is the literal
        name ``"{uid}-tmp_aud.mp4"``.

    Raises:
        ValueError: If the video offers no progressive mp4 stream or no
            audio-only stream.
    """
    yt = YouTube(vid)

    video_stream = (
        yt.streams.filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )
    # ``first()`` yields None on an empty query; fail with a readable message
    # instead of an AttributeError on ``None.download``.
    if video_stream is None:
        raise ValueError(f"No progressive mp4 stream available for {vid!r}")

    # ``first()`` rather than ``[0]`` so an empty result is reported the same
    # way instead of raising IndexError.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {vid!r}")

    # Both streams are resolved before downloading so a missing audio track
    # does not leave a stray video file behind.
    video_path = video_stream.download(filename=f"{uid}-tmp.mp4")
    audio_path = audio_stream.download(filename=f"{uid}-tmp_aud.mp4")
    print (f'Video Length: {yt.length}')
    return video_path, audio_path, f"{uid}-tmp_aud.mp4"
|
|
|
def _timestamp_to_ms(stamp):
    """Convert ``"SS"``, ``"M:SS"`` or ``"H:MM:SS"`` text to milliseconds."""
    total_seconds = 0
    # Each further ':'-separated field is worth 60x the one after it.
    for field in str(stamp).strip().split(":"):
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds * 1000


def trim_clip(clip, start_t, end_t):
    """Cut ``[start_t, end_t)`` out of an audio file and save it as WAV.

    Args:
        clip: Path of the source audio file.
        start_t: Start position as ``"M:SS"`` (``"SS"`` and ``"H:MM:SS"`` are
            accepted as well).
        end_t: End position, same format as ``start_t``.

    Returns:
        The file name of the exported clip, ``"{uid}-trim.wav"``.

    Raises:
        ValueError: If a timestamp is not numeric or if ``end_t`` is not
            later than ``start_t``.
    """
    # Validate the cheap inputs first so bad timestamps fail before the audio
    # file is decoded.
    start_ms = _timestamp_to_ms(start_t)
    end_ms = _timestamp_to_ms(end_t)
    if end_ms <= start_ms:
        # An empty or inverted range used to export a silent zero-length file.
        raise ValueError(
            f"End time {end_t!r} must be later than start time {start_t!r}"
        )

    source = Path(f"{clip}")
    song = AudioSegment.from_file(f"{source}", format="mp4")

    trimmed_name = f"{uid}-trim.wav"
    song[start_ms:end_ms].export(trimmed_name, format="wav")
    print("New Audio file is created and saved")
    return trimmed_name
|
|
|
def pre_aud(inp):
    """Re-export the audio file at ``inp`` as ``{uid}-tmp_aud.mp4``.

    The input path is printed, the file is decoded as mp4 and written back
    out under the shared temp name, and ``inp`` is returned unchanged.
    """
    print(inp)

    target_name = f"{uid}-tmp_aud.mp4"
    source_path = Path(f'{inp}')

    decoded = AudioSegment.from_file(source_path, format="mp4")
    decoded.export(target_name, format="mp4")

    print(f'pre_aud:: {target_name}')
    return inp
|
|
|
# Gradio UI: text-to-speech panel on top, reference-audio panel below.
# NOTE(review): the source reached review with its indentation flattened; the
# nesting below is reconstructed from the order of the `with` statements -
# confirm it matches the intended layout.
with gr.Blocks() as app:

    # --- Text input, "go" button and synthesized output ---
    with gr.Group():

        with gr.Row():
            in_text = gr.Textbox(lines = 6, max_lines = 20)
            with gr.Column():
                alt_go_btn = gr.Button()
                out_audio = gr.Audio(interactive=False)

    # --- Reference audio: upload / microphone / YouTube ---
    with gr.Group():

        with gr.Row():
            gr.Markdown('''<H1> Audio Source:''')

        with gr.Row():

            with gr.Column():

                # Source clip; handlers receive it as a file path and it starts
                # out pointing at the bundled test_audio sample.
                in_aud_file = gr.Audio(label = 'Audio Source', sources=['microphone','upload'], interactive = True,type='filepath', value=test_audio)
                # Read-only file widget filled by the YouTube loader.
                aud_file = gr.File(interactive=False,visible=True)
                with gr.Row():
                    # Trim range as "M:SS" text, parsed by trim_clip.
                    start_time = gr.Textbox(label = "Start", value = "0:00", placeholder = "0:23")
                    end_time = gr.Textbox(label = "End", value = "0:01", placeholder = "1:12")
                trim_clip_btn = gr.Button("Trim Clip")
                # Shows the result of trim_clip; also passed to custom_bark.
                trim_aud = gr.Audio(label = 'Trimmed Audio Source', sources=['upload'], interactive = False)

            with gr.Column():
                in_aud_yt = gr.Textbox(label="YouTube URL")
                load_yt_btn = gr.Button("Load URL")
                yt_vid = gr.Video(interactive=False)

    # --- Event wiring ---
    # URL -> (video player, audio source, downloadable file).
    load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
    # Audio source + start/end text -> trimmed audio widget.
    trim_clip_btn.click(trim_clip,[in_aud_file, start_time, end_time],trim_aud)
    # Only three inputs are passed, so custom_bark's in_aud_mic keeps its
    # default of None.
    alt_go_btn.click(custom_bark, [in_text,in_aud_file,trim_aud], out_audio)

# Starts the app at import time (there is no __main__ guard in this file).
app.launch()