File size: 7,175 Bytes
635ad89 f088aee 635ad89 f088aee c1a6d8e f088aee 28c73d5 f088aee 8f2153e f088aee 0a26beb f088aee 0a26beb 9433e9a 0a26beb f088aee 28c73d5 f088aee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import gradio as gr, glob, os, auditok, random, zipfile, wave, pytube.exceptions
from pytube import YouTube
from moviepy.editor import VideoFileClip
def download_video(url):
    """Download a YouTube video and write its audio track to ``output.wav``.

    Args:
        url: Address of the YouTube video, as entered in the UI textbox.

    Returns:
        A status message to display in the UI.

    Raises:
        gr.Error: If pytube rejects the URL, no stream can be selected, or
            the downloaded video has no audio track.
    """
    try:
        yt = YouTube(url)
    except pytube.exceptions.RegexMatchError as exc:
        raise gr.Error("URL not valid or is empty! Please fix the link or enter one!") from exc

    video = yt.streams.get_highest_resolution()
    if video is None:
        raise gr.Error("No downloadable stream was found for this video.")

    # download() returns the path it actually wrote, so there is no need to
    # guess it from default_filename.
    video_path = video.download()
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("This video has no audio track to extract.")
            try:
                audio_clip.write_audiofile("output.wav")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # Remove only the file downloaded here; a blanket "*.mp4" glob would
        # also delete unrelated videos sitting in the working directory.
        if os.path.exists(video_path):
            os.remove(video_path)

    return "Finished downloading! Please proceed to final tab."
def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` into clips with auditok and zip the results.

    Each detected region is saved as ``<name_for_split_files>-<n>.wav``
    (numbered from 1), added to ``audio_files.zip``, and then removed from
    disk together with ``output.wav``.

    Args:
        mindur: Forwarded to ``auditok.split`` as ``min_dur``.
        maxdur: Forwarded to ``auditok.split`` as ``max_dur``.
        name_for_split_files: Prefix used for the generated clip file names.
        show_amount_of_files_and_file_dur: When truthy, the status message
            also reports the number of clips and their total duration.

    Returns:
        A ``(status_message, zip_file_name)`` tuple.

    Raises:
        gr.Error: If ``output.wav`` is missing, the durations are equal or
            inverted, the name is empty or contains a path separator, or a
            generated clip cannot be read back for the statistics.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    audio_path = "output.wav"
    if not os.path.exists(audio_path):
        raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An empty textbox yields "" rather than None, so test truthiness.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")
    # The name is interpolated into a file path; keep it inside the cwd.
    if os.path.basename(name_for_split_files) != name_for_split_files:
        raise gr.Error("Split files name cannot contain path separators.")

    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45,
    )

    zip_file_name = "audio_files.zip"
    saved_files = []
    try:
        # Consume every region before the source file is deleted (see the
        # finally clause); the regions are produced while iterating.
        for index, region in enumerate(audio_regions, start=1):
            saved_files.append(region.save(f"{name_for_split_files}-{index}.wav"))

        # Zip exactly the clips produced by this call instead of globbing
        # "*.wav", which would also pick up unrelated or stale files.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for clip_path in saved_files:
                zip_file.write(clip_path, os.path.basename(clip_path))

        if not show_amount_of_files_and_file_dur:
            return "Files split successfully!\n\nCheck below for zipped files.", zip_file_name

        total_length = 0.0
        for clip_path in saved_files:
            try:
                with wave.open(clip_path, "rb") as audio_file:
                    total_length += audio_file.getnframes() / float(audio_file.getframerate())
            except wave.Error as e:
                raise gr.Error(f"Error reading file: {e}")
        total_files = len(saved_files)
        length_mins = total_length / 60
        return f"Files split successfully!\n\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", zip_file_name
    finally:
        # The zip already holds the clips; drop the loose wavs and the source
        # on every exit path so the next run starts from a clean directory.
        for leftover in [audio_path, *saved_files]:
            if os.path.exists(leftover):
                os.remove(leftover)
def analyze_audio(zip_file_path):
    """Report the average sample rate of the WAV files inside a zip archive.

    The WAV headers are read straight from the archive, so nothing is
    extracted to disk and results from earlier uploads cannot leak into the
    average. Every member whose name ends in ``.wav`` (case-insensitive) is
    considered, including members inside folders in the archive.

    Args:
        zip_file_path: Path to (or file object of) the uploaded zip archive.

    Returns:
        ``"Average sample rate: <value>"`` when at least one WAV file could
        be read, otherwise ``"No average sample rate could be found."``.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if zip_file_path is None:
        raise gr.Error("Please upload a ZIP file first!")

    total_sample_rate = 0
    total_files = 0
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        for member_name in zip_ref.namelist():
            if not member_name.lower().endswith('.wav'):
                continue
            try:
                with zip_ref.open(member_name) as member_file:
                    with wave.open(member_file, 'rb') as audio_file:
                        total_sample_rate += audio_file.getframerate()
                        total_files += 1
            except (wave.Error, EOFError) as e:
                # Best effort: an unreadable or truncated file is reported
                # and skipped rather than failing the whole analysis.
                print(f"Error reading file: {e}")

    if total_files > 0:
        average_sample_rate = total_sample_rate / total_files
        return f"Average sample rate: {average_sample_rate}"
    return "No average sample rate could be found."
# Gradio UI: three tabs wired to download_video, split_audio and analyze_audio.
# NOTE(review): the nesting of the layout context managers below was
# reconstructed from the statement order — confirm against the rendered page.
with gr.Blocks(theme=gr.themes.Monochrome(), title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete, so please be patient.**"
    )
    with gr.Tabs():
        # Tab 1: fetch a video by URL and extract its audio.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        convertion = gr.Button("Download", variant='primary')
                        # The output component is created inline, so it is
                        # placed at this point of the layout.
                        convertion.click(
                            fn=download_video,
                            inputs=[url],
                            outputs=gr.Text(label="Output")
                        )
        # Tab 2: split the extracted audio and offer the clips as a zip.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
                        splitbtn.click(
                            split_audio,
                            inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )
        # Tab 3: miscellaneous helpers (currently only the sample-rate analyzer).
        with gr.TabItem("Misc tools"):
            with gr.Tab("SR analyzer"):
                gr.Markdown("Upload a zip file of your wavs here and this will determine the average sample rate.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            zipuploader = gr.File(file_count='single', file_types=[".zip"], label="ZIP file")
                            uploadbtn = gr.Button("Analyze", variant='primary')
                            uploadbtn.click(
                                analyze_audio,
                                [zipuploader],
                                [gr.Text(label="Result")]
                            )
# Start the web server for the app defined above.
app.launch()