Kryptone committed on
Commit
f088aee
·
1 Parent(s): 4da32e9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr, glob, os, auditok, random, zipfile, wave
2
+ from pytube import YouTube
3
+ from moviepy.editor import VideoFileClip
4
+ import auditok
5
+
6
def download_video(url):
    """Download a YouTube video and save its audio track as ``output.wav``.

    The video is fetched with pytube (highest-resolution progressive
    stream), its audio is written to ``output.wav`` in the current working
    directory via moviepy, and the temporary video file is deleted again.

    Args:
        url: The YouTube link typed into the "URL" textbox.

    Returns:
        A status message shown in the UI once the WAV file has been written.

    Raises:
        gr.Error: If ``url`` is empty, no downloadable stream exists, or the
            downloaded video carries no audio track.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube URL first.")

    yt = YouTube(url.strip())
    video = yt.streams.get_highest_resolution()
    if video is None:
        raise gr.Error("No downloadable video stream was found for this URL.")

    # download() returns the path it actually wrote to, so there is no need
    # to rebuild it from default_filename.
    video_path = video.download()
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video has no audio track.")
            try:
                audio_clip.write_audiofile("output.wav")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # Remove only the file fetched by this call; unrelated *.mp4 files
        # in the working directory are left alone.
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to final tab."
19
+
20
def split_audio(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` on silence, zip the clips and report the result.

    The audio is cut with ``auditok.split`` (``max_silence=0.3``,
    ``energy_threshold=45``); each region is saved as
    ``<name_for_split_files>-<n>.wav``, all clips are packed into
    ``audio_files.zip`` and the loose clip files are removed afterwards.

    Args:
        mindur: Minimum clip duration, forwarded to auditok as ``min_dur``.
        maxdur: Maximum clip duration, forwarded to auditok as ``max_dur``.
        name_for_split_files: Prefix used for every generated clip file.
        show_amount_of_files_and_file_dur: When truthy, the status message
            also reports the number of clips and their total length.

    Returns:
        A ``(status_message, zip_file_name)`` tuple for the two UI outputs.

    Raises:
        gr.Error: On invalid durations, an empty clip name, a missing
            ``output.wav`` or a clip that cannot be read back as WAV.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This feature may be inaccurate especially for WAV files, so dont rely too much on the count and duration.")

    # A cleared number field arrives as None, which cannot be compared.
    if mindur is None or maxdur is None:
        raise gr.Error("Min duration and max duration must both be set.")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")
    # An empty textbox yields "" rather than None, so test for emptiness.
    if not name_for_split_files or not name_for_split_files.strip():
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")

    audio_path = "output.wav"
    if not os.path.isfile(audio_path):
        raise gr.Error("No output.wav found. Please download a video in the first tab before splitting.")

    zip_file_name = "audio_files.zip"
    message = "Files split successfully!\n\nCheck below for zipped files."
    split_files = []
    try:
        audio_regions = auditok.split(
            audio_path,
            min_dur=mindur,
            max_dur=maxdur,
            max_silence=0.3,
            energy_threshold=45,
        )
        for index, region in enumerate(audio_regions, start=1):
            # save() returns the name of the file it wrote.
            split_files.append(region.save(f"{name_for_split_files}-{index}.wav"))

        # Delete the source only after every region has been written, so the
        # split never reads from a file that is already gone.
        os.remove(audio_path)

        # Zip exactly the clips produced by this call, not every *.wav that
        # happens to be in the working directory.
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for clip in split_files:
                zip_file.write(clip, os.path.basename(clip))

        if show_amount_of_files_and_file_dur:
            total_length = 0.0
            for clip in split_files:
                try:
                    with wave.open(clip, "rb") as wav_file:
                        total_length += wav_file.getnframes() / float(wav_file.getframerate())
                except wave.Error as e:
                    raise gr.Error(f"Error reading file: {e}") from e
            length_mins = total_length / 60
            message += f"\n\n{len(split_files)} files created, {length_mins:.2f} minutes total."
    finally:
        # Always clean up the loose clips so they cannot leak into the zip
        # of a later run, whichever branch or error path was taken.
        for clip in split_files:
            if os.path.exists(clip):
                os.remove(clip)
    return message, zip_file_name
73
+
74
# Build the two-tab Gradio interface: the first tab downloads a video's audio
# via download_video, the second splits and zips it via split_audio.
with gr.Blocks(theme=gr.themes.Soft(), title="Global Dataset Maker") as app:
    gr.HTML("<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>")
    gr.Markdown("This Space will create a dataset for you, all automatically.")
    with gr.Tabs():
        # Tab 1: URL in, status text out.
        with gr.TabItem("Download Video"):
            with gr.Row():
                gr.Markdown("Enter a YT link here, and it will save as a WAV.")
                url = gr.Textbox(label="URL")
                convertion = gr.Button("Download", variant="primary")
                download_status = gr.Text(label="Output")
                convertion.click(fn=download_video, inputs=[url], outputs=download_status)
        # Tab 2: split settings in, status text and zip archive out.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                gr.Markdown("Split the WAV file based on silence. You can also set a name for the split files here too.")
                mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=8)
                name_for_split_files = gr.Textbox(label="Name for split files")
                show_amount_of_files_and_file_dur = gr.Checkbox(value=False, label="Show total amount of files and duration?")
                splitbtn = gr.Button("Split", variant="primary")
                split_status = gr.Text(label="Output")
                split_archive = gr.File(label="Zipped files")
                splitbtn.click(
                    fn=split_audio,
                    inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                    outputs=[split_status, split_archive],
                )

# Start the Gradio server for the assembled interface.
app.launch()