Spaces:

KanvaBhatia
/

AudioPure

Running

KanvaBhatia committed on Dec 3, 2023

Commit

68647bf

•

1 Parent(s): 89e1e33

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,11 +31,15 @@ model = torch.load(("model.pth"), map_location=torch.device('cpu'))
 model.to(DEVICE)
 model.eval()
-def identity(x):
-    print(x)
     # audio = mp.AudioFileClip(x)
-    wav_file = x
     # audio.write_audiofile(wav_file)
     print("Wav stored.")
     meta = AudioMetaData(-1, -1, -1, -1, "")
     sr = config("sr", 48000, int, section="df")
@@ -63,13 +67,22 @@ def identity(x):
     enhanced = torch.cat(estimate, dim = -1)
     sr = meta.sample_rate
     save_audio("enhanced_aud.wav", enhanced, sr)
-    return "enhanced_aud.wav"
 demo = gr.Interface(
     fn=identity,
     title="Audio Denoiser using DeepFilterNet V3",
     description="Implemented audio denoising using DeepFilterNet V3, enabled processing of larger files even on cpu, by splitting up the audio file into chunks of 1 minute each.\n\nThe processing will be very slow since it's the free version of HuggingFace, 2 second audio can take about 5 minutes.",
-    inputs=gr.Audio(type='filepath'),
-    outputs=gr.Audio(label="Output Audio"),
 )
 demo.launch()

 model.to(DEVICE)
 model.eval()
+def identity(video_path):
+    print(video_path)
     # audio = mp.AudioFileClip(x)
+    # wav_file = x
     # audio.write_audiofile(wav_file)
+    video = mp.VideoFileClip(video_path)
+    audio = video.audio
+    wav_file = "tmp.wav"
+    audio.write_audiofile(wav_file)
     print("Wav stored.")
     meta = AudioMetaData(-1, -1, -1, -1, "")
     sr = config("sr", 48000, int, section="df")
     enhanced = torch.cat(estimate, dim = -1)
     sr = meta.sample_rate
     save_audio("enhanced_aud.wav", enhanced, sr)
+    audio = mp.AudioFileClip('enhanced_aud.wav')
+    video = mp.VideoFileClip(video_path)
+    final_video = video.set_audio(audio)
+    final_video.write_videofile("output_video.mp4",
+            codec='libx264',
+            audio_codec='aac',
+            temp_audiofile='temp-audio.m4a',
+            remove_temp=True
+            )
+    return "output_video.mp4"
 demo = gr.Interface(
     fn=identity,
     title="Audio Denoiser using DeepFilterNet V3",
     description="Implemented audio denoising using DeepFilterNet V3, enabled processing of larger files even on cpu, by splitting up the audio file into chunks of 1 minute each.\n\nThe processing will be very slow since it's the free version of HuggingFace, 2 second audio can take about 5 minutes.",
+   inputs=gr.Video(label="Input Video", sources="upload"),
+    outputs=gr.Video(label="Output Video"),
 )
 demo.launch()