Aray Karjauv committed on
Commit
cb38808
β€’
1 Parent(s): 2a44c87
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +7 -6
  3. backend.py +0 -16
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Test
3
  emoji: πŸŒ–
4
  colorFrom: gray
5
  colorTo: green
@@ -10,4 +10,4 @@ pinned: false
10
  python_version: 3.10.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Speech recognition and diarization
3
  emoji: πŸŒ–
4
  colorFrom: gray
5
  colorTo: green
 
10
  python_version: 3.10.0
11
  ---
12
 
13
+
app.py CHANGED
@@ -18,13 +18,14 @@ import string
18
  from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
19
 
20
  def run():
21
- progress_bar.progress(5)
22
- placeholder.write("Downloading pre-trained model...")
23
- from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
24
- progress_bar.progress(25)
25
  if video_file is None:
26
  return
27
 
 
 
 
 
 
28
  video_file.seek(0)
29
  # file storage for streamlit < 1.11
30
  # id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
@@ -43,7 +44,7 @@ def run():
43
 
44
  placeholder.write("Removing noise...")
45
  get_speakers(tmpdirname)
46
- progress_bar.progress(45)
47
 
48
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
49
  # https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
@@ -55,7 +56,7 @@ def run():
55
 
56
  placeholder.write("Diarisation...")
57
  speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
58
- progress_bar.progress(70)
59
 
60
  placeholder.write("Extracting subtitles...")
61
  timeline = get_subtitles(speaker_diarisation, cleaned_path)
 
18
  from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
19
 
20
  def run():
 
 
 
 
21
  if video_file is None:
22
  return
23
 
24
+ progress_bar.progress(1)
25
+ placeholder.write("Downloading pre-trained model...")
26
+ from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
27
+ progress_bar.progress(15)
28
+
29
  video_file.seek(0)
30
  # file storage for streamlit < 1.11
31
  # id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
 
44
 
45
  placeholder.write("Removing noise...")
46
  get_speakers(tmpdirname)
47
+ progress_bar.progress(50)
48
 
49
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
50
  # https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
 
56
 
57
  placeholder.write("Diarisation...")
58
  speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
59
+ progress_bar.progress(75)
60
 
61
  placeholder.write("Extracting subtitles...")
62
  timeline = get_subtitles(speaker_diarisation, cleaned_path)
backend.py CHANGED
@@ -38,8 +38,6 @@ def add_flags(parser):
38
  # device = "cpu"
39
  pretrained.add_model_flags(parser)
40
  parser.add_argument('--device', default=device)
41
- parser.add_argument('--dry', type=float, default=0,
42
- help='dry/wet knob coefficient. 0 is only denoised, 1 only input signal.')
43
  parser.add_argument('--num_workers', type=int, default=0)
44
  parser.add_argument('--streaming', action="store_true",
45
  help="true streaming evaluation for Demucs")
@@ -66,20 +64,6 @@ denoise_model.eval()
66
  whisper_model = whisper.load_model("large").to(args.device)
67
  whisper_model.eval()
68
 
69
- def get_estimate(model, noisy, args):
70
- torch.set_num_threads(1)
71
- if args.streaming:
72
- streamer = DemucsStreamer(model, dry=args.dry)
73
- with torch.no_grad():
74
- estimate = torch.cat([
75
- streamer.feed(noisy[0]),
76
- streamer.flush()], dim=1)[None]
77
- else:
78
- with torch.no_grad():
79
- estimate = model(noisy)
80
- estimate = (1 - args.dry) * estimate + args.dry * noisy
81
- return estimate
82
-
83
  def split_audio(tmpdirname, video, chunk_size=120):
84
  """
85
  Split audio into chunks of chunk_size
 
38
  # device = "cpu"
39
  pretrained.add_model_flags(parser)
40
  parser.add_argument('--device', default=device)
 
 
41
  parser.add_argument('--num_workers', type=int, default=0)
42
  parser.add_argument('--streaming', action="store_true",
43
  help="true streaming evaluation for Demucs")
 
64
  whisper_model = whisper.load_model("large").to(args.device)
65
  whisper_model.eval()
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def split_audio(tmpdirname, video, chunk_size=120):
68
  """
69
  Split audio into chunks of chunk_size