Spaces:
Runtime error
Runtime error
Aray Karjauv
committed on
Commit
•
cb38808
1
Parent(s):
2a44c87
- README.md +2 -2
- app.py +7 -6
- backend.py +0 -16
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
@@ -10,4 +10,4 @@ pinned: false
|
|
10 |
python_version: 3.10.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
1 |
---
|
2 |
+
title: Speech recognition and diarization
|
3 |
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
|
|
10 |
python_version: 3.10.0
|
11 |
---
|
12 |
|
13 |
+
|
app.py
CHANGED
@@ -18,13 +18,14 @@ import string
|
|
18 |
from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
|
19 |
|
20 |
def run():
|
21 |
-
progress_bar.progress(5)
|
22 |
-
placeholder.write("Downloading pre-trained model...")
|
23 |
-
from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
|
24 |
-
progress_bar.progress(25)
|
25 |
if video_file is None:
|
26 |
return
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
video_file.seek(0)
|
29 |
# file storage for streamlit < 1.11
|
30 |
# id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
|
@@ -43,7 +44,7 @@ def run():
|
|
43 |
|
44 |
placeholder.write("Removing noise...")
|
45 |
get_speakers(tmpdirname)
|
46 |
-
progress_bar.progress(
|
47 |
|
48 |
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
49 |
# https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
|
@@ -55,7 +56,7 @@ def run():
|
|
55 |
|
56 |
placeholder.write("Diarisation...")
|
57 |
speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
|
58 |
-
progress_bar.progress(
|
59 |
|
60 |
placeholder.write("Extracting subtitles...")
|
61 |
timeline = get_subtitles(speaker_diarisation, cleaned_path)
|
|
|
18 |
from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
|
19 |
|
20 |
def run():
|
|
|
|
|
|
|
|
|
21 |
if video_file is None:
|
22 |
return
|
23 |
|
24 |
+
progress_bar.progress(1)
|
25 |
+
placeholder.write("Downloading pre-trained model...")
|
26 |
+
from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
|
27 |
+
progress_bar.progress(15)
|
28 |
+
|
29 |
video_file.seek(0)
|
30 |
# file storage for streamlit < 1.11
|
31 |
# id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
|
|
|
44 |
|
45 |
placeholder.write("Removing noise...")
|
46 |
get_speakers(tmpdirname)
|
47 |
+
progress_bar.progress(50)
|
48 |
|
49 |
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
50 |
# https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
|
|
|
56 |
|
57 |
placeholder.write("Diarisation...")
|
58 |
speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
|
59 |
+
progress_bar.progress(75)
|
60 |
|
61 |
placeholder.write("Extracting subtitles...")
|
62 |
timeline = get_subtitles(speaker_diarisation, cleaned_path)
|
backend.py
CHANGED
@@ -38,8 +38,6 @@ def add_flags(parser):
|
|
38 |
# device = "cpu"
|
39 |
pretrained.add_model_flags(parser)
|
40 |
parser.add_argument('--device', default=device)
|
41 |
-
parser.add_argument('--dry', type=float, default=0,
|
42 |
-
help='dry/wet knob coefficient. 0 is only denoised, 1 only input signal.')
|
43 |
parser.add_argument('--num_workers', type=int, default=0)
|
44 |
parser.add_argument('--streaming', action="store_true",
|
45 |
help="true streaming evaluation for Demucs")
|
@@ -66,20 +64,6 @@ denoise_model.eval()
|
|
66 |
whisper_model = whisper.load_model("large").to(args.device)
|
67 |
whisper_model.eval()
|
68 |
|
69 |
-
def get_estimate(model, noisy, args):
|
70 |
-
torch.set_num_threads(1)
|
71 |
-
if args.streaming:
|
72 |
-
streamer = DemucsStreamer(model, dry=args.dry)
|
73 |
-
with torch.no_grad():
|
74 |
-
estimate = torch.cat([
|
75 |
-
streamer.feed(noisy[0]),
|
76 |
-
streamer.flush()], dim=1)[None]
|
77 |
-
else:
|
78 |
-
with torch.no_grad():
|
79 |
-
estimate = model(noisy)
|
80 |
-
estimate = (1 - args.dry) * estimate + args.dry * noisy
|
81 |
-
return estimate
|
82 |
-
|
83 |
def split_audio(tmpdirname, video, chunk_size=120):
|
84 |
"""
|
85 |
Split audio into chunks of chunk_size
|
|
|
38 |
# device = "cpu"
|
39 |
pretrained.add_model_flags(parser)
|
40 |
parser.add_argument('--device', default=device)
|
|
|
|
|
41 |
parser.add_argument('--num_workers', type=int, default=0)
|
42 |
parser.add_argument('--streaming', action="store_true",
|
43 |
help="true streaming evaluation for Demucs")
|
|
|
64 |
whisper_model = whisper.load_model("large").to(args.device)
|
65 |
whisper_model.eval()
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def split_audio(tmpdirname, video, chunk_size=120):
|
68 |
"""
|
69 |
Split audio into chunks of chunk_size
|