Aray Karjauv committed on
Commit
cb38808
β€’
1 Parent(s): 2a44c87
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +7 -6
  3. backend.py +0 -16
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Test
3
  emoji: πŸŒ–
4
  colorFrom: gray
5
  colorTo: green
@@ -10,4 +10,4 @@ pinned: false
10
  python_version: 3.10.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Speech recognition and diarization
3
  emoji: πŸŒ–
4
  colorFrom: gray
5
  colorTo: green
 
10
  python_version: 3.10.0
11
  ---
12
 
13
+
app.py CHANGED
@@ -18,13 +18,14 @@ import string
18
  from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
19
 
20
  def run():
21
- progress_bar.progress(5)
22
- placeholder.write("Downloading pre-trained model...")
23
- from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
24
- progress_bar.progress(25)
25
  if video_file is None:
26
  return
27
 
 
 
 
 
 
28
  video_file.seek(0)
29
  # file storage for streamlit < 1.11
30
  # id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
@@ -43,7 +44,7 @@ def run():
43
 
44
  placeholder.write("Removing noise...")
45
  get_speakers(tmpdirname)
46
- progress_bar.progress(45)
47
 
48
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
49
  # https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
@@ -55,7 +56,7 @@ def run():
55
 
56
  placeholder.write("Diarisation...")
57
  speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
58
- progress_bar.progress(70)
59
 
60
  placeholder.write("Extracting subtitles...")
61
  timeline = get_subtitles(speaker_diarisation, cleaned_path)
 
18
  from streamlit.in_memory_file_manager import in_memory_file_manager as file_mng
19
 
20
  def run():
 
 
 
 
21
  if video_file is None:
22
  return
23
 
24
+ progress_bar.progress(1)
25
+ placeholder.write("Downloading pre-trained model...")
26
+ from backend import get_speakers, split_audio, get_subtitles, timeline_to_vtt, calc_speaker_percentage
27
+ progress_bar.progress(15)
28
+
29
  video_file.seek(0)
30
  # file storage for streamlit < 1.11
31
  # id = storage.load_and_get_id(video_file.read(), video_file.type, "media")
 
44
 
45
  placeholder.write("Removing noise...")
46
  get_speakers(tmpdirname)
47
+ progress_bar.progress(50)
48
 
49
  # https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
50
  # https://github.com/streamlit/streamlit/blob/10ae0d651b18d4258e3b7cbbc9313d395a073768/lib/streamlit/elements/media.py#L204
 
56
 
57
  placeholder.write("Diarisation...")
58
  speaker_diarisation, cleaned_path = get_speakers(tmpdirname)
59
+ progress_bar.progress(75)
60
 
61
  placeholder.write("Extracting subtitles...")
62
  timeline = get_subtitles(speaker_diarisation, cleaned_path)
backend.py CHANGED
@@ -38,8 +38,6 @@ def add_flags(parser):
38
  # device = "cpu"
39
  pretrained.add_model_flags(parser)
40
  parser.add_argument('--device', default=device)
41
- parser.add_argument('--dry', type=float, default=0,
42
- help='dry/wet knob coefficient. 0 is only denoised, 1 only input signal.')
43
  parser.add_argument('--num_workers', type=int, default=0)
44
  parser.add_argument('--streaming', action="store_true",
45
  help="true streaming evaluation for Demucs")
@@ -66,20 +64,6 @@ denoise_model.eval()
66
  whisper_model = whisper.load_model("large").to(args.device)
67
  whisper_model.eval()
68
 
69
- def get_estimate(model, noisy, args):
70
- torch.set_num_threads(1)
71
- if args.streaming:
72
- streamer = DemucsStreamer(model, dry=args.dry)
73
- with torch.no_grad():
74
- estimate = torch.cat([
75
- streamer.feed(noisy[0]),
76
- streamer.flush()], dim=1)[None]
77
- else:
78
- with torch.no_grad():
79
- estimate = model(noisy)
80
- estimate = (1 - args.dry) * estimate + args.dry * noisy
81
- return estimate
82
-
83
  def split_audio(tmpdirname, video, chunk_size=120):
84
  """
85
  Split audio into chunks of chunk_size
 
38
  # device = "cpu"
39
  pretrained.add_model_flags(parser)
40
  parser.add_argument('--device', default=device)
 
 
41
  parser.add_argument('--num_workers', type=int, default=0)
42
  parser.add_argument('--streaming', action="store_true",
43
  help="true streaming evaluation for Demucs")
 
64
  whisper_model = whisper.load_model("large").to(args.device)
65
  whisper_model.eval()
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def split_audio(tmpdirname, video, chunk_size=120):
68
  """
69
  Split audio into chunks of chunk_size