Neprox committed on
Commit
143dc39
·
1 Parent(s): 40da39c

Aesthetics and sampling_rate fix

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -12,6 +12,11 @@ def download_from_youtube(url):
12
  fpath = streams.first().download()
13
  return fpath
14
 
 
 
 
 
 
15
  def divide_into_30s_segments(audio_fpath, seconds_max):
16
  if not os.path.exists("segmented_audios"):
17
  os.makedirs("segmented_audios")
@@ -55,12 +60,16 @@ def transcribe(audio, url, seconds_max):
55
  fpath = download_from_youtube(url)
56
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
57
 
58
- audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio())
59
  print(audio_dataset)
60
  print(audio_dataset[0])
61
- text = pipe(audio_dataset["audio"])
62
  print(type(text))
63
  print(text)
 
 
 
 
64
  return text
65
 
66
  else:
 
12
  fpath = streams.first().download()
13
  return fpath
14
 
15
def get_timestamp(seconds):
    """Format a non-negative offset in seconds as an ``M:SS`` timestamp.

    Args:
        seconds: Offset from the start of the audio, in seconds. May be an
            int or a float; any fractional part is truncated.

    Returns:
        A string such as ``"1:05"``. Minutes are not capped at 59, so an
        offset of one hour renders as ``"60:00"``.
    """
    minutes, remainder = divmod(int(seconds), 60)
    # Zero-pad the seconds so 65 s reads "1:05" rather than the ambiguous "1:5".
    return f"{minutes}:{remainder:02d}"
19
+
20
  def divide_into_30s_segments(audio_fpath, seconds_max):
21
  if not os.path.exists("segmented_audios"):
22
  os.makedirs("segmented_audios")
 
60
  fpath = download_from_youtube(url)
61
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
62
 
63
+ audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
64
  print(audio_dataset)
65
  print(audio_dataset[0])
66
+ pred = pipe(audio_dataset["audio"])
67
  print(type(text))
68
  print(text)
69
+ text = ""
70
+ n_segments = len(segment_start_times)
71
+ for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
72
+ text += f"[Segment {i}/{n_segments}, start time {get_timestamp(seconds)}]\n{output['text']}\n"
73
  return text
74
 
75
  else: