Alexander Seifert committed on
Commit
b251a38
1 Parent(s): d99c280

improve chunking of texts

Browse files
Files changed (1) hide show
  1. app.py +64 -1
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import base64
2
  import os
3
  import time
 
 
4
 
5
  import banana_dev as banana
6
  import gradio as gr
@@ -11,6 +13,57 @@ api_key = os.environ["BANANA_API_KEY"]
11
  model_key = os.environ["BANANA_MODEL_KEY"]
12
  password = os.environ["PASSWORD"]
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def transcribe(audio=None, url=None):
16
  if audio:
@@ -47,8 +100,18 @@ def run_demo(password, microphone, file_upload):
47
 
48
  start = time.time()
49
  transcription = transcribe(AudioSegment.from_file(file)[:60_000])
 
 
 
 
 
 
 
 
 
 
50
  logger.info(f"transcription took {time.time()-start:.3f}s")
51
- return "\n\n".join([seg["text"].strip() for seg in transcription["segments"]])
52
 
53
 
54
  demo = gr.Interface(
 
1
  import base64
2
  import os
3
  import time
4
+ from dataclasses import dataclass
5
+ from datetime import timedelta
6
 
7
  import banana_dev as banana
8
  import gradio as gr
 
13
  model_key = os.environ["BANANA_MODEL_KEY"]
14
  password = os.environ["PASSWORD"]
15
 
16
# Unit-conversion factors used by timedelta_to_srt_timestamp.
SECONDS_IN_HOUR = 3600
SECONDS_IN_MINUTE = 60
HOURS_IN_DAY = 24
MICROSECONDS_IN_MILLISECOND = 1000


def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Convert a :py:class:`~datetime.timedelta` to an SRT timestamp.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    Whole days are folded into the hour field, so the hours may exceed two
    digits. Negative durations get no special handling here.

    :param datetime.timedelta timedelta_timestamp: A timedelta to convert to an SRT timestamp
    :returns: The timestamp in SRT format (``HH:MM:SS,mmm``)
    :rtype: str
    """
    # A timedelta stores days, seconds and microseconds separately; split the
    # seconds component first, then add the hours contributed by whole days.
    hrs, secs_remainder = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
    hrs += timedelta_timestamp.days * HOURS_IN_DAY
    mins, secs = divmod(secs_remainder, SECONDS_IN_MINUTE)
    # Floor division: sub-millisecond precision is truncated, not rounded.
    msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
    return f"{hrs:02d}:{mins:02d}:{secs:02d},{msecs:03d}"
40
+
41
+
42
def timedelta_to_otr_timestamp(timedelta_timestamp):
    """Render a timedelta as a compact timestamp without milliseconds.

    The value is first formatted by ``timedelta_to_srt_timestamp``; the
    trailing four characters (the ``,mmm`` part) are then removed, and a
    leading ``00:`` field is dropped when present.

    :param datetime.timedelta timedelta_timestamp: The duration to format
    :returns: The shortened timestamp string
    :rtype: str
    """
    srt_timestamp = timedelta_to_srt_timestamp(timedelta_timestamp)
    # Cut the millisecond suffix before deciding whether to drop the prefix.
    trimmed = srt_timestamp[:-4]
    if srt_timestamp.startswith("00:"):
        return trimmed[3:]
    return trimmed
47
+
48
+
49
@dataclass
class Segment:
    """A piece of transcribed text together with its start and end offsets.

    ``start`` and ``end`` are handed to ``timedelta(seconds=...)`` when the
    display timestamps are built. ``str(segment)`` yields an HTML paragraph
    holding a timestamp ``<span>`` followed by the text.
    """

    text: str
    start: float  # offset in seconds (fed to timedelta(seconds=...))
    end: float  # offset in seconds (fed to timedelta(seconds=...))

    @property
    def start_ts(self) -> str:
        """Return the start offset formatted by ``timedelta_to_otr_timestamp``."""
        return timedelta_to_otr_timestamp(timedelta(seconds=self.start))

    @property
    def end_ts(self) -> str:
        """Return the end offset formatted by ``timedelta_to_otr_timestamp``."""
        return timedelta_to_otr_timestamp(timedelta(seconds=self.end))

    def __str__(self) -> str:
        """Return the segment as an HTML ``<p>`` element.

        The raw start offset goes into the ``data-timestamp`` attribute and
        the formatted start timestamp into the visible ``<span>``.
        """
        # Imported locally so this class does not depend on a separate
        # change to the module's import block.
        import html

        sep = " "
        # The text is free-form transcription output; escape &, < and > so it
        # cannot break or inject markup. Quotes are harmless in element
        # content, so they are left untouched.
        safe_text = html.escape(self.text, quote=False)
        return f'<p><span class="timestamp" data-timestamp="{self.start}">{self.start_ts}</span>{sep}{safe_text}</p>'
66
+
67
 
68
  def transcribe(audio=None, url=None):
69
  if audio:
 
100
 
101
  start = time.time()
102
  transcription = transcribe(AudioSegment.from_file(file)[:60_000])
103
+
104
+ segments = []
105
+
106
+ for seg in transcription["segments"]:
107
+ text = seg["text"].strip()
108
+ if not segments or segments[-1].text[-1] in ".:?!":
109
+ segments.append(Segment(text, seg["start"], seg["end"]))
110
+ else:
111
+ segments[-1].text += " " + text
112
+
113
  logger.info(f"transcription took {time.time()-start:.3f}s")
114
+ return "\n\n".join(str(s) for s in segments)
115
 
116
 
117
  demo = gr.Interface(