Alexander Seifert committed on
Commit
a9bf4b2
1 Parent(s): 6e406cd

switch to streamlit app

Browse files
Files changed (3) hide show
  1. app.py +52 -122
  2. gradio-app.py +75 -0
  3. requirements.txt +1 -2
app.py CHANGED
@@ -1,129 +1,59 @@
1
  import base64
 
2
  import os
3
- import time
4
- from dataclasses import dataclass
5
- from datetime import timedelta
6
 
7
- import banana_dev as banana
8
- import gradio as gr
 
9
  from loguru import logger
10
  from pydub import AudioSegment
11
 
12
- api_key = os.environ["BANANA_API_KEY"]
13
- model_key = os.environ["BANANA_MODEL_KEY"]
14
- password = os.environ["PASSWORD"]
15
-
16
- SECONDS_IN_HOUR = 3600
17
- SECONDS_IN_MINUTE = 60
18
- HOURS_IN_DAY = 24
19
- MICROSECONDS_IN_MILLISECOND = 1000
20
-
21
-
22
- def timedelta_to_srt_timestamp(timedelta_timestamp):
23
- r"""
24
- Convert a :py:class:`~datetime.timedelta` to an SRT timestamp.
25
- .. doctest::
26
- >>> import datetime
27
- >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
28
- >>> timedelta_to_srt_timestamp(delta)
29
- '01:23:04,000'
30
- :param datetime.timedelta timedelta_timestamp: A datetime to convert to an SRT timestamp
31
- :returns: The timestamp in SRT format
32
- :rtype: str
33
- """
34
-
35
- hrs, secs_remainder = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
36
- hrs += timedelta_timestamp.days * HOURS_IN_DAY
37
- mins, secs = divmod(secs_remainder, SECONDS_IN_MINUTE)
38
- msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
39
- return "%02d:%02d:%02d,%03d" % (hrs, mins, secs, msecs)
40
-
41
-
42
- def timedelta_to_otr_timestamp(timedelta_timestamp):
43
- output = timedelta_to_srt_timestamp(timedelta_timestamp)
44
- if output.startswith("00:"):
45
- output = output[3:]
46
- return output[:-4]
47
-
48
-
49
- @dataclass
50
- class Segment:
51
- text: str
52
- start: float
53
- end: float
54
-
55
- @property
56
- def start_ts(self) -> str:
57
- return timedelta_to_otr_timestamp(timedelta(seconds=self.start))
58
-
59
- @property
60
- def end_ts(self):
61
- return timedelta_to_otr_timestamp(timedelta(seconds=self.end))
62
-
63
- def __str__(self):
64
- return f"{self.start_ts} {self.text}"
65
-
66
- def to_otr(self):
67
- sep = " "
68
- return f'<p><span class="timestamp" data-timestamp="{self.start}">{self.start_ts}</span>{sep}{self.text}</p>'
69
-
70
-
71
- def transcribe(audio=None, url=None):
72
- if audio:
73
- audio_b64 = base64.b64encode(audio.export().read()).decode("ascii")
74
- payload = {"audio_b64": audio_b64}
75
- else:
76
- payload = {"url": url}
77
- response = banana.run(api_key, model_key, payload)
78
- print(response)
79
-
80
- if "error" in response:
81
- raise gr.Error(response["error"])
82
-
83
- # TODO: not sure why response dict contains multiple model outputs
84
- return response["modelOutputs"][0]
85
-
86
-
87
- def run_demo(password, url, file_upload):
88
- if password not in [os.environ["PASSWORD"], os.environ["ROOT_PASSWORD"]]:
89
- raise gr.Error("Der Zugriffscode ist falsch.")
90
-
91
- if (url is not None) and (file_upload is not None):
92
- logger.warning(
93
- "Achtung: Sie haben sowohl eine URL angegeben als auch eine Datei hochgeladen."
94
- " Wir verwenden nur die Datei, die Sie hochgeladen haben."
95
- )
96
-
97
- elif (url is None) and (file_upload is None):
98
- raise gr.Error(
99
- "Sie müssen entweder eine URL angeben oder eine Datei hochladen."
100
  )
101
-
102
- start = time.time()
103
-
104
- if file_upload is not None:
105
- cutoff = None if password == os.environ["ROOT_PASSWORD"] else 60_000
106
- audio = AudioSegment.from_file(file_upload, format="mp3")[:cutoff]
107
- transcription = transcribe(audio=audio, url=None)
108
- else:
109
- transcription = transcribe(audio=None, url=url)
110
-
111
- logger.info(f"transcription took {time.time()-start:.3f}s")
112
- return transcription["text"]
113
-
114
-
115
- demo = gr.Interface(
116
- fn=run_demo,
117
- inputs=[
118
- # gr.Textbox(label="Email", type="email"),
119
- gr.Textbox(label="Zugriffscode (siehe oben)"),
120
- # gr.Audio(source="microphone", type="filepath", label="Aufnehmen"),
121
- gr.Textbox(label="URL (z.B. YouTube-Video, Dropbox-Datei, etc.)"),
122
- gr.Audio(source="upload", type="filepath", label="Datei hochladen"),
123
- ],
124
- outputs=gr.Textbox(label="Automatisches Transkript"),
125
- allow_flagging="never",
126
- css="footer {visibility: hidden} .meta-text {visibility: hidden}",
127
- )
128
-
129
- demo.launch()
1
  import base64
2
+ import json
3
  import os
 
 
 
4
 
5
+ import modal
6
+ import requests
7
+ import streamlit as st
8
  from loguru import logger
9
  from pydub import AudioSegment
10
 
11
+ # password = os.environ["PASSWORD"]
12
+ run_transcription = modal.lookup("ffpub-transcription", "run_transcription")
13
+
14
+ st.set_page_config(page_title="Speech to Text Transcription App")
15
+
16
+
17
+ @st.cache
18
+ def transcribe(url, audio_b64):
19
+ return run_transcription.call(url=url, audio_b64=audio_b64)
20
+
21
+
22
+ def run():
23
+ password = st.text_input("Zugriffscode (siehe oben)")
24
+ audio_file = st.file_uploader(
25
+ "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
26
+ )
27
+ url = st.text_input(
28
+ "URL (e.g. YouTube video, Dropbox file, etc.)",
29
+ value="",
30
+ )
31
+ # https://www.youtube.com/watch?v=pLAaQO1iPz0
32
+ submit_button = st.button(
33
+ label="Transkribieren", disabled=(not audio_file and not url)
34
+ )
35
+
36
+ if audio_file is not None:
37
+ st.audio(audio_file)
38
+ cutoff = None if password == "" else 60_000
39
+ audio_file = AudioSegment.from_file(audio_file)[:cutoff]
40
+ audio_b64 = base64.b64encode(audio_file.export().read()).decode("ascii")
41
+ if url:
42
+ st.video(url)
43
+
44
+ if submit_button:
45
+ audio_b64 = None
46
+ transcription = transcribe(url, audio_b64)
47
+ for seg in transcription["text"].split("\n\n"):
48
+ st.write(seg)
49
+ st.json(transcription)
50
+
51
+
52
+ if __name__ == "__main__":
53
+ try:
54
+ run()
55
+ except Exception as e:
56
+ logger.error(e)
57
+ st.error(
58
+ "Leider ist ein unerwarter Fehler aufgetreten. Ich könnte mir das Problem sofort ansehen, Sie erreichen mich unter alexander.seifert@gmail.com"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gradio-app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import time
4
+ from dataclasses import dataclass
5
+ from datetime import timedelta
6
+
7
+ import banana_dev as banana
8
+ import gradio as gr
9
+ from loguru import logger
10
+ from pydub import AudioSegment
11
+
12
+ api_key = os.environ["BANANA_API_KEY"]
13
+ model_key = os.environ["BANANA_MODEL_KEY"]
14
+ password = os.environ["PASSWORD"]
15
+
16
+
17
+ def transcribe(audio=None, url=None):
18
+ if audio:
19
+ audio_b64 = base64.b64encode(audio.export().read()).decode("ascii")
20
+ payload = {"audio_b64": audio_b64}
21
+ else:
22
+ payload = {"url": url}
23
+ response = banana.run(api_key, model_key, payload)
24
+ print(response)
25
+
26
+ if "error" in response:
27
+ raise gr.Error(response["error"])
28
+
29
+ # TODO: not sure why response dict contains multiple model outputs
30
+ return response["modelOutputs"][0]
31
+
32
+
33
+ def run_demo(password, url, file_upload):
34
+ if password not in [os.environ["PASSWORD"], os.environ["ROOT_PASSWORD"]]:
35
+ raise gr.Error("Der Zugriffscode ist falsch.")
36
+
37
+ if (url is not None) and (file_upload is not None):
38
+ logger.warning(
39
+ "Achtung: Sie haben sowohl eine URL angegeben als auch eine Datei hochgeladen."
40
+ " Wir verwenden nur die Datei, die Sie hochgeladen haben."
41
+ )
42
+
43
+ elif (url is None) and (file_upload is None):
44
+ raise gr.Error(
45
+ "Sie müssen entweder eine URL angeben oder eine Datei hochladen."
46
+ )
47
+
48
+ start = time.time()
49
+
50
+ if file_upload is not None:
51
+ cutoff = None if password == os.environ["ROOT_PASSWORD"] else 60_000
52
+ audio = AudioSegment.from_file(file_upload, format="mp3")[:cutoff]
53
+ transcription = transcribe(audio=audio, url=None)
54
+ else:
55
+ transcription = transcribe(audio=None, url=url)
56
+
57
+ logger.info(f"transcription took {time.time()-start:.3f}s")
58
+ return transcription["text"]
59
+
60
+
61
+ demo = gr.Interface(
62
+ fn=run_demo,
63
+ inputs=[
64
+ # gr.Textbox(label="Email", type="email"),
65
+ gr.Textbox(label="Zugriffscode (siehe oben)"),
66
+ # gr.Audio(source="microphone", type="filepath", label="Aufnehmen"),
67
+ gr.Textbox(label="URL (z.B. YouTube-Video, Dropbox-Datei, etc.)"),
68
+ gr.Audio(source="upload", type="filepath", label="Datei hochladen"),
69
+ ],
70
+ outputs=gr.Textbox(label="Automatisches Transkript"),
71
+ allow_flagging="never",
72
+ css="footer {visibility: hidden} .meta-text {visibility: hidden}",
73
+ )
74
+
75
+ demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- banana-dev
2
- gradio
3
  loguru
4
  pydub
1
+ modal-client
 
2
  loguru
3
  pydub