Spaces:
Runtime error
Runtime error
File size: 4,023 Bytes
8fc7f5c b251a38 8fc7f5c 56cc953 8fc7f5c b251a38 f5ff208 4bf6ad4 b251a38 8fc7f5c 56cc953 8fc7f5c 6e406cd f5ff208 15a2fa0 8fc7f5c 6e406cd 8fc7f5c 6e406cd 8fc7f5c 6e406cd 8fc7f5c 6e406cd 8fc7f5c b251a38 6e406cd b251a38 8fc7f5c 6e406cd 8fc7f5c 161a5eb 15a2fa0 6e406cd 8fc7f5c d99c280 8fc7f5c 721a7b8 8fc7f5c 65c972e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import base64
import os
import time
from dataclasses import dataclass
from datetime import timedelta
import banana_dev as banana
import gradio as gr
from loguru import logger
from pydub import AudioSegment
# Credentials for the Banana API, read once when the module is imported.
# os.environ[...] raises KeyError if a variable is missing, so a
# misconfigured deployment fails here rather than on the first request.
api_key = os.environ["BANANA_API_KEY"]
model_key = os.environ["BANANA_MODEL_KEY"]
# NOTE(review): this module-level name is not read anywhere in the visible
# code -- run_demo looks up os.environ["PASSWORD"] itself and its `password`
# parameter shadows this variable. Presumably kept so a missing PASSWORD
# fails at startup; confirm before removing.
password = os.environ["PASSWORD"]
# Unit-conversion factors used when formatting timedeltas as timestamps.
MICROSECONDS_IN_MILLISECOND = 1_000
SECONDS_IN_MINUTE = 60
SECONDS_IN_HOUR = 60 * SECONDS_IN_MINUTE
HOURS_IN_DAY = 24
def timedelta_to_srt_timestamp(timedelta_timestamp):
    r"""
    Format a :py:class:`~datetime.timedelta` as an SRT timestamp string.

    The result has the shape ``HH:MM:SS,mmm``. Whole days are folded into
    the hour field and anything finer than a millisecond is truncated.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to format
    :returns: The timestamp in SRT format
    :rtype: str
    """
    # A timedelta stores itself as days / seconds-within-the-day /
    # microseconds, so each field is derived from those three attributes.
    day_seconds = timedelta_timestamp.seconds
    within_hour = day_seconds % SECONDS_IN_HOUR

    hours = (
        day_seconds // SECONDS_IN_HOUR
        + timedelta_timestamp.days * HOURS_IN_DAY
    )
    minutes = within_hour // SECONDS_IN_MINUTE
    seconds = within_hour % SECONDS_IN_MINUTE
    millis = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND

    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def timedelta_to_otr_timestamp(timedelta_timestamp):
    """
    Format a timedelta as a compact timestamp without milliseconds.

    Starts from the SRT form ``HH:MM:SS,mmm`` produced by
    ``timedelta_to_srt_timestamp``, drops a leading ``00:`` hour field when
    present and always drops the trailing ``,mmm`` part.

    :param datetime.timedelta timedelta_timestamp: The duration to format
    :returns: ``MM:SS`` when the hour field is ``00``, else ``HH:MM:SS``
    :rtype: str
    """
    srt_form = timedelta_to_srt_timestamp(timedelta_timestamp)
    # Skip the hour field only when it is exactly "00".
    first_kept = 3 if srt_form.startswith("00:") else 0
    # The last four characters are the ",mmm" millisecond suffix.
    return srt_form[first_kept:-4]
@dataclass
class Segment:
    """
    One piece of transcript text together with its start and end offsets.

    ``start`` and ``end`` are handed to ``timedelta(seconds=...)``, i.e. they
    are offsets in seconds.
    """

    text: str
    start: float
    end: float

    @property
    def start_ts(self) -> str:
        """Return ``start`` formatted by ``timedelta_to_otr_timestamp``."""
        return timedelta_to_otr_timestamp(timedelta(seconds=self.start))

    @property
    def end_ts(self) -> str:
        """Return ``end`` formatted by ``timedelta_to_otr_timestamp``."""
        return timedelta_to_otr_timestamp(timedelta(seconds=self.end))

    def __str__(self) -> str:
        """Return the start timestamp followed by the plain text."""
        return f"{self.start_ts} {self.text}"

    def to_otr(self) -> str:
        """
        Render the segment as an HTML paragraph with a timestamp span.

        The segment text is HTML-escaped before it is interpolated, so
        characters such as ``<`` or ``&`` in a transcript cannot break or
        inject markup.

        :returns: A ``<p>`` element containing the timestamp span and text
        :rtype: str
        """
        # Imported here so the method does not depend on a module-level
        # import that the rest of the file does not otherwise need.
        import html

        sep = " "
        # quote=False: the text is element content, not an attribute value,
        # so only &, < and > need escaping.
        safe_text = html.escape(self.text, quote=False)
        return (
            f'<p><span class="timestamp" data-timestamp="{self.start}">'
            f"{self.start_ts}</span>{sep}{safe_text}</p>"
        )
def transcribe(audio=None, url=None):
    """
    Send audio (or a URL) to the Banana-hosted model and return its output.

    :param audio: Optional audio clip. It must offer ``export()`` returning a
        readable binary file object (a pydub ``AudioSegment`` in this file).
        When given, it takes precedence over ``url``.
    :param url: Optional URL forwarded to the model when ``audio`` is None.
    :returns: The first entry of ``response["modelOutputs"]``
    :raises gr.Error: If the response contains an ``"error"`` key
    """
    # Compare against None explicitly: AudioSegment defines __len__, so a
    # zero-length clip is falsy and would otherwise be sent as a URL request.
    if audio is not None:
        # export() hands back an open file object; close it once the bytes
        # have been read instead of leaving that to garbage collection.
        with audio.export() as exported:
            audio_b64 = base64.b64encode(exported.read()).decode("ascii")
        payload = {"audio_b64": audio_b64}
    else:
        payload = {"url": url}

    response = banana.run(api_key, model_key, payload)
    # Logged through loguru like the rest of the file rather than print().
    logger.debug("banana response: {}", response)

    if "error" in response:
        raise gr.Error(response["error"])

    # TODO: not sure why response dict contains multiple model outputs
    return response["modelOutputs"][0]
def run_demo(password, url, file_upload):
    """
    Gradio callback: check the access code, then transcribe a file or URL.

    An uploaded file wins over a URL when both are supplied. Callers using
    the regular access code only get the first 60_000 ms of an uploaded
    file transcribed; the root code lifts that limit.

    :param str password: Access code entered by the user
    :param url: Media URL from the text box. An empty or whitespace-only
        string counts as "not provided".
    :param file_upload: Path of the uploaded audio file, or None
    :returns: The ``"text"`` entry of the transcription result
    :raises gr.Error: If the access code is wrong, or neither a URL nor a
        file was provided
    """
    user_password = os.environ["PASSWORD"]
    root_password = os.environ["ROOT_PASSWORD"]
    if password not in [user_password, root_password]:
        raise gr.Error("Der Zugriffscode ist falsch.")

    # A Gradio Textbox delivers "" (not None) when left empty. Normalise
    # blank input to None so the checks below can tell "no URL" apart.
    url = (url or "").strip() or None

    if (url is not None) and (file_upload is not None):
        logger.warning(
            "Achtung: Sie haben sowohl eine URL angegeben als auch eine Datei hochgeladen."
            " Wir verwenden nur die Datei, die Sie hochgeladen haben."
        )
    elif (url is None) and (file_upload is None):
        raise gr.Error(
            "Sie müssen entweder eine URL angeben oder eine Datei hochladen."
        )

    start = time.time()
    if file_upload is not None:
        # pydub slices by milliseconds; None keeps the whole clip.
        cutoff = None if password == root_password else 60_000
        # No explicit format: the upload widget does not restrict the file
        # type, so the container is detected from the file itself instead of
        # forcing the mp3 demuxer onto e.g. WAV or M4A uploads.
        audio = AudioSegment.from_file(file_upload)[:cutoff]
        transcription = transcribe(audio=audio, url=None)
    else:
        transcription = transcribe(audio=None, url=url)
    logger.info(f"transcription took {time.time()-start:.3f}s")

    return transcription["text"]
# Gradio UI definition. The three input components line up, in order, with
# run_demo's (password, url, file_upload) parameters; the string it returns
# is shown in the single output text box.
demo = gr.Interface(
    fn=run_demo,
    inputs=[
        # gr.Textbox(label="Email", type="email"),
        gr.Textbox(label="Zugriffscode (siehe oben)"),
        # gr.Audio(source="microphone", type="filepath", label="Aufnehmen"),
        gr.Textbox(label="URL (z.B. YouTube-Video, Dropbox-Datei, etc.)"),
        # type="filepath": run_demo receives a path on disk, not raw samples.
        gr.Audio(source="upload", type="filepath", label="Datei hochladen"),
    ],
    outputs=gr.Textbox(label="Automatisches Transkript"),
    allow_flagging="never",
    # Hide the page footer and any element carrying the .meta-text class.
    css="footer {visibility: hidden} .meta-text {visibility: hidden}",
)
# Executed at module level, so the app starts whenever this file is run or
# imported.
demo.launch()
|