Alexander Seifert
add css
c122f2d
import base64
import hmac
import json
import os

import modal
import streamlit as st
from loguru import logger
from pydub import AudioSegment
# Handle used by `transcribe` below to run the transcription remotely.
# NOTE(review): the two arguments are presumably the deployed Modal app name
# and the function tag within it -- confirm against the Modal version in use.
run_transcription = modal.lookup("ffpub-transcription", "run_transcription")

# Set the browser page title for the app.
# NOTE(review): Streamlit's docs appear to require this to be the first
# Streamlit command of the script -- confirm before moving any st.* call above it.
st.set_page_config(page_title="Speech to Text Transcription App")
@st.cache(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Run the remote transcription for a URL or for base64-encoded audio.

    The call is wrapped in ``st.cache`` with Streamlit's own spinner disabled.

    Args:
        url: Media URL to transcribe, or None.
        audio_b64: Base64 text of the audio to transcribe, or None.
        cutoff: Accepted for the caller's benefit but not forwarded; see the
            note below.

    Returns:
        Whatever ``run_transcription.call`` returns.
    """
    # NOTE(review): `cutoff` is deliberately NOT passed through -- the remote
    # function always receives cutoff=None. Confirm that this is intended
    # before changing it.
    request = {"url": url, "audio_b64": audio_b64, "cutoff": None}
    return run_transcription.call(**request)
def password_is_correct(password):
    """Return True if *password* equals one of the configured access codes.

    The accepted codes are read from the ``PASSWORD`` and ``ROOT_PASSWORD``
    environment variables on every call.

    Args:
        password: The access code entered by the user.

    Returns:
        True when *password* matches either code, otherwise False. Values
        that are not ``str`` never match.

    Raises:
        KeyError: If ``PASSWORD`` or ``ROOT_PASSWORD`` is not set.
    """
    # Look both variables up before anything else so that a missing variable
    # is reported no matter what was typed.
    accepted_codes = (os.environ["PASSWORD"], os.environ["ROOT_PASSWORD"])
    if not isinstance(password, str):
        return False

    # compare_digest only accepts ASCII str, so compare UTF-8 bytes instead.
    candidate = password.encode("utf-8")
    # Build the full list first (no short-circuit) so every code is always
    # compared with the constant-time helper rather than with `==`.
    matches = [
        hmac.compare_digest(candidate, code.encode("utf-8"))
        for code in accepted_codes
    ]
    return any(matches)
def input_is_ready(password, audio_file, url):
    """Tell whether a transcription may be started.

    Args:
        password: Access code entered by the user.
        audio_file: Audio input, or a falsy value when none was provided.
        url: Media URL, or a falsy value when none was provided.

    Returns:
        The falsy result of ``password_is_correct`` when the code is rejected;
        otherwise ``audio_file`` if it is truthy, else ``url``. The value is
        meant to be used in a boolean context.
    """
    authorised = password_is_correct(password)
    if not authorised:
        return authorised
    return audio_file or url
def run():
    """Render the transcription page for one Streamlit script run.

    The page asks for an access code and an input (a fixed example URL, a
    user-supplied URL, or an uploaded audio file), offers a preview of the
    input, and -- once the code is accepted and the button is pressed --
    calls ``transcribe`` and shows the transcript together with two download
    buttons (plain text and OTR JSON).

    Raises:
        KeyError: If a required password environment variable is not set.
    """
    st.markdown(
        "<style>section.main > div:first-child { padding: 0}</style>",
        unsafe_allow_html=True,
    )

    # Nothing in this function reads the flag; it is still initialised so the
    # session state looks the same to any other code that may rely on it.
    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")
    # Checked once and reused for both the button label and its disabled state.
    authorised = password_is_correct(password)

    example_option = "Beispiel (Kabarett)"
    url_option = "URL (YouTube, …)"
    upload_option = "Datei-Upload"

    url = audio_file = None
    col1, col2 = st.columns([1, 3])
    input_type = col1.radio(
        "Input-Typ",
        [example_option, url_option, upload_option],
        label_visibility="hidden",
    )
    if input_type == example_option:
        # Fixed demo video; the field is shown but cannot be edited.
        url = col2.text_input(
            "URL (e.g. YouTube video, Dropbox file, etc.)",
            value="https://www.youtube.com/watch?v=6UONiGMmbS4",
            disabled=True,
        )
    elif input_type == url_option:
        url = col2.text_input(
            "URL (e.g. YouTube video, Dropbox file, etc.)",
            value="",
        )
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    submit_button = col2.button(
        label="⚡ Transkribieren"
        + (" (Zugriffscode inkorrekt)" if not authorised else ""),
        disabled=(not authorised or (not audio_file and not url)),
    )

    cutoff = audio_b64 = None
    # Decoded (and possibly shortened) upload; kept separate from the raw
    # upload object so each name refers to one kind of value only.
    clip = None
    if audio_file or url:
        with st.expander(("Audio" if audio_file else "Video") + " abspielen"):
            if audio_file:
                st.audio(audio_file)
                # Only the root access code lifts the limit. pydub slices in
                # milliseconds, so 60_000 keeps the first 60 seconds.
                cutoff = None if password == os.environ["ROOT_PASSWORD"] else 60_000
                clip = AudioSegment.from_file(audio_file)[:cutoff]
                # export() hands back an open file object; close it once its
                # content has been read instead of leaving that to the GC.
                with clip.export() as exported:
                    audio_b64 = base64.b64encode(exported.read()).decode("ascii")
            if url:
                st.video(url)

    if input_is_ready(password, clip, url) and submit_button:
        with st.spinner("Transkription läuft..."):
            transcription = transcribe(url, audio_b64, cutoff)

        col1, col2 = st.columns([1, 1])
        col1.download_button(
            label="Transkript herunterladen",
            data=transcription["text"],
            file_name="transkript.txt",
            mime="text/plain",
        )
        col2.download_button(
            label="OTR-Datei herunterladen",
            data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
            file_name="transkript.otr",
            mime="application/json",
        )
        st.text_area("Transkript", transcription["text"], height=300)
# Top-level boundary of the script: render the page and turn any unexpected
# error into a friendly on-page message instead of a raw stack trace.
try:
    run()
except Exception:
    # logger.exception logs at ERROR level and attaches the traceback, which
    # the previous logger.error(e) call dropped.
    logger.exception("Unhandled error while rendering the transcription page")
    st.error(
        "Leider ist ein unerwarteter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter alexander.seifert@gmail.com"
    )