# Streamlit front end for a speech-to-text service.
#
# The user supplies an access code plus either a URL or an uploaded audio
# file; the input is sent to the remote ``run_transcription`` function looked
# up through Modal, and the result is offered as plain text and as an OTR file.
import base64
import json
import os

import modal
import streamlit as st
from loguru import logger
from pydub import AudioSegment

run_transcription = modal.lookup("ffpub-transcription", "run_transcription")

st.set_page_config(page_title="Speech to Text Transcription App")

# Labels of the input-type radio buttons. They double as the values the radio
# widget returns, so they are defined once and compared by name below.
INPUT_EXAMPLE = "Beispiel (Kabarett)"
INPUT_URL = "URL (YouTube, …)"
INPUT_UPLOAD = "Datei-Upload"

URL_FIELD_LABEL = "URL (e.g. YouTube video, Dropbox file, etc.)"
EXAMPLE_URL = "https://www.youtube.com/watch?v=6UONiGMmbS4"

# Uploads from users without the root password are trimmed to this length.
# pydub slices AudioSegment objects in milliseconds, i.e. this is 60 seconds.
NON_ROOT_CUTOFF_MS = 60_000


@st.cache(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Call the remote transcription function and return its result.

    The call is wrapped in ``st.cache`` so that Streamlit re-runs with the
    same arguments reuse the previous result instead of transcribing again.

    Args:
        url: Media URL to transcribe, or ``None`` when an upload is used.
        audio_b64: Base64-encoded (ASCII) audio data, or ``None`` when a URL
            is used.
        cutoff: Length limit forwarded to the remote function, or ``None``
            for no limit.

    Returns:
        Whatever the remote function returns; ``run`` reads its ``"text"``
        and ``"otr"`` entries.
    """
    # Forward the caller's cutoff; it used to be replaced by a hard-coded None.
    return run_transcription.call(url=url, audio_b64=audio_b64, cutoff=cutoff)


def password_is_correct(password):
    """Return True if *password* equals the regular or the root access code.

    Both codes are read from the environment on every call.

    Raises:
        KeyError: If ``PASSWORD`` or ``ROOT_PASSWORD`` is not set.
    """
    return password in (os.environ["PASSWORD"], os.environ["ROOT_PASSWORD"])


def input_is_ready(password, audio_file, url):
    """Tell whether a transcription may be started.

    Returns a truthy value when the access code is accepted and at least one
    of *audio_file* / *url* is truthy, and a falsy value otherwise. The result
    is not normalised to ``bool``.
    """
    return password_is_correct(password) and (audio_file or url)


def run():
    """Render the page and, when requested, run and display a transcription."""
    st.markdown(
        "",
        unsafe_allow_html=True,
    )
    # Nothing in this file reads this flag; it is kept so the session state
    # looks the same as before.
    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")

    url = audio_file = None
    col1, col2 = st.columns([1, 3])
    input_type = col1.radio(
        "Input-Typ",
        [INPUT_EXAMPLE, INPUT_URL, INPUT_UPLOAD],
        label_visibility="hidden",
    )
    if input_type == INPUT_EXAMPLE:
        # Fixed demo URL, shown read-only.
        url = col2.text_input(URL_FIELD_LABEL, value=EXAMPLE_URL, disabled=True)
    elif input_type == INPUT_URL:
        url = col2.text_input(URL_FIELD_LABEL, value="")
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    # Check the access code once and reuse the result for label and state.
    access_granted = password_is_correct(password)
    submit_button = col2.button(
        label="⚡ Transkribieren"
        + ("" if access_granted else " (Zugriffscode inkorrekt)"),
        disabled=(not access_granted or (not audio_file and not url)),
    )

    cutoff = audio_b64 = None
    if audio_file or url:
        with st.expander(("Audio" if audio_file else "Video") + " abspielen"):
            if audio_file:
                st.audio(audio_file)
                # Only the root access code lifts the length limit. The cutoff
                # is set for uploads only; URL inputs keep cutoff=None.
                if password == os.environ["ROOT_PASSWORD"]:
                    cutoff = None
                else:
                    cutoff = NON_ROOT_CUTOFF_MS
                # From here on audio_file refers to the trimmed segment rather
                # than the uploaded file; the readiness check below uses it.
                audio_file = AudioSegment.from_file(audio_file)[:cutoff]
                # export() hands back an open file object; close it once read.
                with audio_file.export() as exported:
                    audio_b64 = base64.b64encode(exported.read()).decode("ascii")
            if url:
                st.video(url)

    if input_is_ready(password, audio_file, url) and submit_button:
        with st.spinner("Transkription läuft..."):
            transcription = transcribe(url, audio_b64, cutoff)

        col1, col2 = st.columns([1, 1])
        col1.download_button(
            label="Transkript herunterladen",
            data=transcription["text"],
            file_name="transkript.txt",
            mime="text/plain",
        )
        col2.download_button(
            label="OTR-Datei herunterladen",
            data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
            file_name="transkript.otr",
            mime="application/json",
        )
        st.text_area("Transkript", transcription["text"], height=300)


# Top-level boundary: any failure is logged and replaced by a generic message
# in the UI instead of Streamlit's default error output.
try:
    run()
except Exception as e:
    # logger.exception records the traceback along with the message.
    logger.exception(e)
    st.error(
        "Leider ist ein unerwarter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter alexander.seifert@gmail.com"
    )