Spaces:

aseifert
/

ffpub-transcription

Sleeping

File size: 4,393 Bytes

8fc7f5c
e414a67
b602cc6
8fc7f5c
a9bf4b2
b67a63e
a9bf4b2
8fc7f5c
56cc953
8fc7f5c
e6989d8
 
 
11f0950
 
 
a9bf4b2
 
 
 
a1e6550
8930d8d
994f902
b67a63e
 
 
 
a9bf4b2
 
e414a67
e6989d8
e414a67
 
 
 
 
 
a9bf4b2
c122f2d
cb19e55
c122f2d
 
 
d0fed63
e414a67
 
 
 
 
 
a9bf4b2
e414a67
 
 
999c934
acf3663
8ae6044
999c934
222d696
 
 
 
 
 
8ae6044
e414a67
 
 
 
b67a63e
 
 
 
e414a67
 
 
 
d8a8864
 
e414a67
 
11f0950
a9bf4b2
 
8930d8d
b67a63e
e414a67
5ac50f7
 
 
b67a63e
5ac50f7
 
b67a63e
 
5ac50f7
e414a67
 
 
 
 
 
 
d0fed63
e414a67
8930d8d
d0fed63
a9bf4b2
999c934
a190643
d0fed63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9bf4b2
5d9101b
 
 
 
 
11f0950
5d9101b

import base64
import json
import os

import modal
import requests
import streamlit as st
from loguru import logger
from pydub import AudioSegment

# Access codes accepted by the app, read as a comma-separated list from the
# PASSWORD environment variable. A missing variable raises KeyError at import.
PASSWORDS = os.environ["PASSWORD"].split(",")
# Comma-separated "root" access codes from ROOT_PASSWORD. run() applies no
# 60-second cutoff when the entered code is in this list.
ROOT_PASSWORDS = os.environ["ROOT_PASSWORD"].split(",")

# Handle to the "run_transcription" function of the "ffpub-transcription"
# Modal app in the "main" environment.
# NOTE(review): nothing in the visible code calls this handle; transcribe()
# talks to the HTTP endpoint instead. Confirm whether the lookup is still needed.
run_transcription = modal.Function.lookup(
    "ffpub-transcription", "run_transcription", environment_name="main"
)

# Configure the Streamlit page title before any other UI element is rendered.
st.set_page_config(page_title="Speech to Text Transcription App")


@st.cache(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Send one transcription request to the hosted endpoint and return its reply.

    Results are memoized by ``st.cache`` on the argument values, so repeating
    the same request does not hit the service again.

    Args:
        url: Media URL to transcribe, or None when audio is supplied inline.
        audio_b64: Base64-encoded audio, or None when a URL is supplied.
        cutoff: Limit forwarded to the service; a falsy value is replaced by
            60_000 before sending.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    payload = {"url": url, "audio_b64": audio_b64, "cutoff": cutoff or 60_000}
    response = requests.post(
        "https://aseifert--ffpub-transcription-fastapi-app.modal.run/transcribe", json=payload
    )
    # Fail loudly on 4xx/5xx: returning the error body would let st.cache
    # memoize it, so retrying the same input would keep replaying the failure.
    response.raise_for_status()
    return response.json()


def password_is_correct(password):
    """Tell whether *password* is one of the configured access codes.

    Both the regular list (``PASSWORDS``) and the root list
    (``ROOT_PASSWORDS``) count as valid.
    """
    return any(password in accepted for accepted in (PASSWORDS, ROOT_PASSWORDS))


def input_is_ready(password, audio_file, url):
    """Report whether a transcription can be started.

    The access code must be valid and at least one input (uploaded file or
    URL) must be present. The result is meant to be used for its truthiness:
    it is ``False`` for a wrong code, otherwise whichever of *audio_file* or
    *url* is set (or the falsy *url* when neither is).
    """
    if not password_is_correct(password):
        return False
    return audio_file or url


# Demo video offered by the example input option; run() applies no cutoff to it.
_EXAMPLE_URL = "https://www.youtube.com/watch?v=6UONiGMmbS4"
# Cutoff in seconds applied when the access code is not a root code.
_DEFAULT_CUTOFF_SECONDS = 60

# Labels of the input-source radio options.
_INPUT_EXAMPLE = "Beispiel (Kabarett)"
_INPUT_URL = "URL (YouTube, …)"
_INPUT_UPLOAD = "Datei-Upload"


def _normalize_url(url):
    """Strip whitespace and rewrite youtu.be / Dropbox ``dl=0`` share links."""
    url = url.strip()
    if "youtu" in url:
        return url.replace("youtu.be/", "youtube.com/watch?v=")
    if "dropbox" in url:
        return url.replace("dl=0", "raw=1")
    return url


def _encode_audio(uploaded_file, cutoff):
    """Decode an upload, keep the first *cutoff* seconds, return it as base64 text.

    A falsy *cutoff* keeps the whole recording. The segment is re-exported
    with pydub's default export settings.
    """
    # The preview player may already have read from the buffer.
    uploaded_file.seek(0)
    end_ms = cutoff * 1000 if cutoff else None
    segment = AudioSegment.from_file(uploaded_file)[:end_ms]
    # export() without a target hands back an open temporary file; close it
    # as soon as its content has been read instead of leaking the handle.
    with segment.export() as exported:
        return base64.b64encode(exported.read()).decode("ascii")


def _render_downloads(transcription):
    """Offer the transcript as a plain-text file and as an OTR (JSON) file."""
    with st.expander("⬇️ Transkript herunterladen"):
        st.download_button(
            label="⬇️ Txt-Datei herunterladen",
            data=transcription["text"],
            file_name="transkript.txt",
            mime="text/plain",
        )
        st.download_button(
            label="⬇️ OTR-Datei herunterladen",
            data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
            file_name="transkript.otr",
            mime="application/json",
        )


def run():
    """Render the transcription page and handle one Streamlit script run.

    The page asks for an access code and an input source (fixed example
    video, arbitrary URL, or uploaded audio file), previews the chosen media,
    and, once the submit button is pressed with a valid code and an input,
    calls ``transcribe`` and shows the transcript together with download
    buttons.

    Codes outside ``ROOT_PASSWORDS`` get a 60-second cutoff, except for the
    example video, which is never cut.

    Raises:
        KeyError: If the transcription reply lacks the "text" or "otr" entry.
    """
    st.markdown(
        "<style>section.main > div:first-child { padding-top: 0; padding-bottom: 0; }</style>",
        unsafe_allow_html=True,
    )

    # NOTE(review): nothing in the visible code reads this key; it is kept so
    # the session state written by this page stays unchanged.
    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")
    access_granted = password_is_correct(password)
    url = audio_file = None

    col1, col2 = st.columns([1, 3])
    input_kind = col1.radio("Input", [_INPUT_EXAMPLE, _INPUT_URL, _INPUT_UPLOAD])
    if input_kind == _INPUT_EXAMPLE:
        url = col2.text_input(
            "URL (e.g. YouTube video, Dropbox file, etc.)",
            value=_EXAMPLE_URL,
            disabled=True,
        )
    elif input_kind == _INPUT_URL:
        url = _normalize_url(
            col2.text_input(
                "URL (e.g. YouTube video, Dropbox file, etc.)",
                value="",
            )
        )
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    submit_button = col2.button(
        label="⚡ Transkribieren" + ("" if access_granted else " (Zugriffscode inkorrekt)"),
        disabled=not access_granted or not (audio_file or url),
    )

    cutoff = None if password in ROOT_PASSWORDS else _DEFAULT_CUTOFF_SECONDS
    if audio_file:
        st.audio(audio_file)
    if url:
        if url == _EXAMPLE_URL:
            cutoff = None
        st.video(url)

    if not (input_is_ready(password, audio_file, url) and submit_button):
        return

    with st.spinner("Transkription läuft..."):
        # Decode and encode only on submit: the script reruns on every widget
        # interaction, and converting the upload each time is wasted work.
        audio_b64 = _encode_audio(audio_file, cutoff) if audio_file else None
        transcription = transcribe(url, audio_b64, cutoff)

    st.text_area("Transkript", transcription["text"], height=300)
    _render_downloads(transcription)


# Top-level boundary: run the page and turn any unexpected failure into a
# friendly message for the visitor instead of a raw stack trace.
try:
    run()
except Exception as e:
    # logger.exception logs at error level and attaches the traceback, so the
    # failure can actually be diagnosed from the logs.
    logger.exception(e)
    st.error(
        "Leider ist ein unerwarteter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter alexander@ff.pub"
    )