File size: 3,652 Bytes
8fc7f5c
e414a67
b602cc6
8fc7f5c
a9bf4b2
 
8fc7f5c
56cc953
8fc7f5c
a9bf4b2
 
 
 
 
e414a67
8930d8d
 
a9bf4b2
 
e414a67
 
 
 
 
 
 
 
a9bf4b2
c122f2d
 
 
 
 
e414a67
 
 
 
 
 
a9bf4b2
e414a67
 
 
999c934
acf3663
8ae6044
999c934
222d696
 
 
 
 
 
8ae6044
e414a67
 
 
 
 
 
 
 
d8a8864
 
e414a67
 
 
a9bf4b2
 
8930d8d
e414a67
a190643
e414a67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8930d8d
e414a67
fa89e92
 
aeb977f
a190643
9af6bee
a190643
 
e414a67
fa89e92
aeb977f
e414a67
 
 
 
a9bf4b2
999c934
a190643
a9bf4b2
5d9101b
 
 
 
 
c122f2d
5d9101b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import base64
import json
import os

import modal
import streamlit as st
from loguru import logger
from pydub import AudioSegment

# Handle to the "run_transcription" object of the "ffpub-transcription" Modal
# app; `transcribe` below invokes it through `.call(...)`.
run_transcription = modal.lookup("ffpub-transcription", "run_transcription")

# Set the page title of the Streamlit app.
st.set_page_config(page_title="Speech to Text Transcription App")


@st.cache(show_spinner=False)
def transcribe(url, audio_b64, cutoff):
    """Run the remote transcription and return whatever it produces.

    The function is wrapped in ``st.cache`` with Streamlit's built-in spinner
    switched off.

    Args:
        url: Location of the media to transcribe, or None.
        audio_b64: Base64 text of the audio to transcribe, or None.
        cutoff: Accepted but not forwarded; see the note below.

    Returns:
        The result of ``run_transcription.call``.
    """
    # NOTE(review): the remote call always receives cutoff=None, regardless of
    # the `cutoff` argument. Presumably the argument only serves to distinguish
    # cache entries -- confirm before forwarding it.
    remote_kwargs = {"url": url, "audio_b64": audio_b64, "cutoff": None}
    return run_transcription.call(**remote_kwargs)


def password_is_correct(password):
    """Return True when *password* matches the regular or the root access code.

    The accepted codes are read from the ``PASSWORD`` and ``ROOT_PASSWORD``
    environment variables on every call.

    Args:
        password: The access code supplied by the visitor.

    Returns:
        bool: True if the code equals either accepted code, otherwise False.
        Anything that is not a string never matches.

    Raises:
        KeyError: If ``PASSWORD`` or ``ROOT_PASSWORD`` is not set.
    """
    import hmac  # function-scope import keeps this block self-contained

    accepted_codes = (os.environ["PASSWORD"], os.environ["ROOT_PASSWORD"])
    if not isinstance(password, str):
        return False

    # compare_digest only accepts non-ASCII text as bytes; "surrogatepass"
    # keeps the encoding total (environment values may carry lone surrogates).
    candidate = password.encode("utf-8", "surrogatepass")
    # Evaluate both comparisons (no short-circuit) so timing does not reveal
    # which code, if any, was matched.
    outcomes = [
        hmac.compare_digest(candidate, code.encode("utf-8", "surrogatepass"))
        for code in accepted_codes
    ]
    return any(outcomes)


def input_is_ready(password, audio_file, url):
    """Tell whether a transcription can be started.

    Args:
        password: Access code entered by the visitor.
        audio_file: The audio input, or a falsy value when there is none.
        url: The media URL, or a falsy value when there is none.

    Returns:
        False when the access code is rejected; otherwise ``audio_file`` if it
        is truthy, else ``url`` (so the result is truthy exactly when at least
        one input is present).
    """
    if not password_is_correct(password):
        return False
    return audio_file or url


def _encode_clip(clip):
    """Export *clip* with pydub's default settings and return it as base64 text."""
    # export() without a target returns an open file object; close it once the
    # bytes have been read instead of leaving that to the garbage collector.
    with clip.export() as exported:
        return base64.b64encode(exported.read()).decode("ascii")


def run():
    """Render the transcription page and start a transcription on request.

    The page consists of, in order:

    1. an access-code field,
    2. a radio selector for the input kind (fixed example URL, free URL, or
       file upload) with the matching input widget next to it,
    3. the submit button, disabled while the access code is wrong or no input
       has been given,
    4. an expander that plays back the chosen audio/video,
    5. after a successful submit: download buttons for the plain text and the
       OTR JSON, plus a text area showing the transcript.

    Uploaded audio is truncated with ``[:60_000]`` unless the root access code
    was entered (pydub slices by milliseconds, so presumably 60 seconds).

    Raises:
        KeyError: If ``PASSWORD`` or ``ROOT_PASSWORD`` is missing from the
            environment.
    """
    example_option = "Beispiel (Kabarett)"
    url_option = "URL (YouTube, …)"
    upload_option = "Datei-Upload"
    url_label = "URL (e.g. YouTube video, Dropbox file, etc.)"

    st.markdown(
        "<style>section.main > div:first-child { padding: 0}</style>",
        unsafe_allow_html=True,
    )

    # Not read anywhere in this function; kept so the session state stays as before.
    if "is_expanded" not in st.session_state:
        st.session_state["is_expanded"] = True

    password = st.text_input("Zugriffscode (siehe oben)")
    # Check the access code once per render and reuse the answer below.
    password_ok = password_is_correct(password)
    url = audio_file = None

    col1, col2 = st.columns([1, 3])
    input_type = col1.radio(
        "Input",
        [example_option, url_option, upload_option],
    )
    if input_type == example_option:
        url = col2.text_input(
            url_label,
            value="https://www.youtube.com/watch?v=6UONiGMmbS4",
            disabled=True,
        )
    elif input_type == url_option:
        url = col2.text_input(
            url_label,
            value="",
        )
    else:
        audio_file = col2.file_uploader(
            "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
        )

    submit_button = col2.button(
        label="⚡ Transkribieren"
        + ("" if password_ok else " (Zugriffscode inkorrekt)"),
        disabled=(not password_ok or (not audio_file and not url)),
    )

    cutoff = audio_b64 = clip = None
    if audio_file or url:
        with st.expander(("Audio" if audio_file else "Video") + " abspielen"):
            if audio_file:
                st.audio(audio_file)
                # Only the root access code lifts the length limit.
                cutoff = None if password == os.environ["ROOT_PASSWORD"] else 60_000
                clip = AudioSegment.from_file(audio_file)[:cutoff]
                audio_b64 = _encode_clip(clip)
            if url:
                st.video(url)

    # `clip` (not the raw upload) is checked, exactly as before: an upload that
    # decodes to an empty segment does not count as ready input.
    if submit_button and input_is_ready(password, clip, url):
        with st.spinner("Transkription läuft..."):
            transcription = transcribe(url, audio_b64, cutoff)

        text_col, otr_col = st.columns([1, 1])
        text_col.download_button(
            label="⬇️ Transkript",
            data=transcription["text"],
            file_name="transkript.txt",
            mime="text/plain",
        )

        otr_col.download_button(
            label="⬇️ OTR-Datei",
            data=json.dumps(transcription["otr"], indent=2, ensure_ascii=False),
            file_name="transkript.otr",
            mime="application/json",
        )

        st.text_area("Transkript", transcription["text"], height=300)


# Top-level boundary of the app: any unexpected error is logged and replaced
# by a generic message for the visitor.
try:
    run()
except Exception as e:
    # logger.exception records the traceback along with the message, which a
    # plain logger.error(e) would drop.
    logger.exception(e)
    st.error(
        "Leider ist ein unerwarter Fehler aufgetreten. Ich kann mir das Problem sofort ansehen, Sie erreichen mich unter alexander.seifert@gmail.com"
    )