Spaces:

nahue-passano
/

librispeech-corpus-generator

Runtime error

App Files Community

nahue-passano committed on Jul 17, 2023

Commit

9bdb941

•

1 Parent(s): 80f5b87

update: main streamlit app

Browse files

Files changed (1) hide show

app.py +8 -4

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import streamlit as st
-import whisper_timestamped as whisper
 import pandas as pd
 from utils.files import (
@@ -7,7 +7,7 @@ from utils.files import (
     save_temp_file,
     compress_utterances_folder,
 )
-from utils.text import get_sentence_data, get_word_data, generate_transcriptions_splits
 from utils.audio import generate_audio_splits
 STAMP_TYPES = {"Sentence-level": "sentence", "Word-level": "word"}
@@ -62,9 +62,11 @@ def main_app():
             model = load_model(model_size)
         timestamps_df = pd.DataFrame()
         temp_dir = create_temp_directory()
         utterances_folder = temp_dir / "utterances_segments"
         utterances_folder.mkdir(exist_ok=True)
         for audio_i in audio_file:
             with st.spinner(f"Processing audio: {audio_i.name}"):
                 tmp_audio = save_temp_file(audio_i)
@@ -78,10 +80,12 @@ def main_app():
                 # Stamp level
                 if timestamp_type == "Sentence-level":
                     audio_i_df = get_sentence_data(audio_i.name, timestamp_result)
                 if timestamp_type == "Word-level":
                     audio_i_df = get_word_data(audio_i.name, timestamp_result)
                 # Timestamps in dataframe
                 timestamps_df = pd.concat(
                     [timestamps_df, audio_i_df], ignore_index=True
@@ -89,6 +93,7 @@ def main_app():
                 generate_audio_splits(tmp_audio, audio_i_df, utterances_folder)
                 generate_transcriptions_splits(tmp_audio, audio_i_df, utterances_folder)
         st.divider()
         st.markdown(
             "<h3 style='text-align: center;'>Timestamps</h3>",
@@ -119,4 +124,3 @@ def main_app():
 if __name__ == "__main__":
     main_app()

 import streamlit as st
+import whisper_transcriber as whisper
 import pandas as pd
 from utils.files import (
     save_temp_file,
     compress_utterances_folder,
 )
+from utils.text import get_sentence_data, get_word_data, generate_transcriptions_splits, check_ut_min_duration
 from utils.audio import generate_audio_splits
 STAMP_TYPES = {"Sentence-level": "sentence", "Word-level": "word"}
             model = load_model(model_size)
         timestamps_df = pd.DataFrame()
         temp_dir = create_temp_directory()
         utterances_folder = temp_dir / "utterances_segments"
         utterances_folder.mkdir(exist_ok=True)
         for audio_i in audio_file:
             with st.spinner(f"Processing audio: {audio_i.name}"):
                 tmp_audio = save_temp_file(audio_i)
                 # Stamp level
                 if timestamp_type == "Sentence-level":
                     audio_i_df = get_sentence_data(audio_i.name, timestamp_result)
+                    # Checks utterance duration
+                    audio_i_df = check_ut_min_duration(audio_i_df)
                 if timestamp_type == "Word-level":
                     audio_i_df = get_word_data(audio_i.name, timestamp_result)
                 # Timestamps in dataframe
                 timestamps_df = pd.concat(
                     [timestamps_df, audio_i_df], ignore_index=True
                 generate_audio_splits(tmp_audio, audio_i_df, utterances_folder)
                 generate_transcriptions_splits(tmp_audio, audio_i_df, utterances_folder)
         st.divider()
         st.markdown(
             "<h3 style='text-align: center;'>Timestamps</h3>",
 if __name__ == "__main__":
     main_app()