Spaces:

nahue-passano
/

librispeech-corpus-generator

Runtime error

App Files Files Community

nahue-passano committed on Jul 5, 2023

Commit

7fc9bd4

•

1 Parent(s): d4a3c16

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -7

app.py CHANGED Viewed

@@ -46,7 +46,20 @@ def get_sentence_data(filename: str, timestamp_dict: dict):
 def get_word_data(filename: str, timestamp_dict: dict):
-    pass
 st.set_page_config(layout="wide")
@@ -68,11 +81,11 @@ with lang:
 with size:
     model_size = st.selectbox("Model size", options=list(MODEL_SIZES.keys()))
-# Botón para generar el timestamp
 if st.button("Generate Timestamp", use_container_width=True):
     with st.spinner("Loading model..."):
         model = load_model(model_size)
-    sentences_df = pd.DataFrame()
     for audio_i in audio_file:
         with st.spinner(f"Processing audio: {audio_i.name}"):
             tmp_audio = save_temp_file(audio_i)
@@ -80,14 +93,17 @@ if st.button("Generate Timestamp", use_container_width=True):
             timestamp_result = whisper.transcribe(
                 model, tmp_audio_file, language=LANGUAGES[language]
             )
-            audio_i_df = get_sentence_data(audio_i.name, timestamp_result)
-            sentences_df = pd.concat([sentences_df, audio_i_df], ignore_index=True)
-    st.dataframe(sentences_df)
     st.download_button(
         "Save timestamps",
-        sentences_df.to_csv(index=False),
         file_name="timestamps.csv",
         mime="text/csv",
         use_container_width=True,

 def get_word_data(filename: str, timestamp_dict: dict):
     """Build a word-level timestamp table for one audio file.

     Args:
         filename: Value written to the "Audio file" column of every row.
         timestamp_dict: Transcription result. It must have a "segments"
             list whose items each carry a "words" list; every word needs
             "text", "start" and "end" entries.
             NOTE(review): presumably the dict returned by
             ``whisper.transcribe`` -- confirm against the caller.

     Returns:
         A ``pandas.DataFrame`` with the columns "Audio file", "Word",
         "Start", "End" and "Duration" (``end - start``), one row per word
         in segment order. The frame is empty, but keeps those columns,
         when there are no words.

     Raises:
         KeyError: If a segment or word lacks one of the keys listed above.
     """
     columns = ["Audio file", "Word", "Start", "End", "Duration"]
     # Collect plain row dicts and build the frame once: calling pd.concat
     # per word re-copies the accumulated frame on every iteration.
     rows = [
         {
             "Audio file": filename,
             "Word": str(word_i["text"]),
             "Start": word_i["start"],
             "End": word_i["end"],
             "Duration": word_i["end"] - word_i["start"],
         }
         for sentence_i in timestamp_dict["segments"]
         for word_i in sentence_i["words"]
     ]
     # Passing `columns` keeps the header (and its order) for empty input.
     return pd.DataFrame(rows, columns=columns)
 st.set_page_config(layout="wide")
 with size:
     model_size = st.selectbox("Model size", options=list(MODEL_SIZES.keys()))
 if st.button("Generate Timestamp", use_container_width=True):
     with st.spinner("Loading model..."):
         model = load_model(model_size)
+    timestamps_df = pd.DataFrame()
     for audio_i in audio_file:
         with st.spinner(f"Processing audio: {audio_i.name}"):
             tmp_audio = save_temp_file(audio_i)
             timestamp_result = whisper.transcribe(
                 model, tmp_audio_file, language=LANGUAGES[language]
             )
+            if timestamp_type == "Sentence-level":
+                audio_i_df = get_sentence_data(audio_i.name, timestamp_result)
+            if timestamp_type == "Word-level":
+                audio_i_df = get_word_data(audio_i.name, timestamp_result)
+            timestamps_df = pd.concat([timestamps_df, audio_i_df], ignore_index=True)
+    st.dataframe(timestamps_df)
     st.download_button(
         "Save timestamps",
+        timestamps_df.to_csv(index=False),
         file_name="timestamps.csv",
         mime="text/csv",
         use_container_width=True,