# Hugging Face Spaces status shown when this file was captured: Runtime error
import json
import os
import tempfile
from io import StringIO

import pandas as pd
import streamlit as st
import whisper_timestamped as whisper
# Each mapping goes from the label shown in a UI select box to an internal value.

# Timestamp granularity choices offered in the "Timestamp type" select box.
STAMP_TYPES = {
    "Sentence-level": "sentence",
    "Word-level": "word",
}

# Values are passed as the ``language`` argument of ``whisper.transcribe``.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
}

# Values are passed as the model name to ``whisper.load_model``.
MODEL_SIZES = {
    "Medium": "medium",
    "Large": "large",
}
def save_temp_file(file):
    """Write an uploaded file's bytes into the system temp directory.

    Args:
        file: Upload-like object exposing ``name`` (the client-supplied file
            name) and ``getvalue()`` (the complete content as bytes).

    Returns:
        Path of the file written directly inside ``tempfile.gettempdir()``.

    Raises:
        ValueError: If ``file.name`` has no usable file-name component.
    """
    # The name is supplied by the client, so keep only its last path
    # component; a name such as "../x.wav" would otherwise be written
    # outside the temp directory. Backslashes are normalised first so that
    # Windows-style separators are stripped on every platform.
    safe_name = os.path.basename(file.name.replace("\\", "/"))
    if not safe_name or safe_name in (os.curdir, os.pardir):
        raise ValueError(f"Invalid upload file name: {file.name!r}")

    temp_file_path = os.path.join(tempfile.gettempdir(), safe_name)
    with open(temp_file_path, "wb") as temp_file:
        temp_file.write(file.getvalue())
    return temp_file_path
def load_model(model_size: str):
    """Load the Whisper model selected in the UI.

    Args:
        model_size: A key of ``MODEL_SIZES`` (the label shown in the UI).

    Returns:
        Whatever ``whisper.load_model`` returns for the mapped model name,
        requested on the CPU with ``models`` as the download directory.

    Raises:
        KeyError: If ``model_size`` is not a key of ``MODEL_SIZES``.
    """
    checkpoint_name = MODEL_SIZES[model_size]
    print(f"model size : {checkpoint_name}")
    model = whisper.load_model(checkpoint_name, device="cpu", download_root="models")
    return model
def get_sentence_data(filename: str, timestamp_dict: dict) -> pd.DataFrame:
    """Build one table row per transcribed segment of an audio file.

    Args:
        filename: Name of the audio file; repeated in every row.
        timestamp_dict: Transcription result containing a ``"segments"`` list
            whose items provide ``"text"``, ``"start"`` and ``"end"``.

    Returns:
        DataFrame with the columns ``Audio file``, ``Sentence``, ``Start``,
        ``End`` and ``Duration`` (``end - start``), indexed from 0. When there
        are no segments the frame is empty but still has these columns.

    Raises:
        KeyError: If ``"segments"`` or one of the per-segment keys is missing.
    """
    columns = ["Audio file", "Sentence", "Start", "End", "Duration"]
    # Collect plain rows and build the frame once: concatenating onto an
    # empty DataFrame per segment is quadratic and relies on deprecated
    # pandas handling of empty entries when inferring dtypes.
    rows = [
        {
            "Audio file": filename,
            "Sentence": str(segment["text"]),
            "Start": segment["start"],
            "End": segment["end"],
            "Duration": segment["end"] - segment["start"],
        }
        for segment in timestamp_dict["segments"]
    ]
    return pd.DataFrame(rows, columns=columns)
def get_word_data(filename: str = "", timestamp_dict: "dict | None" = None) -> pd.DataFrame:
    """Build one table row per transcribed word of an audio file.

    This replaces two stub definitions of the same name, the second of which
    (taking no arguments) shadowed the first. Both parameters have defaults so
    that a zero-argument call still succeeds.

    Args:
        filename: Name of the audio file; repeated in every row.
        timestamp_dict: Transcription result containing a ``"segments"`` list.
            Each segment is expected to carry a ``"words"`` list whose items
            provide ``"text"``, ``"start"`` and ``"end"`` (the layout
            documented by whisper-timestamped). Segments without ``"words"``
            contribute no rows. ``None`` is treated as "no segments".

    Returns:
        DataFrame with the columns ``Audio file``, ``Word``, ``Start``,
        ``End`` and ``Duration`` (``end - start``), indexed from 0.
    """
    columns = ["Audio file", "Word", "Start", "End", "Duration"]
    segments = [] if timestamp_dict is None else timestamp_dict["segments"]
    rows = [
        {
            "Audio file": filename,
            "Word": str(word["text"]),
            "Start": word["start"],
            "End": word["end"],
            "Duration": word["end"] - word["start"],
        }
        for segment in segments
        for word in segment.get("words", [])
    ]
    return pd.DataFrame(rows, columns=columns)
# ---------------------------------------------------------------------------
# Streamlit page: upload audio, pick options, transcribe, show/download table.
# ---------------------------------------------------------------------------
st.title("⏱️🧾 Timestamp generator")

# Audio load. With accept_multiple_files=True the widget yields a collection
# of uploads, which is empty until the user adds files.
audio_file = st.file_uploader(
    "Load audio file to transcribe", type=["wav", "mp3"], accept_multiple_files=True
)

# Three side-by-side option selectors.
stamp_type, lang, size = st.columns(3)
with stamp_type:
    # NOTE(review): this selection is collected but not used below; the run
    # always produces sentence-level rows.
    timestamp_type = st.selectbox("Timestamp type", options=list(STAMP_TYPES.keys()))
with lang:
    language = st.selectbox("Language", options=list(LANGUAGES.keys()))
with size:
    model_size = st.selectbox("Model size", options=list(MODEL_SIZES.keys()))

# Button that triggers timestamp generation
if st.button("Generate Timestamp", use_container_width=True):
    if not audio_file:
        # Nothing uploaded: skip the (expensive) model load entirely.
        st.warning("Please upload at least one audio file first.")
    else:
        with st.spinner("Loading model..."):
            model = load_model(model_size)

        per_file_frames = []
        for audio_i in audio_file:
            with st.spinner(f"Processing audio: {audio_i.name}"):
                tmp_audio = save_temp_file(audio_i)
                try:
                    tmp_audio_file = whisper.load_audio(tmp_audio)
                    timestamp_result = whisper.transcribe(
                        model, tmp_audio_file, language=LANGUAGES[language]
                    )
                finally:
                    # The temp copy is only needed for transcription; remove
                    # it so uploads do not accumulate in the temp directory.
                    if os.path.exists(tmp_audio):
                        os.remove(tmp_audio)
                per_file_frames.append(
                    get_sentence_data(audio_i.name, timestamp_result)
                )

        # One concat at the end instead of growing a frame per file.
        sentences_df = pd.concat(per_file_frames, ignore_index=True)

        st.dataframe(sentences_df)
        st.download_button(
            "Save timestamps",
            sentences_df.to_csv(index=False),
            file_name="timestamps.csv",
            mime="text/csv",
            use_container_width=True,
        )