# transcribe / app.py
# Hugging Face file-viewer metadata captured with this copy (not part of the program):
#   last commit: "Update app.py" (cf30482, verified), by poemsforaphrodite
#   file size: 2.13 kB
import os
import json
import torch
import whisper
import streamlit as st
from tempfile import NamedTemporaryFile
def transcribe_audio(audio_file, model, **transcribe_options) -> str:
    """Transcribe a single audio file with a locally loaded Whisper model.

    Args:
        audio_file: The audio input handed straight to ``model.transcribe``
            (the caller in this file passes a filesystem path).
        model: An object exposing ``transcribe(audio, **options)`` that
            returns a mapping containing a ``"text"`` entry.
        **transcribe_options: Optional keyword arguments forwarded unchanged
            to ``model.transcribe``. Omitting them reproduces the default
            call ``model.transcribe(audio_file)``.

    Returns:
        The transcribed text with leading and trailing whitespace removed.
    """
    result = model.transcribe(audio_file, **transcribe_options)
    return result["text"].strip()
@st.cache_resource(show_spinner=False)
def _load_whisper_model(name, device):
    """Load a Whisper checkpoint once per (name, device) and reuse it across reruns."""
    return whisper.load_model(name, device=device)


def main():
    """Run the Streamlit page: upload an audio file, transcribe it, show and save the text.

    The transcription is displayed on the page and also written to
    ``transcription.json`` in the current working directory, keyed by the
    uploaded file's name.
    """
    st.title("Audio Transcription with Whisper")

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose an audio file", type=["wav", "mp3", "m4a", "flac", "aac"]
    )
    if uploaded_file is None:
        return

    # Display audio file details
    file_details = {"Filename": uploaded_file.name, "FileSize": uploaded_file.size}
    st.write(file_details)

    # Play audio using the upload's own MIME type; the uploader accepts more
    # than WAV, so a hard-coded 'audio/wav' would mislabel the other formats.
    st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')

    if not st.button('Transcribe Audio'):
        return

    with st.spinner('Transcribing audio using Whisper large model...'):
        # Check if CUDA is available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.info(f"Using device: {device}")

        # Load the Whisper model (cached, so repeated clicks do not reload it)
        model = _load_whisper_model("large", device)

        # Save the upload to a temporary file that keeps the original
        # extension, then transcribe it.
        suffix = os.path.splitext(uploaded_file.name)[1]
        tmp_file = NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            # Close the handle before transcribing so the bytes are flushed
            # to disk and the path can be reopened by another reader.
            with tmp_file:
                tmp_file.write(uploaded_file.getvalue())
            transcription = transcribe_audio(tmp_file.name, model)
        finally:
            # delete=False means nothing else removes the file; clean up even
            # when writing or transcription fails.
            os.unlink(tmp_file.name)

    # Display transcription
    st.subheader("Transcription:")
    st.write(transcription)

    # Save transcription to JSON
    output_json = 'transcription.json'
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump({uploaded_file.name: transcription}, f, ensure_ascii=False, indent=4)
    st.success(f"Transcription saved to {output_json}")
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()