poemsforaphrodite committed • Commit cf30482 • 1 Parent(s): 9d66ad1

Update app.py
app.py CHANGED
@@ -1,70 +1,60 @@
 import os
 import json
 import torch
-from tqdm import tqdm # Progress bar
 import whisper
+import streamlit as st
+from tempfile import NamedTemporaryFile
 
-def transcribe_audio(audio_path, model):
+def transcribe_audio(audio_file, model):
     """
     Transcribe a single audio file using OpenAI's Whisper model locally.
-
-    …
-        model (whisper.Whisper): Loaded Whisper model.
-    …
-    """
-    # Perform transcription
-    result = model.transcribe(audio_path)
-
-    # Extract the transcribed text
-    transcriptions = result["text"].strip()
-
-    return transcriptions
+    """
+    result = model.transcribe(audio_file)
+    return result["text"].strip()
 
-…
-    print(f"Using device: {device}")
-…
-    audio_files = [
-        os.path.join(root, file)
-        for root, dirs, files in os.walk(directory)
-        for file in files
-        if file.lower().endswith((".wav", ".mp3", ".m4a", ".flac", ".aac"))
-    ]
-…
-        print(f"Transcribing: {file_path}")
-        transcription = transcribe_audio(file_path, model)
-        transcriptions[file_name] = transcription
-…
+def main():
+    st.title("Audio Transcription with Whisper")
+
+    # File uploader
+    uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a", "flac", "aac"])
+
+    if uploaded_file is not None:
+        # Display audio file details
+        file_details = {"Filename": uploaded_file.name, "FileSize": uploaded_file.size}
+        st.write(file_details)
+
+        # Play audio
+        st.audio(uploaded_file, format='audio/wav')
+
+        if st.button('Transcribe Audio'):
+            with st.spinner('Transcribing audio using Whisper large model...'):
+                # Check if CUDA is available
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+                st.info(f"Using device: {device}")
+
+                # Load the Whisper model
+                model = whisper.load_model("large", device=device)
+
+                # Save uploaded file temporarily and transcribe
+                with NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
+                    tmp_file.write(uploaded_file.getvalue())
+                    tmp_file_path = tmp_file.name
+
+                transcription = transcribe_audio(tmp_file_path, model)
+
+                # Remove temporary file
+                os.unlink(tmp_file_path)
+
+                # Display transcription
+                st.subheader("Transcription:")
+                st.write(transcription)
+
+                # Save transcription to JSON
+                output_json = 'transcription.json'
+                with open(output_json, 'w', encoding='utf-8') as f:
+                    json.dump({uploaded_file.name: transcription}, f, ensure_ascii=False, indent=4)
+
+                st.success(f"Transcription saved to {output_json}")
 
 if __name__ == "__main__":
-    …
-    output_json = "transcriptions.json"
-    model_size = "large"
-    transcribe_all_audios(directory, output_json, model_size)
+    main()
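
For a quick sanity check outside the Space, the refactored transcribe_audio logic can be exercised on its own. A minimal sketch, assuming the openai-whisper package is installed; the "base" model size and the sample.wav path are illustrative stand-ins, not part of this commit:

import torch
import whisper

# Same device selection the app performs before loading the model.
device = "cuda" if torch.cuda.is_available() else "cpu"

# "base" keeps the download small for a local test; the app itself loads "large".
model = whisper.load_model("base", device=device)

# transcribe() takes a file path and returns a dict whose "text" key holds the
# transcript, which is what transcribe_audio() in app.py strips and returns.
result = model.transcribe("sample.wav")  # hypothetical local audio file
print(result["text"].strip())

Note the design choice behind the NamedTemporaryFile(delete=False) / os.unlink pairing in the new main(): model.transcribe expects a path on disk, while Streamlit's file_uploader only yields an in-memory buffer, so the upload is spilled to a temporary file for the duration of the call. The app itself is launched in the usual Streamlit way, with streamlit run app.py.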