DataMine committed on
Commit
3ef02b3
·
verified ·
1 Parent(s): b54a482

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -67,21 +67,29 @@ from io import BytesIO
67
  import librosa
68
 
69
  # Updated function for Streamlit-compatible audio processing
70
- import soundfile as sf
71
  from io import BytesIO
 
72
 
73
- # Update the function to handle both file paths and Streamlit-uploaded files
74
  def get_audio_embedding(file_input):
75
- # If the input is a Streamlit-uploaded file, convert it to bytes and then load using librosa
76
  if not isinstance(file_input, str):
77
- # Streamlit uploads the file as an in-memory file-like object, so we convert it into bytes
78
  file_input = BytesIO(file_input.read())
79
-
80
- # Load the audio using librosa
81
- audio, sr = librosa.load(file_input, sr=16000)
82
-
 
 
 
 
 
 
 
 
 
 
83
  # Convert audio to embeddings using Wav2Vec2
84
- inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
85
  with torch.no_grad():
86
  embeddings = model(**inputs).last_hidden_state.mean(dim=1)
87
  return embeddings
 
67
  import librosa
68
 
69
  # Updated function for Streamlit-compatible audio processing
 
70
  from io import BytesIO
71
+ from pydub import AudioSegment
72
 
 
73
def get_audio_embedding(file_input):
    """Compute a Wav2Vec2 embedding for an audio file.

    Parameters
    ----------
    file_input : str or file-like
        Path to an audio file, or a Streamlit-uploaded file object.

    Returns
    -------
    torch.Tensor
        Mean-pooled last hidden state, shape (1, hidden_dim).

    Raises
    ------
    ValueError
        If the audio cannot be decoded or converted; the original
        decoding error is attached as the cause.
    """
    # Streamlit uploads arrive as in-memory file-like objects; wrap the
    # raw bytes in BytesIO so pydub gets a seekable stream.
    if not isinstance(file_input, str):
        file_input = BytesIO(file_input.read())

    try:
        # pydub supports many container formats (mp3, m4a, etc.);
        # re-export to WAV so librosa can decode it reliably.
        audio = AudioSegment.from_file(file_input)
        wav_io = BytesIO()
        audio.export(wav_io, format="wav")
        wav_io.seek(0)  # rewind so librosa reads from the start

        # Resample to 16 kHz (presumably the rate the Wav2Vec2 feature
        # extractor expects — confirm against the model config).
        audio_data, sr = librosa.load(wav_io, sr=16000)
    except Exception as e:
        # Chain the original exception (`from e`) so the real decoding
        # failure stays visible in the traceback instead of being swallowed.
        raise ValueError(f"Failed to process the audio file: {e}") from e

    # Convert audio to embeddings using Wav2Vec2, mean-pooling over time.
    inputs = feature_extractor(audio_data, sampling_rate=sr, return_tensors="pt", padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings