Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -67,21 +67,29 @@ from io import BytesIO
|
|
67 |
import librosa
|
68 |
|
69 |
# Updated function for Streamlit-compatible audio processing
|
70 |
-
import soundfile as sf
|
71 |
from io import BytesIO
|
|
|
72 |
|
73 |
-
# Update the function to handle both file paths and Streamlit-uploaded files
|
74 |
def get_audio_embedding(file_input):
|
75 |
-
#
|
76 |
if not isinstance(file_input, str):
|
77 |
-
# Streamlit uploads the file as an in-memory file-like object, so we convert it into bytes
|
78 |
file_input = BytesIO(file_input.read())
|
79 |
-
|
80 |
-
#
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
# Convert audio to embeddings using Wav2Vec2
|
84 |
-
inputs = feature_extractor(
|
85 |
with torch.no_grad():
|
86 |
embeddings = model(**inputs).last_hidden_state.mean(dim=1)
|
87 |
return embeddings
|
|
|
67 |
import librosa
|
68 |
|
69 |
# Updated function for Streamlit-compatible audio processing
|
|
|
70 |
from io import BytesIO
|
71 |
+
from pydub import AudioSegment
|
72 |
|
|
|
73 |
def get_audio_embedding(file_input):
    """Return a Wav2Vec2 embedding for an audio file.

    Accepts either a filesystem path (str) or a Streamlit-uploaded
    file-like object. The audio is first normalized to WAV via pydub
    (so mp3/m4a/etc. uploads work), then resampled to 16 kHz with
    librosa to match the Wav2Vec2 feature extractor's expected rate.

    Parameters
    ----------
    file_input : str or file-like
        Path to an audio file, or an object with a ``.read()`` method
        (e.g. ``st.file_uploader`` result).

    Returns
    -------
    torch.Tensor
        Mean-pooled last hidden state, shape (1, hidden_dim).

    Raises
    ------
    ValueError
        If the audio cannot be decoded or loaded.
    """
    # Streamlit uploads arrive as in-memory file-like objects; buffer the
    # bytes so pydub can seek freely. String inputs are treated as paths,
    # which AudioSegment.from_file also accepts.
    if not isinstance(file_input, str):
        file_input = BytesIO(file_input.read())

    try:
        # Decode whatever container/codec the user uploaded and re-export
        # as WAV in memory (no temp files needed).
        audio = AudioSegment.from_file(file_input)
        wav_io = BytesIO()
        audio.export(wav_io, format="wav")
        wav_io.seek(0)  # rewind so librosa reads from the start

        # Resample to 16 kHz — the rate Wav2Vec2 models are trained on.
        audio_data, sr = librosa.load(wav_io, sr=16000)
    except Exception as e:
        # Best-effort decode: surface any failure as a single ValueError,
        # but chain the cause so the original traceback is preserved.
        raise ValueError(f"Failed to process the audio file: {str(e)}") from e

    # Convert audio to embeddings using Wav2Vec2.
    inputs = feature_extractor(audio_data, sampling_rate=sr, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings
|