DataMine committed on
Commit
3ef02b3
·
verified ·
1 Parent(s): b54a482

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -67,21 +67,29 @@ from io import BytesIO
67
  import librosa
68
 
69
  # Updated function for Streamlit-compatible audio processing
70
- import soundfile as sf
71
  from io import BytesIO
 
72
 
73
- # Update the function to handle both file paths and Streamlit-uploaded files
74
  def get_audio_embedding(file_input):
75
- # If the input is a Streamlit-uploaded file, convert it to bytes and then load using librosa
76
  if not isinstance(file_input, str):
77
- # Streamlit uploads the file as an in-memory file-like object, so we convert it into bytes
78
  file_input = BytesIO(file_input.read())
79
-
80
- # Load the audio using librosa
81
- audio, sr = librosa.load(file_input, sr=16000)
82
-
 
 
 
 
 
 
 
 
 
 
83
  # Convert audio to embeddings using Wav2Vec2
84
- inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding=True)
85
  with torch.no_grad():
86
  embeddings = model(**inputs).last_hidden_state.mean(dim=1)
87
  return embeddings
 
67
  import librosa
68
 
69
  # Updated function for Streamlit-compatible audio processing
 
70
  from io import BytesIO
71
+ from pydub import AudioSegment
72
 
 
73
def get_audio_embedding(file_input):
    """Compute a Wav2Vec2 embedding for an audio file.

    Parameters
    ----------
    file_input : str or file-like
        Path to an audio file, or a Streamlit-uploaded file object.

    Returns
    -------
    torch.Tensor
        Mean-pooled last hidden state, shape (1, hidden_dim).

    Raises
    ------
    ValueError
        If the audio cannot be decoded or converted; the original
        decoding error is attached as the cause.
    """
    # Streamlit uploads arrive as in-memory file-like objects; wrap the
    # raw bytes in BytesIO so pydub gets a seekable stream.
    if not isinstance(file_input, str):
        file_input = BytesIO(file_input.read())

    try:
        # pydub supports many container formats (mp3, m4a, etc.);
        # re-export to WAV so librosa can decode it reliably.
        audio = AudioSegment.from_file(file_input)
        wav_io = BytesIO()
        audio.export(wav_io, format="wav")
        wav_io.seek(0)  # rewind so librosa reads from the start

        # Resample to 16 kHz (presumably the rate the Wav2Vec2 feature
        # extractor expects — confirm against the model config).
        audio_data, sr = librosa.load(wav_io, sr=16000)
    except Exception as e:
        # Chain the original exception (`from e`) so the real decoding
        # failure stays visible in the traceback instead of being swallowed.
        raise ValueError(f"Failed to process the audio file: {e}") from e

    # Convert audio to embeddings using Wav2Vec2, mean-pooling over time.
    inputs = feature_extractor(audio_data, sampling_rate=sr, return_tensors="pt", padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings