Spaces:

Hammad712
/

audio

Sleeping

App Files Files Community

Hammad712 committed on Dec 27, 2024

Commit

6682f41

verified ·

1 Parent(s): 85a59b8

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
depression_audio_model1.keras +3 -0
main.py +75 -0
requirements.txt +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+depression_audio_model1.keras filter=lfs diff=lfs merge=lfs -text

depression_audio_model1.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ca9cc124fa7d6d0ca747f170db61331020c0b0844b7bf296413ad56065b7edb
+size 36825812

main.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import os
+import tempfile
+
+import librosa
+import numpy as np
+import tensorflow as tf
+import uvicorn
+from fastapi import FastAPI, UploadFile, HTTPException
+from fastapi.responses import JSONResponse
+# Load the pre-trained Keras model once, at import time; inference() reuses
+# this single instance. The path is relative to the working directory.
+loaded_model = tf.keras.models.load_model('depression_audio_model1.keras')
+print("Model loaded successfully.")
+# Feature-extraction constants (the defaults of extract_mel_spectrogram)
+N_MELS = 128  # number of mel bands requested from librosa
+N_FFT = 2048  # FFT window size passed to librosa
+HOP_LENGTH = 512  # hop between successive frames, in samples
+DURATION = 10  # seconds of audio read from each clip
+SAMPLE_RATE = 22050  # rate (Hz) that librosa.load resamples to
+# Target spectrogram shape as (mel bands, frames); with the values above this
+# evaluates to (128, 430). Clips are padded or truncated to this frame count.
+FIXED_SHAPE = (N_MELS, int(DURATION * SAMPLE_RATE / HOP_LENGTH))
+# Create the FastAPI app
+app = FastAPI()
+def extract_mel_spectrogram(file_path, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH, duration=DURATION, sample_rate=SAMPLE_RATE):
+    signal, _ = librosa.load(file_path, sr=sample_rate, duration=duration)
+    mel_spectrogram = librosa.feature.melspectrogram(y=signal, sr=sample_rate, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
+    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
+    mean = mel_spectrogram_db.mean()
+    std = mel_spectrogram_db.std()
+    if std > 0:
+        mel_spectrogram_db = (mel_spectrogram_db - mean) / std
+    else:
+        mel_spectrogram_db = mel_spectrogram_db - mean
+    if mel_spectrogram_db.shape[1] < FIXED_SHAPE[1]:
+        pad_width = FIXED_SHAPE[1] - mel_spectrogram_db.shape[1]
+        mel_spectrogram_db = np.pad(mel_spectrogram_db, ((0, 0), (0, pad_width)), mode='constant')
+    else:
+        mel_spectrogram_db = mel_spectrogram_db[:, :FIXED_SHAPE[1]]
+    return mel_spectrogram_db
+def inference(file_path):
+    mel_spectrogram_db = extract_mel_spectrogram(file_path)
+    mel_spectrogram_db = mel_spectrogram_db.reshape(1, *mel_spectrogram_db.shape)  # Add batch dimension
+    prediction = loaded_model.predict(mel_spectrogram_db)
+    predicted_label = np.argmax(prediction, axis=-1)
+    return int(predicted_label[0])
+@app.post("/predict")
+async def predict(file: UploadFile):
+    try:
+        # Check file type
+        if not file.filename.endswith(('.wav', '.mp3')):
+            raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")
+        # Save uploaded file to a temporary location
+        temp_file_path = f"temp_{file.filename}"
+        with open(temp_file_path, "wb") as temp_file:
+            temp_file.write(await file.read())
+        # Perform inference
+        predicted_label = inference(temp_file_path)
+        # Remove temporary file
+        os.remove(temp_file_path)
+        return JSONResponse(content={"prediction": predicted_label})
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error during prediction: {str(e)}")
+# Run the application if the script is executed directly.
+# The server listens on all interfaces (0.0.0.0) on port 8000.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi
+uvicorn
+tensorflow
+librosa
+numpy