feat: use Gallery element to show predictions
Files changed:
- app.py: +11 -24
- requirements.txt: +1 -0
- utils.py: +1 -1
app.py
CHANGED
@@ -34,7 +34,7 @@ def interface_fn(
     model: YOLO,
     audio_filepath: str,
     config_model: dict[str, float | int],
-) -> Tuple[Image.Image, pd.DataFrame, str]:
+) -> Tuple[list[Image.Image], pd.DataFrame, str]:
     """
     Main interface function that runs the model on the provided audio_filepath and
     returns the exepected tuple to populate the gradio interface.
@@ -60,6 +60,8 @@ def interface_fn(
         overlap=overlap,
     )
 
+    print(f"waveforms: {waveforms}")
+
     yolov8_predictions = inference(
         model=model,
         audio_filepath=Path(audio_filepath),
@@ -99,28 +101,12 @@ def interface_fn(
 
     spectrograms_pil_images = [Image.fromarray(a) for a in spectrograms_array_images]
 
-    array_image = waveform_to_np_image(
-        waveform=waveforms[0],
-        sample_rate=sample_rate,
-        n_fft=config_model["n_fft"],
-        hop_length=config_model["hop_length"],
-        freq_max=config_model["freq_max"],
-        width=config_model["width"],
-        height=config_model["height"],
-    )
-
     predictions = model.predict(spectrograms_pil_images)
-
-    bgr_to_rgb(
-
-
-    for i in range(1, len(predictions)):
-        pil_image_spectrogram_with_prediction = get_concat_v(
-            pil_image_spectrogram_with_prediction,
-            Image.fromarray(bgr_to_rgb(predictions[i].plot())),
-        )
+    pil_image_spectrogram_with_predictions = [
+        Image.fromarray(bgr_to_rgb(p.plot())) for p in predictions
+    ]
 
-    return (
+    return (pil_image_spectrogram_with_predictions, df[CSV_COLUMNS], prediction_to_str(df=df))
 
 
 def examples(dir_examples: Path) -> list[Path]:
@@ -144,6 +130,7 @@ MODEL_FILEPATH_WEIGHTS = Path("data/model/weights/best.pt")
 MODEL_FILEPTAH_CONFIG = Path("data/model/config.yaml")
 DIR_EXAMPLES = Path("data/sounds/raw")
 DEFAULT_VALUE_INDEX = 0
+CSV_COLUMNS = ["t_start", "t_end", "freq_start", "freq_end", "probability"]
 
 with gr.Blocks() as demo:
     model = load_model(MODEL_FILEPATH_WEIGHTS)
@@ -157,10 +144,10 @@ with gr.Blocks() as demo:
         type="filepath",
         label="input audio",
     )
-
+    output_gallery = gr.Gallery(label="model predictions")
     output_raw = gr.Text(label="raw prediction")
     output_dataframe = gr.DataFrame(
-        headers=
+        headers=CSV_COLUMNS,
         label="prediction as CSV",
     )
 
@@ -173,7 +160,7 @@ with gr.Blocks() as demo:
         title="ML model for forest elephant rumble detection π",
         fn=fn,
         inputs=input,
-        outputs=[
+        outputs=[output_gallery, output_dataframe, output_raw],
        examples=sound_filepaths,
         flagging_mode="never",
     )
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 gradio==5.4.*
+pandas==2.2.*
 torch==2.5.*
 torchaudio==2.5.*
 torchvision==0.20.*
utils.py
CHANGED
@@ -49,7 +49,7 @@ def chunk(
     total_seconds = waveform.shape[1] / sample_rate
     number_spectrograms = total_seconds / (duration - overlap)
     offsets = [
-        idx * (duration - overlap) for idx in range(0, math.
+        idx * (duration - overlap) for idx in range(0, math.ceil(number_spectrograms))
     ]
     return [
         clip(