Spaces:

Nick088
/

Audio-SR

Running on Zero

App Files Files Community

Nick088 committed 21 days ago

Commit

6e70b11

verified ·

1 Parent(s): de71e88

add dynamic zerogpu duration & catch no audio file errors

Browse files

Files changed (1) hide show

app.py +53 -3

app.py CHANGED Viewed

@@ -3,10 +3,60 @@ import gradio as gr
 from audiosr import super_resolution, build_model
 import torch
 import gc # free up memory
-@spaces.GPU(duration=120)
 def inference(audio_file, model_name, guidance_scale, ddim_steps, seed):
     audiosr = build_model(model_name=model_name)
     if torch.cuda.is_available():
@@ -45,7 +95,7 @@ iface = gr.Interface(
     ],
     outputs=gr.Audio(type="numpy", label="Output Audio"),
     title="AudioSR",
-    description="Audio Super Resolution with AudioSR"
 )
-iface.launch(share=False)

 from audiosr import super_resolution, build_model
 import torch
 import gc # free up memory
+import soundfile as sf # read audio
+import math # For dynamic gpu duration calculation
+# Estimate a dynamic GPU duration to save ZeroGPU quota. The constants below come from a private benchmark run on a Hugging Face ZeroGPU (H200) Space on 16 November 2025.
+def get_duration(audio_file, model_name, guidance_scale, ddim_steps, seed):
+    """Estimate the GPU duration (in seconds) to request for one `inference` call.
+
+    Has the same parameter list as `inference`, which is decorated with
+    `@spaces.GPU(duration=get_duration)`. Only `audio_file` and `ddim_steps`
+    are used here; `model_name`, `guidance_scale` and `seed` are accepted
+    but ignored.
+
+    Args:
+        audio_file: Audio input handed to `sf.info`; a falsy value means no
+            audio was provided.
+        model_name: Unused.
+        guidance_scale: Unused.
+        ddim_steps: Number of DDIM steps; each step adds 0.06 to the estimate.
+        seed: Unused.
+
+    Returns:
+        0 if `audio_file` is falsy. Otherwise
+        `24 + sf.info(audio_file).duration * 1.0 + ddim_steps * 0.06 + 10`,
+        clamped to the range [50, 180] and rounded up to an integer. If
+        anything inside the estimate raises, the fallback value 60 is
+        returned instead.
+    """
+    # Nothing to estimate without an audio input.
+    if not audio_file:
+        return 0
+    try:
+        # Read the file's metadata to get its length.
+        info = sf.info(audio_file)
+        audio_duration = info.duration
+        # 1. Base overhead for model loading (using the higher 'speech' model value).
+        base_overhead = 24  # seconds
+        # 2. Multipliers for the core ML task.
+        # From benchmark: ~11s for 8s audio @ 50 steps.
+        # Formula: (8s * C1) + (50 steps * C2) = 11s.
+        # We'll estimate C1=1.0 and C2=0.06.
+        time_per_audio_second = 1.0
+        time_per_ddim_step = 0.06
+        # 3. Calculate the estimated processing time.
+        estimated_time = base_overhead + (audio_duration * time_per_audio_second) + (ddim_steps * time_per_ddim_step)
+        # 4. Add a safety buffer to prevent unexpected timeouts.
+        safety_buffer = 10
+        calculated_duration = estimated_time + safety_buffer
+        # 5. Apply min/max constraints.
+        min_duration = 50  # Must be enough for model load + buffer
+        max_duration = 180 # Current ZeroGPU maximum duration
+        final_duration = max(min_duration, min(max_duration, calculated_duration))
+        # Logs the clamped value before math.ceil rounds it up below.
+        print("FINAL DURATION", final_duration)
+        return math.ceil(final_duration)
+    # NOTE(review): this catches every Exception raised in the try body, not
+    # only file-reading failures (e.g. a non-numeric ddim_steps in the
+    # arithmetic above) — confirm the broad catch is intended.
+    except Exception as e:
+        # Fallback to a safe default duration if reading the audio fails.
+        print(f"Error in get_duration, using fallback (60): {e}")
+        return 60
+@spaces.GPU(duration=get_duration)
 def inference(audio_file, model_name, guidance_scale, ddim_steps, seed):
+    if not audio_file:
+        print("No audio file provided, skipping inference.")
+        raise gr.Error(
+            "Please upload an audio file."
+            )
     audiosr = build_model(model_name=model_name)
     if torch.cuda.is_available():
     ],
     outputs=gr.Audio(type="numpy", label="Output Audio"),
     title="AudioSR",
+    description="Audio Super Resolution with AudioSR. <br> It estimates a dynamic gpu duration done by a private Benchmarking HuggingFace ZeroGPU (H200) Space on the 16th November 2025 for saving quota."
 )
+iface.launch(share=False)