gunnerforlife52 Claude committed on
Commit
e64f574
·
1 Parent(s): 78680b9

Fix: Improve model loading and audio processing

Browse files

Major changes:
- Changed device=0 to device_map="auto" for HF Spaces compatibility
- Increased audio cap from 30 to 60 seconds (Whisper may chunk internally)
- Added comprehensive error handling and debugging
- Improved response parsing with multiple fallback methods
- Added detailed error traceback for troubleshooting

This should fix issues with model not responding to queries.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -36,13 +36,14 @@ def load_model():
36
  pipe = transformers.pipeline(
37
  model=MODEL_ID,
38
  trust_remote_code=True, # required for Shuka custom pipeline
39
- device=0,
40
  torch_dtype="bfloat16",
41
  )
42
  print("βœ… Pipeline loaded successfully!")
43
  return "βœ… Model pipeline loaded successfully!"
44
  except Exception as e:
45
- err = f"❌ Error loading model: {e}"
 
46
  print(err)
47
  return err
48
 
@@ -89,8 +90,9 @@ def load_audio_from_gradio(audio_input):
89
  audio = librosa.resample(audio, orig_sr=sr, target_sr=TARGET_SR)
90
  sr = TARGET_SR
91
 
92
- # Safety cap at 30 seconds (mel features limited to 3000)
93
- max_sec = 30
 
94
  if len(audio) / float(sr) > max_sec:
95
  audio = audio[: int(max_sec * sr)]
96
 
@@ -143,10 +145,24 @@ def analyze_audio(audio_file, system_prompt):
143
  {"audio": audio, "turns": turns, "sampling_rate": sr},
144
  max_new_tokens=512,
145
  )
146
- text = out[0].get("generated_text", str(out)) if isinstance(out, list) and out else str(out)
 
 
 
 
 
 
 
 
 
 
 
147
  return f"βœ… Processed.\n\n{text}"
148
  except Exception as e:
149
- return f"❌ Inference error: {e}"
 
 
 
150
 
151
 
152
  # ---------------------------
@@ -162,7 +178,7 @@ with gr.Blocks(title="Shuka v1 (8.73B) β€” Audio Analyzer", theme=gr.themes.Soft
162
 
163
  **Shuka** is a multilingual audio-language model with strong capabilities in **11 Indic languages** including Hindi, Bengali, Tamil, Telugu, Marathi, Gujarati, Kannada, Malayalam, Punjabi, Odia, and Assamese.
164
 
165
- ⚠️ **Note:** Audio is automatically capped at 30 seconds due to model constraints.
166
  """)
167
 
168
  with gr.Row():
 
36
  pipe = transformers.pipeline(
37
  model=MODEL_ID,
38
  trust_remote_code=True, # required for Shuka custom pipeline
39
+ device_map="auto", # Use auto device mapping for HF Spaces
40
  torch_dtype="bfloat16",
41
  )
42
  print("βœ… Pipeline loaded successfully!")
43
  return "βœ… Model pipeline loaded successfully!"
44
  except Exception as e:
45
+ import traceback
46
+ err = f"❌ Error loading model: {e}\n\n{traceback.format_exc()}"
47
  print(err)
48
  return err
49
 
 
90
  audio = librosa.resample(audio, orig_sr=sr, target_sr=TARGET_SR)
91
  sr = TARGET_SR
92
 
93
+ # Safety cap at 60 seconds to allow for longer queries
94
+ # Note: Whisper encoder has mel features limit, but may chunk internally
95
+ max_sec = 60
96
  if len(audio) / float(sr) > max_sec:
97
  audio = audio[: int(max_sec * sr)]
98
 
 
145
  {"audio": audio, "turns": turns, "sampling_rate": sr},
146
  max_new_tokens=512,
147
  )
148
+ # Debug: print raw output
149
+ print(f"Raw output type: {type(out)}")
150
+ print(f"Raw output: {out}")
151
+
152
+ # Extract text from response
153
+ if isinstance(out, list) and len(out) > 0:
154
+ text = out[0].get("generated_text", str(out[0]))
155
+ elif isinstance(out, dict):
156
+ text = out.get("generated_text", str(out))
157
+ else:
158
+ text = str(out)
159
+
160
  return f"βœ… Processed.\n\n{text}"
161
  except Exception as e:
162
+ import traceback
163
+ error_details = traceback.format_exc()
164
+ print(f"Full error: {error_details}")
165
+ return f"❌ Inference error: {e}\n\nDetails:\n{error_details}"
166
 
167
 
168
  # ---------------------------
 
178
 
179
  **Shuka** is a multilingual audio-language model with strong capabilities in **11 Indic languages** including Hindi, Bengali, Tamil, Telugu, Marathi, Gujarati, Kannada, Malayalam, Punjabi, Odia, and Assamese.
180
 
181
+ ⚠️ **Note:** Audio is automatically capped at 60 seconds. For best results, use clear audio recordings.
182
  """)
183
 
184
  with gr.Row():