Mr7Explorer committed on
Commit
42ea287
·
verified ·
1 Parent(s): 8186a73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -78
app.py CHANGED
@@ -10,7 +10,7 @@ import soundfile as sf
10
  import matplotlib.pyplot as plt
11
  import scipy.signal as sps
12
 
13
- # Local modules
14
  from io_utils import read_audio_info
15
  from time_domain import compute_time_domain_stats
16
  from spectral import compute_spectral_analysis
@@ -25,90 +25,90 @@ from report_generator import create_report
25
  # ============================================================
26
 
27
  def analyze_audio(audio_file, progress=gr.Progress()):
28
- """Full forensic audio analysis pipeline."""
29
-
30
  if audio_file is None:
31
  return None, "⚠️ Please upload an audio file."
32
 
33
  try:
34
  path = Path(audio_file)
35
 
36
- # -----------------------------
37
  # FILE INFO + LOAD AUDIO
38
- # -----------------------------
39
- progress(0.10, desc="Reading file info...")
40
  info = read_audio_info(str(path))
41
 
42
  progress(0.25, desc="Loading waveform...")
43
  y, sr = librosa.load(str(path), sr=None, mono=True)
44
 
45
- # -----------------------------
46
- # TIME DOMAIN
47
- # -----------------------------
48
  progress(0.35, desc="Time-domain analysis...")
49
  time_stats = compute_time_domain_stats(y)
50
 
51
- # -----------------------------
52
  # SPECTRAL ANALYSIS
53
- # -----------------------------
54
  progress(0.50, desc="Spectral analysis...")
55
  spectral = compute_spectral_analysis(y, sr)
56
 
57
- # -----------------------------
58
  # LOUDNESS
59
- # -----------------------------
60
- progress(0.60, desc="Computing loudness...")
61
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
62
 
63
- # -----------------------------
64
  # ISSUE DETECTION
65
- # -----------------------------
66
- progress(0.70, desc="Detecting anomalies...")
67
  issues = detect_audio_issues(spectral, time_stats)
68
 
69
- # -----------------------------
70
- # SYNTHETIC VOICE (INFO ONLY)
71
- # -----------------------------
72
- progress(0.78, desc="Synthetic voice estimation...")
73
  synthetic = detect_synthetic_voice(y, sr, spectral)
74
 
75
  # ======================================================
76
- # SCORING β€” CLEAN VERSION (NO DUPLICATE ASSIGNMENT)
77
  # ======================================================
78
- progress(0.82, desc="Scoring...")
79
 
80
  critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
81
- high = sum(1 for _, sev, _ in issues if sev == "HIGH")
82
- medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
83
- low = sum(1 for _, sev, _ in issues if sev == "LOW")
84
 
85
  score_value = 100 - (critical * 30) - (high * 15) - (medium * 5)
86
  score_value = max(0, score_value)
87
 
 
88
  if score_value >= 90:
89
  grade, quality = "A", "EXCELLENT"
90
- color = "#b3ffb3"
91
  recommendation = "Excellent for TTS dataset"
92
  elif score_value >= 75:
93
  grade, quality = "B", "GOOD"
94
  color = "#ccffcc"
95
- recommendation = "Very good quality; suitable for TTS"
96
  elif score_value >= 60:
97
  grade, quality = "C", "FAIR"
98
  color = "#fff6b3"
99
- recommendation = "Usable but may contain processing artifacts"
100
  elif score_value >= 40:
101
  grade, quality = "D", "POOR"
102
  color = "#ffd9b3"
103
- recommendation = "Not recommended for TTS (heavy processing)"
104
  else:
105
  grade, quality = "F", "CRITICAL"
106
  color = "#ffb3b3"
107
- recommendation = "Severely degraded or processed; avoid for TTS"
108
 
109
  cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
110
  processing_severity = (critical * 3) + (high * 2) + medium
111
 
 
112
  score_dict = {
113
  "score": score_value,
114
  "grade": grade,
@@ -124,7 +124,7 @@ def analyze_audio(audio_file, progress=gr.Progress()):
124
  }
125
 
126
  # ======================================================
127
- # BUILD FINAL AUDIO_DATA PAYLOAD (SAFE ORDER)
128
  # ======================================================
129
  audio_data = {
130
  "filename": path.name,
@@ -139,29 +139,27 @@ def analyze_audio(audio_file, progress=gr.Progress()):
139
  }
140
 
141
  # ======================================================
142
- # REPORT GENERATION
143
  # ======================================================
144
- progress(0.92, desc="Rendering PNG report...")
145
-
146
- output_dir = Path("reports")
147
- output_dir.mkdir(exist_ok=True)
148
 
149
- output_file = output_dir / f"{path.stem}_report.png"
 
 
150
 
151
  create_report(audio_data, str(output_file))
152
 
153
  # ======================================================
154
- # MARKDOWN SUMMARY (CORRECTED)
155
  # ======================================================
156
 
157
- progress(1.00, desc="Complete!")
158
-
159
  s = score_dict
 
160
 
161
  md = f"""
162
- # 🎡 Audio Forensic Analysis β€” Summary Report
163
 
164
- ## πŸ“ File
165
  - **Name:** `{audio_data['filename']}`
166
  - **Duration:** {info['duration']:.2f}s
167
  - **Sample Rate:** {info['samplerate']} Hz
@@ -169,7 +167,21 @@ def analyze_audio(audio_file, progress=gr.Progress()):
169
 
170
  ---
171
 
172
- ## πŸ§ͺ Quality Assessment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  - **Score:** {s['score']}/100
174
  - **Grade:** {s['grade']}
175
  - **Quality:** {s['quality']}
@@ -177,24 +189,42 @@ def analyze_audio(audio_file, progress=gr.Progress()):
177
 
178
  ---
179
 
180
- ## πŸ”§ Time-Domain Metrics
181
  | Metric | Value |
182
  |--------|--------|
183
- | Peak | {time_stats['peak_db']:.2f} dBFS |
184
- | RMS | {time_stats['rms_db']:.2f} dBFS |
185
- | Crest | {time_stats['crest_factor_db']:.2f} dB |
 
186
  | SNR | {time_stats['snr_db']:.1f} dB |
187
  | ZCR | {time_stats['zero_crossing_rate']:.4f} |
188
 
189
- """
190
 
191
- if lufs is not None:
192
- md += f"- **Integrated LUFS:** {lufs:.2f} LUFS\n"
 
 
 
 
 
193
 
194
- md += f"""
195
  ---
196
 
197
- ## πŸ€– Synthetic Voice (Informational Only)
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  - **Probability:** {synthetic['synthetic_probability']:.3f}
199
  - **Label:** {synthetic['synthetic_label']}
200
 
@@ -204,16 +234,16 @@ def analyze_audio(audio_file, progress=gr.Progress()):
204
  """
205
 
206
  if issues:
207
- icon_map = {"CRITICAL": "πŸ”΄", "HIGH": "🟠", "MEDIUM": "🟑", "LOW": "🟒"}
208
  for issue, sev, desc in issues:
209
- md += f"- {icon_map.get(sev,'βšͺ')} **[{sev}] {issue}** β€” {desc}\n"
210
  else:
211
- md += "- βœ… No significant issues detected\n"
212
 
213
  md += f"""
214
  ---
215
 
216
- πŸ“Š **PNG Report Saved:** `{output_file.name}`
217
  πŸ•’ Generated: {audio_data['timestamp']}
218
  """
219
 
@@ -225,6 +255,7 @@ def analyze_audio(audio_file, progress=gr.Progress()):
225
  return None, f"# ❌ Analysis Failed\n{str(e)}"
226
 
227
 
 
228
  # ============================================================
229
  # GRADIO UI
230
  # ============================================================
@@ -232,41 +263,35 @@ def analyze_audio(audio_file, progress=gr.Progress()):
232
  with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
233
 
234
  gr.Markdown("""
235
- # 🎧 Audio Forensic Analyzer
236
- Upload an audio file to generate a forensic-quality report including:
237
- - HF/LF analysis
238
  - Filtering detection (HPF / LPF / Brickwall)
239
  - Noise reduction artifacts
240
- - Compression indicators
241
  - Spectral notches
242
- - Loudness (LUFS)
243
- - Synthetic voice estimation
244
- The tool outputs a **PNG forensic report** + **Summary Markdown**.
245
  """)
246
 
247
  with gr.Row():
248
  with gr.Column(scale=1):
249
- audio_input = gr.Audio(
250
- label="πŸ“ Upload Audio",
251
- type="filepath"
252
- )
253
- analyze_button = gr.Button("πŸ” Analyze Audio", variant="primary")
254
 
255
  with gr.Column(scale=2):
256
- png_output = gr.Image(
257
- label="πŸ“Š Forensic Report (PNG)",
258
- type="filepath",
259
- height=600
260
- )
261
 
262
- summary_output = gr.Markdown(label="πŸ“‹ Summary Report")
263
 
264
- analyze_button.click(
265
  fn=analyze_audio,
266
- inputs=[audio_input],
267
- outputs=[png_output, summary_output]
268
  )
269
 
270
 
 
271
  if __name__ == "__main__":
272
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
10
  import matplotlib.pyplot as plt
11
  import scipy.signal as sps
12
 
13
+ # Local Modules
14
  from io_utils import read_audio_info
15
  from time_domain import compute_time_domain_stats
16
  from spectral import compute_spectral_analysis
 
25
  # ============================================================
26
 
27
  def analyze_audio(audio_file, progress=gr.Progress()):
 
 
28
  if audio_file is None:
29
  return None, "⚠️ Please upload an audio file."
30
 
31
  try:
32
  path = Path(audio_file)
33
 
34
+ # ------------------------------------------------------
35
  # FILE INFO + LOAD AUDIO
36
+ # ------------------------------------------------------
37
+ progress(0.10, desc="Reading file...")
38
  info = read_audio_info(str(path))
39
 
40
  progress(0.25, desc="Loading waveform...")
41
  y, sr = librosa.load(str(path), sr=None, mono=True)
42
 
43
+ # ------------------------------------------------------
44
+ # TIME-DOMAIN ANALYSIS
45
+ # ------------------------------------------------------
46
  progress(0.35, desc="Time-domain analysis...")
47
  time_stats = compute_time_domain_stats(y)
48
 
49
+ # ------------------------------------------------------
50
  # SPECTRAL ANALYSIS
51
+ # ------------------------------------------------------
52
  progress(0.50, desc="Spectral analysis...")
53
  spectral = compute_spectral_analysis(y, sr)
54
 
55
+ # ------------------------------------------------------
56
  # LOUDNESS
57
+ # ------------------------------------------------------
58
+ progress(0.60, desc="Computing LUFS loudness...")
59
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
60
 
61
+ # ------------------------------------------------------
62
  # ISSUE DETECTION
63
+ # ------------------------------------------------------
64
+ progress(0.70, desc="Detecting spectral & time issues...")
65
  issues = detect_audio_issues(spectral, time_stats)
66
 
67
+ # ------------------------------------------------------
68
+ # SYNTHETIC VOICE ESTIMATION (INFO ONLY)
69
+ # ------------------------------------------------------
70
+ progress(0.78, desc="Synthetic voice probability...")
71
  synthetic = detect_synthetic_voice(y, sr, spectral)
72
 
73
  # ======================================================
74
+ # SCORING SYSTEM (FINAL, CLEAN VERSION)
75
  # ======================================================
76
+ progress(0.82, desc="Scoring audio quality...")
77
 
78
  critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
79
+ high = sum(1 for _, sev, _ in issues if sev == "HIGH")
80
+ medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
81
+ low = sum(1 for _, sev, _ in issues if sev == "LOW")
82
 
83
  score_value = 100 - (critical * 30) - (high * 15) - (medium * 5)
84
  score_value = max(0, score_value)
85
 
86
+ # SAFE, MATPLOTLIB-COMPATIBLE COLORS
87
  if score_value >= 90:
88
  grade, quality = "A", "EXCELLENT"
89
+ color = "#b3ffb3" # soft green
90
  recommendation = "Excellent for TTS dataset"
91
  elif score_value >= 75:
92
  grade, quality = "B", "GOOD"
93
  color = "#ccffcc"
94
+ recommendation = "Good quality; suitable for TTS"
95
  elif score_value >= 60:
96
  grade, quality = "C", "FAIR"
97
  color = "#fff6b3"
98
+ recommendation = "Fair; contains noticeable processing artifacts"
99
  elif score_value >= 40:
100
  grade, quality = "D", "POOR"
101
  color = "#ffd9b3"
102
+ recommendation = "Poor quality; not recommended for TTS"
103
  else:
104
  grade, quality = "F", "CRITICAL"
105
  color = "#ffb3b3"
106
+ recommendation = "Severely degraded or heavily processed audio"
107
 
108
  cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
109
  processing_severity = (critical * 3) + (high * 2) + medium
110
 
111
+ # Score Dictionary
112
  score_dict = {
113
  "score": score_value,
114
  "grade": grade,
 
124
  }
125
 
126
  # ======================================================
127
+ # ASSEMBLE FINAL audio_data PAYLOAD
128
  # ======================================================
129
  audio_data = {
130
  "filename": path.name,
 
139
  }
140
 
141
  # ======================================================
142
+ # PNG REPORT GENERATION
143
  # ======================================================
144
+ progress(0.92, desc="Rendering PNG forensic report...")
 
 
 
145
 
146
+ report_dir = Path("reports")
147
+ report_dir.mkdir(exist_ok=True)
148
+ output_file = report_dir / f"{path.stem}_report.png"
149
 
150
  create_report(audio_data, str(output_file))
151
 
152
  # ======================================================
153
+ # SUMMARY MARKDOWN OUTPUT
154
  # ======================================================
155
 
 
 
156
  s = score_dict
157
+ e = spectral["energy_distribution"]
158
 
159
  md = f"""
160
+ # 🎡 Audio Forensic Summary Report
161
 
162
+ ## πŸ“ File Information
163
  - **Name:** `{audio_data['filename']}`
164
  - **Duration:** {info['duration']:.2f}s
165
  - **Sample Rate:** {info['samplerate']} Hz
 
167
 
168
  ---
169
 
170
+ ## 🎚 Loudness (ITU-R BS.1770-3)
171
+ - **Integrated LUFS:** {lufs:.2f} LUFS
172
+ """
173
+
174
+ # Loudness Compliance
175
+ if lufs is not None:
176
+ if -25 <= lufs <= -21:
177
+ md += f" - **Status:** PASS βœ… (Compliant βˆ’23 LUFS Β±2)\n"
178
+ else:
179
+ md += f" - **Status:** FAIL ❌ (Not compliant with βˆ’23 LUFS Β±2)\n"
180
+
181
+ md += f"""
182
+ ---
183
+
184
+ ## πŸ§ͺ Audio Quality Score
185
  - **Score:** {s['score']}/100
186
  - **Grade:** {s['grade']}
187
  - **Quality:** {s['quality']}
 
189
 
190
  ---
191
 
192
+ ## πŸ”§ Time-Domain Characteristics
193
  | Metric | Value |
194
  |--------|--------|
195
+ | Peak Level | {time_stats['peak_db']:.2f} dBFS |
196
+ | RMS Level | {time_stats['rms_db']:.2f} dBFS |
197
+ | Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
198
+ | Noise Floor | {time_stats['noise_floor']:.6f} |
199
  | SNR | {time_stats['snr_db']:.1f} dB |
200
  | ZCR | {time_stats['zero_crossing_rate']:.4f} |
201
 
202
+ ---
203
 
204
+ ## πŸŽ› Spectral Analysis
205
+ | Parameter | Value |
206
+ |----------|--------|
207
+ | Spectral Centroid | {spectral['spectral_centroid']:.1f} Hz |
208
+ | 85% Rolloff | {spectral['rolloff_85pct']:.1f} Hz |
209
+ | 95% Rolloff | {spectral['rolloff_95pct']:.1f} Hz |
210
+ | Highest Frequency (βˆ’60 dB) | {spectral['highest_freq_minus60db']:.1f} Hz |
211
 
 
212
  ---
213
 
214
+ ## πŸ”Š Energy Distribution (Speech Frequency Bands)
215
+ | Band | Energy % |
216
+ |------|----------|
217
+ | <100 Hz | {e['below_100hz']:.2f}% |
218
+ | 100–500 Hz | {e['100_500hz']:.2f}% |
219
+ | 500–2k Hz | {e['500_2khz']:.2f}% |
220
+ | 2k–8k Hz | {e['2k_8khz']:.2f}% |
221
+ | 8k–12k Hz | {e['8k_12khz']:.2f}% |
222
+ | 12k–16k Hz | {e['12k_16hz']:.2f}% |
223
+ | >16k Hz | {e['above_16khz']:.2f}% |
224
+
225
+ ---
226
+
227
+ ## πŸ€– Synthetic Voice Estimate (For Information Only)
228
  - **Probability:** {synthetic['synthetic_probability']:.3f}
229
  - **Label:** {synthetic['synthetic_label']}
230
 
 
234
  """
235
 
236
  if issues:
237
+ icons = {"CRITICAL":"πŸ”΄","HIGH":"🟠","MEDIUM":"🟑","LOW":"🟒"}
238
  for issue, sev, desc in issues:
239
+ md += f"- {icons[sev]} **[{sev}] {issue}** β€” {desc}\n"
240
  else:
241
+ md += "- βœ… No issues detected.\n"
242
 
243
  md += f"""
244
  ---
245
 
246
+ πŸ“Š **PNG Forensic Report Saved:** `{output_file.name}`
247
  πŸ•’ Generated: {audio_data['timestamp']}
248
  """
249
 
 
255
  return None, f"# ❌ Analysis Failed\n{str(e)}"
256
 
257
 
258
+
259
  # ============================================================
260
  # GRADIO UI
261
  # ============================================================
 
263
  with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
264
 
265
  gr.Markdown("""
266
+ # 🎧 AUDIO FORENSIC ANALYZER
267
+ Upload an audio file to generate a **full forensic spectral report**:
268
+ - HF/LF rolloff
269
  - Filtering detection (HPF / LPF / Brickwall)
270
  - Noise reduction artifacts
271
+ - Clipping & compression indicators
272
  - Spectral notches
273
+ - LUFS (ITU-R BS.1770-3)
274
+ - Synthetic speech probability
275
+ Outputs a **PNG forensic report + Markdown summary**
276
  """)
277
 
278
  with gr.Row():
279
  with gr.Column(scale=1):
280
+ audio_in = gr.Audio(label="πŸ“ Upload Audio", type="filepath")
281
+ analyze_btn = gr.Button("πŸ” Analyze Audio", variant="primary")
 
 
 
282
 
283
  with gr.Column(scale=2):
284
+ png_out = gr.Image(label="πŸ“Š Forensic PNG Report", type="filepath", height=600)
 
 
 
 
285
 
286
+ summary_out = gr.Markdown(label="πŸ“‹ Summary Report")
287
 
288
+ analyze_btn.click(
289
  fn=analyze_audio,
290
+ inputs=[audio_in],
291
+ outputs=[png_out, summary_out]
292
  )
293
 
294
 
295
+ # Run Space
296
  if __name__ == "__main__":
297
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)