Mr7Explorer committed on
Commit
d278c12
·
verified ·
1 Parent(s): 802ae2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -100
app.py CHANGED
@@ -8,10 +8,10 @@ import numpy as np
8
  import librosa
9
  import soundfile as sf
10
  import matplotlib.pyplot as plt
11
- import matplotlib.gridspec as gridspec
12
  import scipy.signal as sps
13
 
14
- from io_utils import read_audio_info, load_audio_mono
 
15
  from time_domain import compute_time_domain_stats
16
  from spectral import compute_spectral_analysis
17
  from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
@@ -19,84 +19,85 @@ from issue_detection import detect_audio_issues
19
  from synthetic_detector import detect_synthetic_voice
20
  from report_generator import create_report
21
 
 
22
  # ============================================================
23
- # MAIN ANALYSIS FUNCTION
24
  # ============================================================
25
 
26
  def analyze_audio(audio_file, progress=gr.Progress()):
27
- """Main Gradio callback β€” performs full forensic analysis."""
28
 
29
  if audio_file is None:
30
  return None, "⚠️ Please upload an audio file."
31
 
32
  try:
33
- progress(0.1, desc="Reading audio file...")
34
-
35
  path = Path(audio_file)
 
 
 
 
 
36
  info = read_audio_info(str(path))
37
 
38
  progress(0.25, desc="Loading waveform...")
39
  y, sr = librosa.load(str(path), sr=None, mono=True)
40
 
41
- # ======================================================
42
  # TIME DOMAIN
43
- # ======================================================
44
- progress(0.35, desc="Analyzing time-domain...")
45
  time_stats = compute_time_domain_stats(y)
46
 
47
- # ======================================================
48
- # SPECTRAL
49
- # ======================================================
50
- progress(0.50, desc="Computing spectral analysis...")
51
  spectral = compute_spectral_analysis(y, sr)
52
 
53
- # ======================================================
54
  # LOUDNESS
55
- # ======================================================
56
  progress(0.60, desc="Computing loudness...")
57
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
58
 
59
- # ======================================================
60
- # ISSUES
61
- # ======================================================
62
- progress(0.70, desc="Detecting issues...")
63
  issues = detect_audio_issues(spectral, time_stats)
64
 
65
- # ======================================================
66
- # SYNTHETIC DETECTION (informational only)
67
- # ======================================================
68
  progress(0.78, desc="Synthetic voice estimation...")
69
  synthetic = detect_synthetic_voice(y, sr, spectral)
70
 
71
  # ======================================================
72
- # SCORING
73
  # ======================================================
74
  progress(0.82, desc="Scoring...")
75
-
76
- # Count issues by severity
77
  critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
78
  high = sum(1 for _, sev, _ in issues if sev == "HIGH")
79
  medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
80
  low = sum(1 for _, sev, _ in issues if sev == "LOW")
81
-
82
- # Score formula
83
- score = 100 - (critical * 30) - (high * 15) - (medium * 5)
84
- score = max(0, score)
85
-
86
- # Grade + Quality Label (same logic as your report)
87
- if score >= 90:
88
  grade, quality = "A", "EXCELLENT"
89
  color = "🟒"
90
  recommendation = "Excellent for TTS dataset"
91
- elif score >= 75:
92
  grade, quality = "B", "GOOD"
93
  color = "🟒"
94
  recommendation = "Very good quality; suitable for TTS"
95
- elif score >= 60:
96
  grade, quality = "C", "FAIR"
97
  color = "🟑"
98
  recommendation = "Usable but may contain processing artifacts"
99
- elif score >= 40:
100
  grade, quality = "D", "POOR"
101
  color = "🟠"
102
  recommendation = "Not recommended for TTS (heavy processing)"
@@ -104,15 +105,12 @@ def analyze_audio(audio_file, progress=gr.Progress()):
104
  grade, quality = "F", "CRITICAL"
105
  color = "πŸ”΄"
106
  recommendation = "Severely degraded or processed; avoid for TTS"
107
-
108
- # Cleanliness score (needed by report_generator)
109
  cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
110
-
111
- # Processing severity index (needed by report_generator)
112
  processing_severity = (critical * 3) + (high * 2) + medium
113
-
114
- audio_data["score"] = {
115
- "score": score,
116
  "grade": grade,
117
  "quality": quality,
118
  "recommendation": recommendation,
@@ -124,16 +122,10 @@ def analyze_audio(audio_file, progress=gr.Progress()):
124
  "low": low,
125
  "color": color
126
  }
127
-
128
 
129
  # ======================================================
130
- # CREATE REPORT PNG
131
  # ======================================================
132
- output_dir = Path("reports")
133
- output_dir.mkdir(exist_ok=True)
134
-
135
- output_file = output_dir / (path.stem + "_report.png")
136
-
137
  audio_data = {
138
  "filename": path.name,
139
  "info": info,
@@ -141,73 +133,70 @@ def analyze_audio(audio_file, progress=gr.Progress()):
141
  "spectral": spectral,
142
  "lufs": lufs,
143
  "issues": issues,
144
- # Use the full score dictionary (correct)
145
- "score": {
146
- "score": score,
147
- "grade": grade,
148
- "quality": quality,
149
- "recommendation": recommendation,
150
- "cleanliness_score": cleanliness_score,
151
- "processing_severity": processing_severity,
152
- "critical": critical,
153
- "high": high,
154
- "medium": medium,
155
- "low": low,
156
- "color": color
157
- },
158
  "synthetic": synthetic,
159
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
160
  }
161
 
162
-
 
 
163
  progress(0.92, desc="Rendering PNG report...")
 
 
 
 
 
 
164
  create_report(audio_data, str(output_file))
165
 
166
  # ======================================================
167
- # SUMMARY MARKDOWN
168
  # ======================================================
169
 
170
- progress(1.0, desc="Done!")
 
 
171
 
172
  md = f"""
173
- # 🎡 Analysis Complete β€” Audio Forensic Analyzer
174
 
175
- ## File Information
176
- - **Filename:** `{audio_data['filename']}`
177
  - **Duration:** {info['duration']:.2f}s
178
  - **Sample Rate:** {info['samplerate']} Hz
179
- - **Channels:** {info['channels']}
180
 
181
  ---
182
 
183
- ## πŸ” Quality Assessment
184
- - **Score:** {score['score']}/100
185
- - **Grade:** {score['grade']}
186
- - **Quality:** {score['quality']}
187
- - **Recommendation:** {score['recommendation']}
188
 
189
  ---
190
 
191
- ## πŸ”§ Time-Domain Stats
192
  | Metric | Value |
193
  |--------|--------|
194
- | Peak Level | {time_stats['peak_db']:.2f} dBFS |
195
- | RMS Level | {time_stats['rms_db']:.2f} dBFS |
196
- | Crest Factor | {time_stats['crest_factor_db']:.2f} dB |
197
  | SNR | {time_stats['snr_db']:.1f} dB |
198
  | ZCR | {time_stats['zero_crossing_rate']:.4f} |
199
 
200
  """
201
 
202
  if lufs is not None:
203
- md += f"| Integrated LUFS | {lufs:.2f} LUFS |\n"
204
 
205
  md += f"""
206
  ---
207
 
208
- ## 🎚 Synthetic Voice Estimate (Informational Only)
209
- - **Probability:** {synthetic['synthetic_probability']:.2f}
210
- - **Label:** **{synthetic['synthetic_label']}**
211
 
212
  ---
213
 
@@ -215,18 +204,17 @@ def analyze_audio(audio_file, progress=gr.Progress()):
215
  """
216
 
217
  if issues:
218
- icons = {"CRITICAL": "πŸ”΄", "HIGH": "🟠", "MEDIUM": "🟑", "LOW": "🟒"}
219
  for issue, sev, desc in issues:
220
- md += f"- {icons.get(sev,'βšͺ')} **[{sev}] {issue}** β€” {desc}\n"
221
  else:
222
- md += "- βœ… No significant issues\n"
223
 
224
  md += f"""
225
-
226
  ---
227
 
228
- πŸ“Š **Report PNG saved:** `{output_file.name}`
229
-
230
  """
231
 
232
  return str(output_file), md
@@ -245,26 +233,23 @@ with gr.Blocks(title="Audio Forensic Analyzer", theme="soft") as demo:
245
 
246
  gr.Markdown("""
247
  # 🎧 Audio Forensic Analyzer
248
- Upload an audio file and generate a **full forensic report**:
249
- - HF/LF rolloff
250
- - Filtering (LPF/HPF/Brickwall)
251
- - Compression & clipping
252
  - Noise reduction artifacts
 
253
  - Spectral notches
254
  - Loudness (LUFS)
255
- - Synthetic Voice Probability (Informational Only)
256
-
257
- Report includes a PNG + formatted summary.
258
  """)
259
 
260
  with gr.Row():
261
  with gr.Column(scale=1):
262
  audio_input = gr.Audio(
263
  label="πŸ“ Upload Audio",
264
- type="filepath",
265
- sources=["upload"]
266
  )
267
-
268
  analyze_button = gr.Button("πŸ” Analyze Audio", variant="primary")
269
 
270
  with gr.Column(scale=2):
@@ -283,6 +268,5 @@ Report includes a PNG + formatted summary.
283
  )
284
 
285
 
286
- # Run in HuggingFace Space
287
  if __name__ == "__main__":
288
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
8
  import librosa
9
  import soundfile as sf
10
  import matplotlib.pyplot as plt
 
11
  import scipy.signal as sps
12
 
13
+ # Local modules
14
+ from io_utils import read_audio_info
15
  from time_domain import compute_time_domain_stats
16
  from spectral import compute_spectral_analysis
17
  from loudness_calc import compute_loudness, LOUDNESS_AVAILABLE
 
19
  from synthetic_detector import detect_synthetic_voice
20
  from report_generator import create_report
21
 
22
+
23
  # ============================================================
24
+ # MAIN FORENSIC ANALYSIS FUNCTION
25
  # ============================================================
26
 
27
  def analyze_audio(audio_file, progress=gr.Progress()):
28
+ """Full forensic audio analysis pipeline."""
29
 
30
  if audio_file is None:
31
  return None, "⚠️ Please upload an audio file."
32
 
33
  try:
 
 
34
  path = Path(audio_file)
35
+
36
+ # -----------------------------
37
+ # FILE INFO + LOAD AUDIO
38
+ # -----------------------------
39
+ progress(0.10, desc="Reading file info...")
40
  info = read_audio_info(str(path))
41
 
42
  progress(0.25, desc="Loading waveform...")
43
  y, sr = librosa.load(str(path), sr=None, mono=True)
44
 
45
+ # -----------------------------
46
  # TIME DOMAIN
47
+ # -----------------------------
48
+ progress(0.35, desc="Time-domain analysis...")
49
  time_stats = compute_time_domain_stats(y)
50
 
51
+ # -----------------------------
52
+ # SPECTRAL ANALYSIS
53
+ # -----------------------------
54
+ progress(0.50, desc="Spectral analysis...")
55
  spectral = compute_spectral_analysis(y, sr)
56
 
57
+ # -----------------------------
58
  # LOUDNESS
59
+ # -----------------------------
60
  progress(0.60, desc="Computing loudness...")
61
  lufs = compute_loudness(y, sr) if LOUDNESS_AVAILABLE else None
62
 
63
+ # -----------------------------
64
+ # ISSUE DETECTION
65
+ # -----------------------------
66
+ progress(0.70, desc="Detecting anomalies...")
67
  issues = detect_audio_issues(spectral, time_stats)
68
 
69
+ # -----------------------------
70
+ # SYNTHETIC VOICE (INFO ONLY)
71
+ # -----------------------------
72
  progress(0.78, desc="Synthetic voice estimation...")
73
  synthetic = detect_synthetic_voice(y, sr, spectral)
74
 
75
  # ======================================================
76
+ # SCORING — CLEAN VERSION (NO DUPLICATE ASSIGNMENT)
77
  # ======================================================
78
  progress(0.82, desc="Scoring...")
79
+
 
80
  critical = sum(1 for _, sev, _ in issues if sev == "CRITICAL")
81
  high = sum(1 for _, sev, _ in issues if sev == "HIGH")
82
  medium = sum(1 for _, sev, _ in issues if sev == "MEDIUM")
83
  low = sum(1 for _, sev, _ in issues if sev == "LOW")
84
+
85
+ score_value = 100 - (critical * 30) - (high * 15) - (medium * 5)
86
+ score_value = max(0, score_value)
87
+
88
+ if score_value >= 90:
 
 
89
  grade, quality = "A", "EXCELLENT"
90
  color = "🟒"
91
  recommendation = "Excellent for TTS dataset"
92
+ elif score_value >= 75:
93
  grade, quality = "B", "GOOD"
94
  color = "🟒"
95
  recommendation = "Very good quality; suitable for TTS"
96
+ elif score_value >= 60:
97
  grade, quality = "C", "FAIR"
98
  color = "🟑"
99
  recommendation = "Usable but may contain processing artifacts"
100
+ elif score_value >= 40:
101
  grade, quality = "D", "POOR"
102
  color = "🟠"
103
  recommendation = "Not recommended for TTS (heavy processing)"
 
105
  grade, quality = "F", "CRITICAL"
106
  color = "πŸ”΄"
107
  recommendation = "Severely degraded or processed; avoid for TTS"
108
+
 
109
  cleanliness_score = max(0, 100 - (medium * 5 + low * 3))
 
 
110
  processing_severity = (critical * 3) + (high * 2) + medium
111
+
112
+ score_dict = {
113
+ "score": score_value,
114
  "grade": grade,
115
  "quality": quality,
116
  "recommendation": recommendation,
 
122
  "low": low,
123
  "color": color
124
  }
 
125
 
126
  # ======================================================
127
+ # BUILD FINAL AUDIO_DATA PAYLOAD (SAFE ORDER)
128
  # ======================================================
 
 
 
 
 
129
  audio_data = {
130
  "filename": path.name,
131
  "info": info,
 
133
  "spectral": spectral,
134
  "lufs": lufs,
135
  "issues": issues,
136
+ "score": score_dict,
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  "synthetic": synthetic,
138
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
139
  }
140
 
141
+ # ======================================================
142
+ # REPORT GENERATION
143
+ # ======================================================
144
  progress(0.92, desc="Rendering PNG report...")
145
+
146
+ output_dir = Path("reports")
147
+ output_dir.mkdir(exist_ok=True)
148
+
149
+ output_file = output_dir / f"{path.stem}_report.png"
150
+
151
  create_report(audio_data, str(output_file))
152
 
153
  # ======================================================
154
+ # MARKDOWN SUMMARY (CORRECTED)
155
  # ======================================================
156
 
157
+ progress(1.00, desc="Complete!")
158
+
159
+ s = score_dict
160
 
161
  md = f"""
162
+ # 🎡 Audio Forensic Analysis β€” Summary Report
163
 
164
+ ## πŸ“ File
165
+ - **Name:** `{audio_data['filename']}`
166
  - **Duration:** {info['duration']:.2f}s
167
  - **Sample Rate:** {info['samplerate']} Hz
168
+ - **Channels:** {info['channels']}
169
 
170
  ---
171
 
172
+ ## πŸ§ͺ Quality Assessment
173
+ - **Score:** {s['score']}/100
174
+ - **Grade:** {s['grade']}
175
+ - **Quality:** {s['quality']}
176
+ - **Recommendation:** {s['recommendation']}
177
 
178
  ---
179
 
180
+ ## πŸ”§ Time-Domain Metrics
181
  | Metric | Value |
182
  |--------|--------|
183
+ | Peak | {time_stats['peak_db']:.2f} dBFS |
184
+ | RMS | {time_stats['rms_db']:.2f} dBFS |
185
+ | Crest | {time_stats['crest_factor_db']:.2f} dB |
186
  | SNR | {time_stats['snr_db']:.1f} dB |
187
  | ZCR | {time_stats['zero_crossing_rate']:.4f} |
188
 
189
  """
190
 
191
  if lufs is not None:
192
+ md += f"- **Integrated LUFS:** {lufs:.2f} LUFS\n"
193
 
194
  md += f"""
195
  ---
196
 
197
+ ## πŸ€– Synthetic Voice (Informational Only)
198
+ - **Probability:** {synthetic['synthetic_probability']:.3f}
199
+ - **Label:** {synthetic['synthetic_label']}
200
 
201
  ---
202
 
 
204
  """
205
 
206
  if issues:
207
+ icon_map = {"CRITICAL": "πŸ”΄", "HIGH": "οΏ½οΏ½οΏ½", "MEDIUM": "🟑", "LOW": "🟒"}
208
  for issue, sev, desc in issues:
209
+ md += f"- {icon_map.get(sev,'βšͺ')} **[{sev}] {issue}** β€” {desc}\n"
210
  else:
211
+ md += "- βœ… No significant issues detected\n"
212
 
213
  md += f"""
 
214
  ---
215
 
216
+ πŸ“Š **PNG Report Saved:** `{output_file.name}`
217
+ πŸ•’ Generated: {audio_data['timestamp']}
218
  """
219
 
220
  return str(output_file), md
 
233
 
234
  gr.Markdown("""
235
  # 🎧 Audio Forensic Analyzer
236
+ Upload an audio file to generate a forensic-quality report including:
237
+ - HF/LF analysis
238
+ - Filtering detection (HPF / LPF / Brickwall)
 
239
  - Noise reduction artifacts
240
+ - Compression indicators
241
  - Spectral notches
242
  - Loudness (LUFS)
243
+ - Synthetic voice estimation
244
+ The tool outputs a **PNG forensic report** + **Summary Markdown**.
 
245
  """)
246
 
247
  with gr.Row():
248
  with gr.Column(scale=1):
249
  audio_input = gr.Audio(
250
  label="πŸ“ Upload Audio",
251
+ type="filepath"
 
252
  )
 
253
  analyze_button = gr.Button("πŸ” Analyze Audio", variant="primary")
254
 
255
  with gr.Column(scale=2):
 
268
  )
269
 
270
 
 
271
  if __name__ == "__main__":
272
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)