Yoni232 committed
Commit a86b9b2 · 1 Parent(s): 1de6d5a

changed output file name to input file name and fixed stereo to mono bug

Files changed (1):
  1. app.py  +25 -12
app.py CHANGED
@@ -12,10 +12,16 @@ from pathlib import Path
 import numpy as np
 import soundfile as sf
 import librosa
+import logging
 from onsets_and_frames.hf_model import CountEMModel
 from onsets_and_frames.constants import SAMPLE_RATE


+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
 # Cache for loaded models to avoid reloading
 model_cache = {}

@@ -23,9 +29,9 @@ model_cache = {}
 def load_model(model_name: str) -> CountEMModel:
     """Load model from cache or download from Hugging Face Hub."""
     if model_name not in model_cache:
-        print(f"Loading model: {model_name}")
+        logger.info(f"Loading model: {model_name}")
         model_cache[model_name] = CountEMModel.from_pretrained(model_name)
-        print(f"Model loaded successfully")
+        logger.info(f"Model loaded successfully")
     return model_cache[model_name]


@@ -61,6 +67,7 @@ def transcribe_audio(

         # Extract audio data
         # Gradio Audio component returns (sample_rate, audio_array) or audio file path
+        input_filename = None
         if isinstance(audio_input, tuple):
             sr, audio = audio_input
             # Convert to float32 if needed
@@ -70,7 +77,9 @@
                 audio = audio.astype(np.float32) / 2147483648.0
         elif isinstance(audio_input, str):
             # Audio file path provided
-            audio, sr = librosa.load(audio_input, sr=None, mono=False)
+            audio, sr = librosa.load(audio_input, sr=None, mono=True)
+            # Extract filename for output naming
+            input_filename = Path(audio_input).stem
         else:
             return None, f"Error: Unexpected audio input type: {type(audio_input)}"

@@ -80,7 +89,7 @@

         # Resample to 16kHz if needed
         if sr != SAMPLE_RATE:
-            print(f"Resampling from {sr}Hz to {SAMPLE_RATE}Hz")
+            logger.info(f"Resampling from {sr}Hz to {SAMPLE_RATE}Hz")
             audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLE_RATE)
             sr = SAMPLE_RATE

@@ -96,16 +105,20 @@

         # Load model
         status = f"Loading {model_choice} model..."
-        print(status)
+        logger.info(status)
         model = load_model(model_name)

         # Transcribe
         status = f"Transcribing {duration:.1f} seconds of audio..."
-        print(status)
+        logger.info(status)

-        # Create temporary MIDI file
-        with tempfile.NamedTemporaryFile(suffix=".mid", delete=False) as tmp:
-            output_path = tmp.name
+        # Create temporary MIDI file with original filename if available
+        if input_filename:
+            temp_dir = tempfile.gettempdir()
+            output_path = os.path.join(temp_dir, f"{input_filename}.mid")
+        else:
+            with tempfile.NamedTemporaryFile(suffix=".mid", delete=False) as tmp:
+                output_path = tmp.name

         model.transcribe_to_midi(
             audio,
@@ -130,7 +143,7 @@ Download your MIDI file using the button below.

     except Exception as e:
         error_msg = f"Error during transcription: {str(e)}"
-        print(error_msg)
+        logger.error(error_msg)
         return None, error_msg


@@ -238,9 +251,9 @@ with gr.Blocks(title="CountEM - Music Transcription") as demo:

 if __name__ == "__main__":
     # Pre-load the default model to speed up first transcription
-    print("Pre-loading default model...")
+    logger.info("Pre-loading default model...")
     load_model("Yoni232/countem-musicnet")
-    print("Model pre-loaded. Starting Gradio interface...")
+    logger.info("Model pre-loaded. Starting Gradio interface...")

     # Launch the demo
     demo.launch(
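
For quick local verification of the two fixes described in the commit message, the snippet below is a minimal sketch and not part of app.py: it loads an audio file as mono the same way the updated branch does and derives the MIDI output name from the input file. The helper name and the hard-coded 16 kHz target rate are illustrative assumptions.

import os
import tempfile
from pathlib import Path

import librosa


def prepare_audio_and_output_path(audio_path: str, target_sr: int = 16000):
    """Illustrative helper (not in app.py): mirrors the mono load and output naming."""
    # mono=True collapses stereo files to one channel, matching the bug fix above;
    # sr=None keeps the native sample rate so resampling only happens when needed.
    audio, sr = librosa.load(audio_path, sr=None, mono=True)
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        sr = target_sr
    # Name the MIDI output after the input file, as the new branch does.
    output_path = os.path.join(tempfile.gettempdir(), f"{Path(audio_path).stem}.mid")
    return audio, sr, output_path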