juancopi81 committed on
Commit
1a06f79
1 Parent(s): 47e2c4f

Add pydub to cut the audio file to 10 seconds

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -9,6 +9,7 @@ import os
9
 
10
  import numpy as np
11
  import tensorflow.compat.v2 as tf
 
12
 
13
  import functools
14
  import gin
@@ -35,6 +36,8 @@ SAMPLE_RATE = 16000
35
  SF2_PATH = 'SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'
36
 
37
  def upload_audio(audio, sample_rate):
 
 
38
  return note_seq.audio_io.wav_data_to_samples_librosa(
39
  audio, sample_rate=sample_rate)
40
 
@@ -242,8 +245,7 @@ def inference(audio):
242
  with open(audio, 'rb') as fd:
243
  contents = fd.read()
244
  audio = upload_audio(contents,sample_rate=16000)
245
- # Get only first 10 seconds of audio with free Space hosting
246
- audio = audio[:160000]
247
  est_ns = inference_model(audio)
248
 
249
  note_seq.sequence_proto_to_midi_file(est_ns, './transcribed.mid')
 
9
 
10
  import numpy as np
11
  import tensorflow.compat.v2 as tf
12
+ from pydub import AudioSegment
13
 
14
  import functools
15
  import gin
 
36
  SF2_PATH = 'SGM-v2.01-Sal-Guit-Bass-V1.3.sf2'
37
 
38
  def upload_audio(audio, sample_rate):
39
+ audio = AudioSegment.from_wav(audio)
40
+ audio = [:10000]
41
  return note_seq.audio_io.wav_data_to_samples_librosa(
42
  audio, sample_rate=sample_rate)
43
 
 
245
  with open(audio, 'rb') as fd:
246
  contents = fd.read()
247
  audio = upload_audio(contents,sample_rate=16000)
248
+
 
249
  est_ns = inference_model(audio)
250
 
251
  note_seq.sequence_proto_to_midi_file(est_ns, './transcribed.mid')
requirements.txt CHANGED
@@ -7,4 +7,5 @@ jax[cpu]==0.3.15 -f https://storage.googleapis.com/jax-releases/jax_releases.htm
7
  # pin CLU for python 3.7 compatibility
8
  clu==0.0.7
9
  # pin Orbax to use Checkpointer
10
- orbax==0.0.2
 
 
7
  # pin CLU for python 3.7 compatibility
8
  clu==0.0.7
9
  # pin Orbax to use Checkpointer
10
+ orbax==0.0.2
11
+ pydub