juancopi81 committed on
Commit
d9489e4
1 Parent(s): 7c17274

Add plotting function

Browse files
Files changed (2) hide show
  1. app.py +8 -5
  2. utils.py +43 -1
app.py CHANGED
@@ -11,6 +11,9 @@ from pydub import AudioSegment
11
  from inferencemodel import InferenceModel
12
  from utils import upload_audio
13
 
 
 
 
14
  SAMPLE_RATE = 16000
15
  SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"
16
 
@@ -20,13 +23,14 @@ current_model = "mt3"
20
 
21
  def change_model(model):
22
  global current_model
 
23
  checkpoint_path = f"/home/user/app/checkpoints/{model}/"
24
  if model == current_model:
25
  return
26
- global inference_model
27
  inference_model = InferenceModel(checkpoint_path, model)
28
  current_model = model
29
  print("Inferece model", inference_model)
 
30
 
31
  # Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
32
  def get_audio(url):
@@ -37,7 +41,6 @@ def get_audio(url):
37
  new_file = base + ".wav"
38
  os.rename(out_file, new_file)
39
  a = new_file
40
- print("file a is:", a)
41
  wav_to_cut = AudioSegment.from_file(a)
42
  # pydub does things in milliseconds
43
  ten_seconds = 10 * 1000
@@ -53,17 +56,17 @@ def populate_metadata(link):
53
  return yt.thumbnail_url, yt.title, audio
54
 
55
  def inference(yt_audio):
56
- with open(yt_audio, "rb") as fd:
57
  contents = fd.read()
58
 
59
- audio = upload_audio(contents,sample_rate=16000)
60
 
61
  est_ns = inference_model(audio)
62
 
63
  note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
64
  note_sequence = note_seq.midi_to_note_sequence("./transcribed.mid")
65
  synth = note_seq.midi_synth.fluidsynth
66
- array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
67
  int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
68
  # piano_roll = create_image_from_note_sequence(note_sequence)
69
 
 
11
  from inferencemodel import InferenceModel
12
  from utils import upload_audio
13
 
14
+ import nest_asyncio
15
+ nest_asyncio.apply()
16
+
17
  SAMPLE_RATE = 16000
18
  SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"
19
 
 
23
 
24
def change_model(model):
    """Switch the globally active transcription model.

    Re-instantiates the ``InferenceModel`` only when the requested model
    differs from the one currently loaded, since loading a checkpoint is
    expensive.

    Args:
        model: Name of the model checkpoint directory (e.g. ``"mt3"``).
    """
    global current_model
    global inference_model
    # Nothing to do if the requested model is already active; return before
    # doing any path/instantiation work.
    if model == current_model:
        return
    checkpoint_path = f"/home/user/app/checkpoints/{model}/"
    inference_model = InferenceModel(checkpoint_path, model)
    current_model = model
    print("Inference model", inference_model)  # fixed typo: was "Inferece"
    print("Current model", current_model)
34
 
35
  # Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
36
  def get_audio(url):
 
41
  new_file = base + ".wav"
42
  os.rename(out_file, new_file)
43
  a = new_file
 
44
  wav_to_cut = AudioSegment.from_file(a)
45
  # pydub does things in milliseconds
46
  ten_seconds = 10 * 1000
 
56
  return yt.thumbnail_url, yt.title, audio
57
 
58
  def inference(yt_audio):
59
+ with open(yt_audio[1], "rb") as fd:
60
  contents = fd.read()
61
 
62
+ audio = upload_audio(contents,sample_rate=SAMPLE_RATE)
63
 
64
  est_ns = inference_model(audio)
65
 
66
  note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
67
  note_sequence = note_seq.midi_to_note_sequence("./transcribed.mid")
68
  synth = note_seq.midi_synth.fluidsynth
69
+ array_of_floats = synth(note_sequence, sample_rate=44100)
70
  int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
71
  # piano_roll = create_image_from_note_sequence(note_sequence)
72
 
utils.py CHANGED
@@ -1,8 +1,14 @@
1
 
2
  import tempfile
 
3
 
4
  import librosa
5
 
 
 
 
 
 
6
  class AudioIOReadError(BaseException): # pylint:disable=g-bad-exception-name
7
  pass
8
 
@@ -51,4 +57,40 @@ def load_audio(audio_filename, sample_rate, duration=10):
51
  y, unused_sr = librosa.load(audio_filename, sr=sample_rate, mono=True, duration=duration)
52
  except Exception as e: # pylint: disable=broad-except
53
  raise AudioIOReadError(e)
54
- return y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  import tempfile
3
+ import collections
4
 
5
  import librosa
6
 
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ from matplotlib.patches import Rectangle
10
+ from PIL import Image
11
+
12
  class AudioIOReadError(BaseException): # pylint:disable=g-bad-exception-name
13
  pass
14
 
 
57
  y, unused_sr = librosa.load(audio_filename, sr=sample_rate, mono=True, duration=duration)
58
  except Exception as e: # pylint: disable=broad-except
59
  raise AudioIOReadError(e)
60
+ return y
61
+
62
+ # Generate piano_roll
63
def sequence_to_pandas_dataframe(sequence):
    """Flatten a NoteSequence's notes into a pandas DataFrame.

    Args:
        sequence: A NoteSequence-like object whose ``notes`` each expose
            ``start_time``, ``end_time`` and ``pitch``.

    Returns:
        DataFrame with columns ``start_time``, ``end_time``, ``duration``
        and ``pitch`` — one row per note.
    """
    columns = ("start_time", "end_time", "duration", "pitch")
    pd_dict = collections.defaultdict(list)
    for note in sequence.notes:
        values = (
            note.start_time,
            note.end_time,
            note.end_time - note.start_time,
            note.pitch,
        )
        for column, value in zip(columns, values):
            pd_dict[column].append(value)
    return pd.DataFrame(pd_dict)
72
+
73
def dataframe_to_pianoroll_img(df):
    """Draw a piano-roll figure from a note DataFrame.

    Args:
        df: DataFrame with ``start_time``, ``pitch`` and ``duration``
            columns (one row per note), as produced by
            ``sequence_to_pandas_dataframe``.

    Returns:
        A matplotlib Figure containing one black rectangle per note.
    """
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    # White (invisible) scatter makes matplotlib autoscale the axes to the
    # full extent of the notes before the rectangles are added.
    ax.scatter(df.start_time, df.pitch, c="white")
    for _, row in df.iterrows():
        ax.add_patch(
            Rectangle(
                (row["start_time"], row["pitch"] - 0.4),
                row["duration"],
                0.4,
                color="black",
            )
        )
    # Label through the Axes object rather than the pyplot state machine, so
    # the function does not depend on which figure happens to be "current".
    ax.set_xlabel("time (sec.)", fontsize=18)
    ax.set_ylabel("pitch (MIDI)", fontsize=16)
    return fig
82
+
83
def fig2img(fig):
    """Convert a Matplotlib figure to a PIL Image and return it.

    The figure is closed after rendering so that repeated calls (e.g. one
    per request in a long-running app) do not accumulate open figures.

    Args:
        fig: The matplotlib Figure to rasterize.

    Returns:
        A PIL Image holding the PNG rendering of ``fig``.
    """
    import io
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    buf.seek(0)
    img = Image.open(buf)
    # PIL opens images lazily; force the PNG to be decoded now so the image
    # is fully usable after the figure is released below.
    img.load()
    # Fix: the original leaked one open figure per call (matplotlib keeps a
    # global reference until plt.close is called).
    plt.close(fig)
    return img
91
+
92
def create_image_from_note_sequence(sequence):
    """Render a NoteSequence as a piano-roll PIL Image.

    Pipeline: notes -> DataFrame -> matplotlib figure -> PIL Image.
    """
    piano_roll_fig = dataframe_to_pianoroll_img(
        sequence_to_pandas_dataframe(sequence)
    )
    return fig2img(piano_roll_fig)