Sheet_Music_Generator / markov_chain.py
va35
Naive attempt using Markov chains
5fc3f34
raw
history blame
10.3 kB
import os
# from mido import MidiFile
import mido
import music21
import numpy as np
import pandas as pd
from music21 import *
from mido import Message, MidiFile, MidiTrack
# Length, in notes, of every training segment. Each segment contributes
# `window` (note, duration) column pairs to the prepared table: the first
# `window - 1` pairs are the context that is matched during generation and
# the last pair is the prediction target.
window = 3
# Number of notes to generate after the initial context; also the number of
# notes written to the output MIDI file.
#TODO: change this to accept values according to user
num_notes = 100
# MIDI ticks per quarter note. A duration expressed in quarter lengths is
# multiplied by this value to obtain the delta time (in ticks) of the
# note-off message. This is the time resolution, not the tempo.
quarter_note_ticks = 480
# Accepted note durations, in quarter lengths: from a 16th note (0.25) up to
# a whole note (4.0), including the dotted values in between (0.375, 0.75,
# 1.5, 3.0). Notes with any other duration are skipped while parsing.
# The name is misspelled ("accepeted") but is referenced elsewhere in the
# file, so it is left unchanged.
accepeted_lengths = [0.25,0.375,0.5,0.75,1,1.5,2.0,3.0,4.0]
#Finds all absolute paths in directory
#https://stackoverflow.com/questions/9816816/get-absolute-paths-of-all-files-in-a-directory
def abs_paths(dir):
    """Lazily yield the absolute path of every file found under ``dir``.

    The tree is traversed recursively with ``os.walk``; only files are
    yielded, never directories. A directory that does not exist yields
    nothing.
    """
    for root, _subdirs, names in os.walk(dir):
        yield from (os.path.abspath(os.path.join(root, name)) for name in names)
def pitch_to_int(nameWithOctave):
    """Convert a pitch name with octave (e.g. ``'C4'``) to a MIDI note number.

    The name is read as one letter ``A``-``G``, then zero or more accidental
    characters, then a single trailing octave digit. Every ``'#'`` raises the
    pitch by one semitone and every other accidental character lowers it by
    one, so double accidentals such as ``'C##4'`` or ``'B--3'`` are handled.
    (The names passed in by this script come from music21's
    ``nameWithOctave``, where a flat is presumably written ``'-'`` -- confirm
    against the music21 documentation.)

    Args:
        nameWithOctave: pitch name followed by its octave digit, such as
            ``'C4'``, ``'F#3'`` or ``'B-4'``.

    Returns:
        The MIDI note number as an int; middle C (``'C4'``) is 60.

    Raises:
        IndexError: if the name is empty.
        ValueError: if the last character is not a digit.
        KeyError: if the first character is not one of ``A``-``G``.

    Limitations:
        Only a single-digit, non-negative octave is understood; negative or
        multi-digit octaves are not parsed correctly.
    """
    # semitone offset of each natural note within its octave
    letter_dict = {'C':0,'D':2,'E':4,'F':5,'G':7,'A':9,'B':11}
    # MIDI octaves start at -1, hence the +1 (octave 4 -> 60)
    octave = 12 * (int(nameWithOctave[-1]) + 1)
    note = letter_dict[nameWithOctave[0]]
    # everything between the letter and the octave digit is an accidental;
    # summing over all of them (not just the first) fixes double accidentals
    accidental = sum(1 if mark == '#' else -1 for mark in nameWithOctave[1:-1])
    return octave + note + accidental
def generate_notes(df_notes=None, count=None):
    """Sample a note/duration sequence from the prepared Markov table.

    Every row of the table is one training segment: all columns except the
    last two are the context (columns whose name contains ``"note"`` or
    ``"duration"``), and the last two columns are the note and duration that
    followed that context. Generation starts from the context of a randomly
    sampled row. At each step it collects all rows whose context equals the
    current one, samples one of them uniformly, and emits its target. A
    continuation that occurs more often in the data has more rows and is
    therefore proportionally more likely to be chosen.

    Args:
        df_notes: optional pre-loaded table. When ``None`` (the default) the
            table is read from ``'prepared.csv'``.
        count: number of notes to generate after the initial context. When
            ``None`` (the default) the module-level ``num_notes`` is used.

    Returns:
        ``(gen_notes, gen_durations)``: the initial context followed by
        ``count`` generated entries, notes as ints. If a context is reached
        that never occurs in the table, generation cannot continue and
        ``([], [])`` is returned so the caller can retry from a new start.

    Raises:
        ValueError: if the table has no rows.
    """
    if df_notes is None:
        df_notes = pd.read_csv('prepared.csv')
    if count is None:
        count = num_notes
    print(df_notes.shape)
    if df_notes.empty:
        raise ValueError('the prepared note table is empty; nothing to sample from')

    # context (feature) columns are everything except the two target columns
    features = df_notes.columns[:-2]
    note_features = [s for s in features if "note" in s]
    duration_features = [s for s in features if "duration" in s]
    note_target = df_notes.columns[-2]
    duration_target = df_notes.columns[-1]

    # sample a random row and use its context as the starting state
    initial_sample = df_notes.sample()
    start_notes = list(initial_sample[note_features].values[0])
    start_durations = list(initial_sample[duration_features].values[0])

    # the starting context is part of the generated output
    gen_notes = [int(note) for note in start_notes]
    gen_durations = list(start_durations)

    for _ in range(count):
        # select the rows whose whole context matches the current state; the
        # context length is taken from the table itself rather than from the
        # global `window`, so a table prepared with another window still works
        matches = pd.Series(True, index=df_notes.index)
        for column, value in zip(note_features, start_notes):
            matches &= df_notes[column] == value
        for column, value in zip(duration_features, start_durations):
            matches &= df_notes[column] == value
        rows = df_notes[matches]

        if rows.empty:
            # dead end: this context never occurs in the data, so there is
            # nothing to sample from; signal the caller to restart
            return [], []

        next_sample = rows.sample()
        next_note = next_sample[note_target].values[0]
        next_duration = next_sample[duration_target].values[0]
        gen_notes.append(int(next_note))
        gen_durations.append(next_duration)

        # slide the context forward: drop the OLDEST entry and append the new
        # one (the slice also keeps an empty context empty)
        start_notes = (start_notes + [next_note])[1:]
        start_durations = (start_durations + [next_duration])[1:]

    return gen_notes, gen_durations
#MAIN FUNCTION
if __name__=="__main__":
    # Pipeline:
    #   1. parse every file under 'data' and store the usable notes of each
    #      part as an array in 'tracks/<n>.npy'
    #   2. build the sliding-window table 'prepared.csv' (only if missing)
    #   3. sample a melody with generate_notes(), retrying on dead ends
    #   4. write the melody to 'new_song.mid'
    # https://stackoverflow.com/questions/49462107/how-can-i-get-all-piano-parts-from-a-music21-score
    os.makedirs('tracks', exist_ok=True)

    # --- 1. parse input files into per-part (pitch, duration) arrays ---
    track_count = 0
    for path in abs_paths('data'):
        print(path)
        piece = converter.parse(path)
        print(list(piece.parts))
        for part in piece.parts:
            part_notes = []
            # NOTE(review): only the direct children of the part are visited;
            # if the parser nests notes inside measures they would be missed
            # here -- confirm against the music21 version in use.
            for event in part:
                if getattr(event, 'isNote', False):
                    print('note in {}'.format(part))
                    # keep only notes whose duration is one of the accepted lengths
                    if event.quarterLength in accepeted_lengths:
                        part_notes.append([pitch_to_int(event.nameWithOctave), event.quarterLength])
            # parts without any usable note are not saved
            if part_notes:
                np.save('tracks/{}.npy'.format(track_count), np.array(part_notes))
                track_count += 1
    print('Number of tracks parsed: {}'.format(track_count))

    # --- 2. build the training table once ---
    if not os.path.exists('prepared.csv'):
        columns = []
        for position in range(window):
            columns.append('note' + str(position))
            columns.append('duration' + str(position))
        # Collect all rows first and build the frame once: DataFrame.append
        # was removed in pandas 2.0 and copied the whole frame on every call.
        segments = []
        for path in abs_paths('tracks'):
            notes = np.load(path)
            # every run of `window` consecutive notes is one row; the +1 makes
            # sure the last full window of the track is included as well
            for start in range(len(notes) - window + 1):
                segments.append(notes[start:start + window].flatten())
        df_notes = pd.DataFrame(segments, columns=columns)
        df_notes.to_csv('prepared.csv', index=False)

    # --- 3. generate ---
    gen_notes = []
    gen_durations = []
    # generate_notes() returns empty lists when it runs into a context that
    # does not occur in the data; retry from a new random start until a full
    # sequence has been produced
    while len(gen_notes) < num_notes:
        gen_notes, gen_durations = generate_notes()
    print('Generated {} notes/durations'.format(num_notes))
    print(gen_notes)
    print(gen_durations)

    # --- 4. write the first num_notes notes to a MIDI file ---
    # the resolution is passed explicitly so the tick arithmetic below stays
    # correct if quarter_note_ticks is ever changed
    mid = MidiFile(ticks_per_beat=quarter_note_ticks)
    track = MidiTrack()
    mid.tracks.append(track)
    for note, duration in zip(gen_notes[:num_notes], gen_durations[:num_notes]):
        track.append(Message('note_on', channel=0, note=note, velocity=60, time=0))
        # a note_on with velocity 0 ends the note after `duration` quarter lengths
        track.append(Message('note_on', channel=0, note=note, velocity=0, time=int(duration*quarter_note_ticks)))
    mid.save('new_song.mid')
#create new midi file which can be engraved
#https://mido.readthedocs.io/en/latest/midi_files.html , "Creating a New File" section
# mid = MidiFile()
# track = MidiTrack
# mid.tracks.append(track)
# for i in range(num_notes):
# track.append(Message('note_on', channel=0, note=gen_notes[i], velocity=60, time=0))
# track.append(Message('note_on', channel=0, note=gen_notes[i], velocity=0,time=int(gen_durations[i]*quarter_note_ticks)))
# mid.save('output.mid')
# def inspect_midi():
# #Just inspecting midi file 1.
# for path in abs_paths('data'):
# # print(path)
# mid = MidiFile(path)
# for i, track in enumerate(mid.tracks):
# print('Track {}: {}'.format(i, track.name))
# for message in track:
# print(message)
# break
# inspect_midi()
# def isolate_note_on_msgs():
# #round each note duration to 250ms
# #Build adjaceny matrix
# LILYPOND COMMANDS : To be used for generating music scores
# Installation : sudo apt-get install -y lilypond
# !midi2ly "new_song.mid"
# !lilypond -fpng "new_song-midi.ly"