diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..144cb69b1c6da603b1fe06698fac665485699636 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.sf2 filter=lfs diff=lfs merge=lfs -text diff --git a/app.py b/app.py index a699bc5b3c2e987102ca93e0ee28d601e0a93d02..418f623e612ce5f8c695c82b7cfd4131318e34d2 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,741 @@ import gradio as gr +from pathlib import Path -def greet(name): - return "Hello " + name + "!!" +import torch +import shutil +import os +import subprocess +import cv2 +import math +import clip +import numpy as np +from PIL import Image +from scenedetect import open_video, SceneManager, split_video_ffmpeg +from scenedetect.detectors import ContentDetector, AdaptiveDetector +from scenedetect.video_splitter import split_video_ffmpeg +from scenedetect.scene_manager import save_images +from utilities.constants import * +from utilities.chord_to_midi import * + +from model.video_music_transformer import VideoMusicTransformer +from model.video_regression import VideoRegression + +import json +from midi2audio import FluidSynth +import moviepy.editor as mp +from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip +import random +from moviepy.editor import * +import time + +from tqdm import tqdm +from huggingface_hub import snapshot_download + +from gradio import Markdown + +all_key_names = ['C major', 'G major', 'D major', 'A major', + 'E major', 'B major', 'F major', 'Bb major', + 'Eb major', 'Ab major', 'Db major', 'Gb major', + 'A minor', 'E minor', 'B minor', 'F# minor', + 'C# minor', 'G# minor', 'D minor', 'G minor', + 'C minor', 'F minor', 'Bb minor', 'Eb minor', + ] + +traspose_key_dic = { + 'F major' : -7, + 'Gb major' : -6, + 'G major' : -5, + 'Ab major' : -4, + 'A major' : -3, + 'Bb major' : -2, + 'B major' : -1, + 'C major' : 0, + 'Db major' : 1, + 'D major' : 2, + 'Eb major' : 3, + 'E major' : 4, + 'D minor' : -7, + 'Eb minor' : -6, + 'E minor' : -5, + 'F minor' : -4, + 'F# minor' : -3, + 'G minor' : -2, + 'G# minor' : -1, + 'A minor' : 0, + 'Bb minor' : 1, + 'B minor' : 2, + 'C minor' : 3, + 'C# minor' : 4 +} + +flatsharpDic = { + 'Db':'C#', + 'Eb':'D#', + 'Gb':'F#', + 'Ab':'G#', + 'Bb':'A#' +} + +max_conseq_N = 0 +max_conseq_chord = 2 +tempo = 120 +duration = 2 + +min_loudness = 0 # Minimum loudness level in the input range +max_loudness = 50 # Maximum loudness level in the input range +min_velocity = 49 # Minimum velocity value in the output range +max_velocity = 112 # Maximum velocity value in the output range + + +def split_video_into_frames(video, frame_dir): + output_path = os.path.join(frame_dir, f"%03d.jpg") + cmd = f"ffmpeg -i {video} -vf \"select=bitor(gte(t-prev_selected_t\,1)\,isnan(prev_selected_t))\" -vsync 0 -qmin 1 -q:v 1 {output_path}" + subprocess.call(cmd, shell=True) + +def gen_semantic_feature(frame_dir, semantic_dir): + device = "cuda:0" if torch.cuda.is_available() else "cpu" + model, preprocess = clip.load("ViT-L/14@336px", device=device) + file_names = os.listdir(frame_dir) + sorted_file_names = sorted(file_names) + + output_path = semantic_dir / "semantic.npy" + features = torch.cuda.FloatTensor(len(sorted_file_names), 768).fill_(0) + + for idx, file_name in enumerate(sorted_file_names): + fpath = frame_dir / file_name + image = 
preprocess(Image.open(fpath)).unsqueeze(0).to(device) + with torch.no_grad(): + image_features = model.encode_image(image) + features[idx] = image_features[0] + features = features.cpu().numpy() + np.save(output_path, features) + +def gen_emotion_feature(frame_dir, emotion_dir): + device = "cuda:0" if torch.cuda.is_available() else "cpu" + model, preprocess = clip.load("ViT-L/14@336px", device=device) + text = clip.tokenize(["exciting", "fearful", "tense", "sad", "relaxing", "neutral"]).to(device) + + file_names = os.listdir(frame_dir) + sorted_file_names = sorted(file_names) + output_path = emotion_dir / "emotion.lab" + + emolist = [] + for file_name in sorted_file_names: + fpath = frame_dir / file_name + image = preprocess(Image.open(fpath)).unsqueeze(0).to(device) + with torch.no_grad(): + logits_per_image, logits_per_text = model(image, text) + probs = logits_per_image.softmax(dim=-1).cpu().numpy() + + fp1 = format(probs[0][0], ".4f") + fp2 = format(probs[0][1], ".4f") + fp3 = format(probs[0][2], ".4f") + fp4 = format(probs[0][3], ".4f") + fp5 = format(probs[0][4], ".4f") + fp6 = format(probs[0][5], ".4f") + + emo_val = str(fp1) +" "+ str(fp2) +" "+ str(fp3) +" "+ str(fp4) +" "+ str(fp5) + " " + str(fp6) + emolist.append(emo_val) + + with open(output_path ,'w' ,encoding = 'utf-8') as f: + f.write("time exciting_prob fearful_prob tense_prob sad_prob relaxing_prob neutral_prob\n") + for i in range(0, len(emolist) ): + f.write(str(i) + " "+emolist[i]+"\n") + +def gen_scene_feature(video, scene_dir): + video_stream = open_video(str(video)) + + scene_manager = SceneManager() + scene_manager.add_detector(AdaptiveDetector()) + scene_manager.detect_scenes(video_stream, show_progress=False) + scene_list = scene_manager.get_scene_list() + + sec = 0 + scenedict = {} + for idx, scene in enumerate(scene_list): + end_int = math.ceil(scene[1].get_seconds()) + for s in range (sec, end_int): + scenedict[s] = str(idx) + sec += 1 + + fpathname = scene_dir / "scene.lab" + with open(fpathname,'w',encoding = 'utf-8') as f: + for i in range(0, len(scenedict)): + f.write(str(i) + " "+scenedict[i]+"\n") + +def gen_scene_offset_feature(scene_dir, scene_offset_dir): + src = scene_dir / "scene.lab" + tgt = scene_offset_dir / "scene_offset.lab" + + id_list = [] + with open(src, encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + if len(line_arr) == 2 : + time = int(line_arr[0]) + scene_id = int(line_arr[1]) + id_list.append(scene_id) + + offset_list = [] + current_id = id_list[0] + offset = 0 + for i in range(len(id_list)): + if id_list[i] != current_id: + current_id = id_list[i] + offset = 0 + offset_list.append(offset) + offset += 1 + + with open(tgt,'w',encoding = 'utf-8') as f: + for i in range(0, len(offset_list)): + f.write(str(i) + " " + str(offset_list[i]) + "\n") + +def gen_motion_feature(video, motion_dir): + cap = cv2.VideoCapture(str(video)) + prev_frame = None + prev_time = 0 + motion_value = 0 + motiondict = {} + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + curr_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0 + motiondict[0] = "0.0000" + if prev_frame is not None and curr_time - prev_time >= 1: + diff = cv2.absdiff(frame, prev_frame) + diff_rgb = cv2.cvtColor(diff, cv2.COLOR_BGR2RGB) + motion_value = diff_rgb.mean() + motion_value = format(motion_value, ".4f") + motiondict[int(curr_time)] = str(motion_value) + prev_time = int(curr_time) + prev_frame = frame.copy() + cap.release() + cv2.destroyAllWindows() + fpathname = motion_dir 
/ "motion.lab" + + with open(fpathname,'w',encoding = 'utf-8') as f: + for i in range(0, len(motiondict)): + f.write(str(i) + " "+motiondict[i]+"\n") + + +# def get_motion_feature(scene_dir, scene_offset_dir): +# fpath_emotion = emotion_dir / "emotion.lab" +# fpath_motion = motion_dir / "motion.lab" + +def get_scene_offset_feature(scene_offset_dir, max_seq_chord=300, max_seq_video=300): + feature_scene_offset = np.empty(max_seq_video) + feature_scene_offset.fill(SCENE_OFFSET_PAD) + fpath_scene_offset = scene_offset_dir / "scene_offset.lab" + + with open(fpath_scene_offset, encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= max_seq_chord: + break + sceneID = line_arr[1] + feature_scene_offset[time] = int(sceneID)+1 + + feature_scene_offset = torch.from_numpy(feature_scene_offset) + feature_scene_offset = feature_scene_offset.to(torch.float32) + + return feature_scene_offset + +def get_motion_feature(motion_dir, max_seq_chord=300, max_seq_video=300): + fpath_motion = motion_dir / "motion.lab" + feature_motion = np.empty(max_seq_video) + feature_motion.fill(MOTION_PAD) + with open(fpath_motion, encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= max_seq_chord: + break + motion = line_arr[1] + feature_motion[time] = float(motion) + + feature_motion = torch.from_numpy(feature_motion) + feature_motion = feature_motion.to(torch.float32) + return feature_motion + +def get_emotion_feature(emotion_dir, max_seq_chord=300, max_seq_video=300): + fpath_emotion = emotion_dir / "emotion.lab" + feature_emotion = np.empty((max_seq_video, 6)) + feature_emotion.fill(EMOTION_PAD) + + with open(fpath_emotion, encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + if line_arr[0] == "time": + continue + time = line_arr[0] + time = int(time) + if time >= max_seq_chord: + break + emo1, emo2, emo3, emo4, emo5, emo6 = \ + line_arr[1],line_arr[2],line_arr[3],line_arr[4],line_arr[5],line_arr[6] + emoList = [ float(emo1), float(emo2), float(emo3), float(emo4), float(emo5), float(emo6) ] + emoList = np.array(emoList) + feature_emotion[time] = emoList + + feature_emotion = torch.from_numpy(feature_emotion) + feature_emotion = feature_emotion.to(torch.float32) + return feature_emotion + +def get_semantic_feature(semantic_dir, max_seq_chord=300, max_seq_video=300): + fpath_semantic = semantic_dir / "semantic.npy" + + video_feature = np.load(fpath_semantic) + dim_vf = video_feature.shape[1] + + video_feature_tensor = torch.from_numpy( video_feature ) + feature_semantic = torch.full((max_seq_video, dim_vf,), SEMANTIC_PAD , dtype=torch.float32, device=torch.device("cpu")) + + if(video_feature_tensor.shape[0] < max_seq_video): + feature_semantic[:video_feature_tensor.shape[0]] = video_feature_tensor + else: + feature_semantic = video_feature_tensor[:max_seq_video] + + return feature_semantic + + +def text_clip(text: str, duration: int, start_time: int = 0): + t = TextClip(text, font='Georgia-Regular', fontsize=24, color='white') + t = t.set_position(("center", 20)).set_duration(duration) + t = t.set_start(start_time) + return t + +def convert_format_id_to_offset(id_list): + offset_list = [] + current_id = id_list[0] + offset = 0 + for i in range(len(id_list)): + if id_list[i] != current_id: + current_id = id_list[i] + offset = 0 + offset_list.append(offset) + offset += 1 + return offset_list + + 
+class Video2music: + def __init__( + self, + name="amaai-lab/video2music", + device="cuda:0", + cache_dir=None, + local_files_only=False, + ): + # path = snapshot_download(repo_id=name, cache_dir=cache_dir) + + self.device = device + + # self.model.device = device + # self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + # f"{path}/beats/microsoft-deberta-v3-large.pt" + + # self.model_weights = f"{path}/saved_models/AMT/best_loss_weights.pickle" + # self.modelReg_weights = f"{path}/saved_models/AMT/best_rmse_weights.pickle" + + self.model_weights = "saved_models/AMT/best_loss_weights.pickle" + self.modelReg_weights = "saved_models/AMT/best_rmse_weights.pickle" + + self.total_vf_dim = 776 + # 768 (sem) + 1 (mo) + 1 (scene) + 6 (emo) + self.max_seq_video = 300 + self.max_seq_chord = 300 + + self.model = VideoMusicTransformer(n_layers=6, num_heads=8, + d_model=512, dim_feedforward=1024, + max_sequence_midi=2048, max_sequence_video=300, + max_sequence_chord=300, total_vf_dim=self.total_vf_dim, rpr=RPR).to(device) + + self.model.load_state_dict(torch.load(self.model_weights, map_location=device)) + self.modelReg = VideoRegression(max_sequence_video=300, total_vf_dim=self.total_vf_dim, regModel= "bigru").to(device) + self.modelReg.load_state_dict(torch.load(self.modelReg_weights, map_location=device)) + + self.model.eval() + self.modelReg.eval() + + self.SF2_FILE = "default_sound_font.sf2" + + def generate(self, video, primer, key): + + feature_dir = Path("./feature") + output_dir = Path("./output") + if feature_dir.exists(): + shutil.rmtree(str(feature_dir)) + if output_dir.exists(): + shutil.rmtree(str(output_dir)) + + feature_dir.mkdir(parents=True) + output_dir.mkdir(parents=True) + + frame_dir = feature_dir / "vevo_frame" + + #video features + semantic_dir = feature_dir / "vevo_semantic" + emotion_dir = feature_dir / "vevo_emotion" + scene_dir = feature_dir / "vevo_scene" + scene_offset_dir = feature_dir / "vevo_scene_offset" + motion_dir = feature_dir / "vevo_motion" + + frame_dir.mkdir(parents=True) + semantic_dir.mkdir(parents=True) + emotion_dir.mkdir(parents=True) + scene_dir.mkdir(parents=True) + scene_offset_dir.mkdir(parents=True) + motion_dir.mkdir(parents=True) + + #music features + chord_dir = feature_dir / "vevo_chord" + loudness_dir = feature_dir / "vevo_loudness" + note_density_dir = feature_dir / "vevo_note_density" + + chord_dir.mkdir(parents=True) + loudness_dir.mkdir(parents=True) + note_density_dir.mkdir(parents=True) + + split_video_into_frames(video, frame_dir) + gen_semantic_feature(frame_dir, semantic_dir) + gen_emotion_feature(frame_dir, emotion_dir) + gen_scene_feature(video, scene_dir) + gen_scene_offset_feature(scene_dir, scene_offset_dir) + gen_motion_feature(video, motion_dir) + + feature_scene_offset = get_scene_offset_feature(scene_offset_dir) + feature_motion = get_motion_feature(motion_dir) + feature_emotion = get_emotion_feature(emotion_dir) + feature_semantic = get_semantic_feature(semantic_dir) + + # cuda + feature_scene_offset = feature_scene_offset.to(self.device) + feature_motion = feature_motion.to(self.device) + feature_emotion = feature_emotion.to(self.device) + + feature_scene_offset = feature_scene_offset.unsqueeze(0) + feature_motion = feature_motion.unsqueeze(0) + feature_emotion = feature_emotion.unsqueeze(0) + + feature_semantic = feature_semantic.to(self.device) + feature_semantic_list = [] + feature_semantic = torch.unsqueeze(feature_semantic, 0) + feature_semantic_list.append( feature_semantic.to(self.device) ) + 
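+        # Per-second conditioning shapes at this point: feature_scene_offset and
+        # feature_motion are [1, 300], feature_emotion is [1, 300, 6], and each
+        # entry of feature_semantic_list is [1, 300, 768] (CLIP ViT-L/14@336px
+        # frame embeddings).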
#feature_semantic_list.append( feature_semantic ) + + if "major" in key: + feature_key = torch.tensor([0]) + feature_key = feature_key.float() + elif "minor" in key: + feature_key = torch.tensor([1]) + feature_key = feature_key.float() + + feature_key = feature_key.to(self.device) + + with open('dataset/vevo_meta/chord.json') as json_file: + chordDic = json.load(json_file) + with open('dataset/vevo_meta/chord_inv.json') as json_file: + chordInvDic = json.load(json_file) + with open('dataset/vevo_meta/chord_root.json') as json_file: + chordRootDic = json.load(json_file) + with open('dataset/vevo_meta/chord_attr.json') as json_file: + chordAttrDic = json.load(json_file) + + if primer.strip() == "": + if "major" in key: + primer = "C" + else: + primer = "Am" + + pChordList = primer.split(" ") + + primerCID = [] + primerCID_root = [] + primerCID_attr = [] + + for pChord in pChordList: + if len(pChord) > 1: + if pChord[1] == "b": + pChord = flatsharpDic [ pChord[0:2] ] + pChord[2:] + type_idx = 0 + if pChord[1] == "#": + pChord = pChord[0:2] + ":" + pChord[2:] + type_idx = 2 + else: + pChord = pChord[0:1] + ":" + pChord[1:] + type_idx = 1 + if pChord[type_idx+1:] == "m": + pChord = pChord[0:type_idx] + ":min" + if pChord[type_idx+1:] == "m6": + pChord = pChord[0:type_idx] + ":min6" + if pChord[type_idx+1:] == "m7": + pChord = pChord[0:type_idx] + ":min7" + if pChord[type_idx+1:] == "M6": + pChord = pChord[0:type_idx] + ":maj6" + if pChord[type_idx+1:] == "M7": + pChord = pChord[0:type_idx] + ":maj7" + if pChord[type_idx+1:] == "": + pChord = pChord[0:type_idx] + + chordID = chordDic[pChord] + primerCID.append(chordID) + + chord_arr = pChord.split(":") + if len(chord_arr) == 1: + chordRootID = chordRootDic[chord_arr[0]] + primerCID_root.append(chordRootID) + primerCID_attr.append(0) + elif len(chord_arr) == 2: + chordRootID = chordRootDic[chord_arr[0]] + chordAttrID = chordAttrDic[chord_arr[1]] + primerCID_root.append(chordRootID) + primerCID_attr.append(chordAttrID) + + primerCID = np.array(primerCID) + primerCID = torch.from_numpy(primerCID) + primerCID = primerCID.to(torch.long) + primerCID = primerCID.to(self.device) + + primerCID_root = np.array(primerCID_root) + primerCID_root = torch.from_numpy(primerCID_root) + primerCID_root = primerCID_root.to(torch.long) + primerCID_root = primerCID_root.to(self.device) + + primerCID_attr = np.array(primerCID_attr) + primerCID_attr = torch.from_numpy(primerCID_attr) + primerCID_attr = primerCID_attr.to(torch.long) + primerCID_attr = primerCID_attr.to(self.device) + + # self.model.eval() + # self.modelReg.eval() + + with torch.set_grad_enabled(False): + rand_seq = self.model.generate(feature_semantic_list=feature_semantic_list, + feature_key=feature_key, + feature_scene_offset=feature_scene_offset, + feature_motion=feature_motion, + feature_emotion=feature_emotion, + primer = primerCID, + primer_root = primerCID_root, + primer_attr = primerCID_attr, + target_seq_length = 300, + beam=0, + max_conseq_N= max_conseq_N, + max_conseq_chord = max_conseq_chord) + + y = self.modelReg( + feature_semantic_list, + feature_scene_offset, + feature_motion, + feature_emotion) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + + y_note_density, y_loudness = torch.split(y, split_size_or_sections=1, dim=1) + y_note_density_np = y_note_density.cpu().numpy() + y_note_density_np = np.round(y_note_density_np).astype(int) + y_note_density_np = np.clip(y_note_density_np, 0, 40) + + y_loudness_np = y_loudness.cpu().numpy() + y_loudness_np_lv = (y_loudness_np * 100).astype(int) + 
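+        # modelReg predicts one (note_density, loudness) pair per second:
+        # note density is rounded and clipped to 0-40, and loudness is scaled
+        # to integer levels and clipped to 0-50 before the velocity mapping below.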
+        y_loudness_np_lv = np.clip(y_loudness_np_lv, 0, 50)
+        velolistExp = []
+        exponent = 0.3
+        for item in y_loudness_np_lv:
+            loudness = item[0]
+            velocity_exp = np.round(((loudness - min_loudness) / (max_loudness - min_loudness)) ** exponent * (max_velocity - min_velocity) + min_velocity)
+            velocity_exp = int(velocity_exp)
+            velolistExp.append(velocity_exp)
+
+        # bucket the predicted per-second note density into five levels
+        densitylist = []
+        for item in y_note_density_np:
+            density = item[0]
+            if density <= 6:
+                densitylist.append(0)
+            elif density <= 12:
+                densitylist.append(1)
+            elif density <= 18:
+                densitylist.append(2)
+            elif density <= 24:
+                densitylist.append(3)
+            else:
+                densitylist.append(4)
+
+        # generated ChordID to ChordSymbol
+        chord_genlist = []
+        chordID_genlist = rand_seq[0].cpu().numpy()
+        for i in chordID_genlist:
+            chord_genlist.append(chordInvDic[str(i)])
+
+        chord_offsetlist = convert_format_id_to_offset(chord_genlist)
+        f_path_midi = output_dir / "output.mid"
+        f_path_flac = output_dir / "output.flac"
+        f_path_video_out = output_dir / "output.mp4"
+
+        # ChordSymbol to MIDI file with voicing
+        MIDI = MIDIFile(1)
+        MIDI.addTempo(0, 0, tempo)
+        midi_chords_original = []
+        for i, k in enumerate(chord_genlist):
+            k = k.replace(":", "")
+            if k == "N":
+                midi_chords_original.append([])
+            else:
+                midi_chords_original.append(Chord(k).getMIDI("c", 4))
+        midi_chords = voice(midi_chords_original)
+        trans = traspose_key_dic[key]
+
+        for i, chord in enumerate(midi_chords):
+            if densitylist[i] == 0:
+                if len(chord) >= 4:
+                    if chord_offsetlist[i] % 2 == 0:
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 1, duration, velolistExp[i])
+                    else:
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1, duration, velolistExp[i])
+            elif densitylist[i] == 1:
+                if len(chord) >= 4:
+                    if chord_offsetlist[i] % 2 == 0:
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1, duration, velolistExp[i])
+                    else:
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1, duration, velolistExp[i])
+            elif densitylist[i] == 2:
+                if len(chord) >= 4:
+                    if chord_offsetlist[i] % 2 == 0:
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1.5, duration, velolistExp[i])
+                    else:
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1.5, duration, velolistExp[i])
+            elif densitylist[i] == 3:
+                if len(chord) >= 4:
+                    if chord_offsetlist[i] % 2 == 0:
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0,
+                                     0, chord[1]+trans, i * duration + 0.75, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.5, duration, velolistExp[i])
+                    else:
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0.75, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.5, duration, velolistExp[i])
+            elif densitylist[i] == 4:
+                if len(chord) >= 4:
+                    if chord_offsetlist[i] % 2 == 0:
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.75, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 1.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.75, duration, velolistExp[i])
+                    else:
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[0]+trans, i * duration + 0.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 0.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 0.75, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[3]+trans, i * duration + 1, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.25, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[1]+trans, i * duration + 1.5, duration, velolistExp[i])
+                        MIDI.addNote(0, 0, chord[2]+trans, i * duration + 1.75, duration, velolistExp[i])
+
+        with open(f_path_midi, "wb") as outputFile:
+            MIDI.writeFile(outputFile)
+
+        # Convert midi to audio (e.g., flac)
+        fs = FluidSynth(sound_font=self.SF2_FILE)
+        fs.midi_to_audio(str(f_path_midi), str(f_path_flac))
+
+        # Render generated music into input video
+        audio_mp = mp.AudioFileClip(str(f_path_flac))
+        video_mp = mp.VideoFileClip(str(video))
+
+        audio_mp = audio_mp.subclip(0, video_mp.duration)
+        final = video_mp.set_audio(audio_mp)
+
+        final.write_videofile(str(f_path_video_out),
+                              codec='libx264',
+                              audio_codec='aac',
+                              temp_audiofile='temp-audio.m4a',
+                              remove_temp=True
+                              )
+        return Path(str(f_path_video_out))
+
+
+# Initialize Video2Music
+if torch.cuda.is_available():
+    video2music = Video2music()
+else:
+    video2music = Video2music(device="cpu")
+
+
+def gradio_generate(input_video, input_primer, input_key):
+    output_filename = video2music.generate(input_video, input_primer, input_key)
+    return str(output_filename)
+
+
+title = "Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model"
+description_text = """

+Generate background music using Video2Music by providing an input video.
+
+This is the demo for Video2Music: Suitable Music Generation from Videos using an Affective Multimodal Transformer model.
+Read our paper.

+""" +input_video = gr.Video(label="Input Video") +input_primer = gr.Textbox(label="Input Primer", value="C Am F G") +input_key = gr.Dropdown(choices=["C major", "A minor"], value="C major", label="Input Key") +output_video = gr.Video(label="Output Video") + +css = ''' +#duplicate-button { +margin: auto; +color: white; +background: #1565c0; +border-radius: 100vh; +} +''' + +# Gradio interface +gr_interface = gr.Interface( + fn=gradio_generate, + inputs=[input_video, input_primer, input_key ], + outputs=[output_video], + description=description_text, + allow_flagging='never', + cache_examples=True, +) + + +# with gr.Blocks() as demo: +with gr.Blocks(css=css) as demo: + title=gr.HTML(f"

{title}

") + gr_interface.render() + +#demo.queue() +# demo.launch(debug=True) + +demo.queue().launch() -iface = gr.Interface(fn=greet, inputs="text", outputs="text") -iface.launch() \ No newline at end of file diff --git a/dataset/README.md b/dataset/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8665e83e25d91c7bce9b6351679a6866b152df11 --- /dev/null +++ b/dataset/README.md @@ -0,0 +1,31 @@ +# MuVi-Sync Dataset + +- Dataset (MuVi-Sync) + * MuVi-Sync (features) [(Link)](https://zenodo.org/records/10057093) + * MuVi-Sync (original video) [(Link)](https://zenodo.org/records/10050294) + +## Overview +Welcome to the MuVi-Sync dataset! This collection provides a rich array of features for both music and video elements. Here's a breakdown of the directory structure: + +### Music Features +- **vevo_chord:** Chord feature data +- **vevo_note_density:** Note density feature data +- **vevo_loudness:** Loudness feature data + +### Video Features +- **vevo_scene_offset:** Scene offset feature data +- **vevo_emotion:** Emotion feature data + - *5c_l14p:* 5 emotion categories (exciting, fearful, tense, sad, relaxing) + - *6c_l14p:* 6 emotion categories (exciting, fearful, tense, sad, relaxing, neutral) +- **vevo_semantic:** Semantic feature +- **vevo_motion:** Motion feature + +### Others +- **vevo_meta:** + - *idlist.txt:* List of features, titles, and YouTube IDs +- **vevo:** Original video files (.mp4) + +Explore and utilize this dataset for innovative research and applications. + +For more details, refer to our [GitHub repository](https://github.com/AMAAI-Lab/Video2Music). + diff --git a/dataset/__pycache__/vevo_dataset.cpython-37.pyc b/dataset/__pycache__/vevo_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba60ba07fae3faba4dfc512743065182986e39f0 Binary files /dev/null and b/dataset/__pycache__/vevo_dataset.cpython-37.pyc differ diff --git a/dataset/vevo_dataset.py b/dataset/vevo_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0373f5e15120481da4ea524d254ba121000e762a --- /dev/null +++ b/dataset/vevo_dataset.py @@ -0,0 +1,720 @@ +import os +import pickle +import random +import torch +import torch.nn as nn +import numpy as np + +from torch.utils.data import Dataset +from utilities.constants import * +from utilities.device import cpu_device +from utilities.device import get_device + +import json + +SEQUENCE_START = 0 + +class VevoDataset(Dataset): + def __init__(self, dataset_root = "./dataset/", split="train", split_ver="v1", vis_models="2d/clip_l14p", emo_model="6c_l14p", max_seq_chord=300, max_seq_video=300, random_seq=True, is_video = True): + + self.dataset_root = dataset_root + + self.vevo_chord_root = os.path.join( dataset_root, "vevo_chord", "lab_v2_norm", "all") + self.vevo_emotion_root = os.path.join( dataset_root, "vevo_emotion", emo_model, "all") + self.vevo_motion_root = os.path.join( dataset_root, "vevo_motion", "all") + self.vevo_scene_offset_root = os.path.join( dataset_root, "vevo_scene_offset", "all") + self.vevo_meta_split_path = os.path.join( dataset_root, "vevo_meta", "split", split_ver, split + ".txt") + + self.vevo_loudness_root = os.path.join( dataset_root, "vevo_loudness", "all") + self.vevo_note_density_root = os.path.join( dataset_root, "vevo_note_density", "all") + + self.max_seq_video = max_seq_video + self.max_seq_chord = max_seq_chord + self.random_seq = random_seq + self.is_video = is_video + + self.vis_models_arr = vis_models.split(" ") + self.vevo_semantic_root_list = 
[] + self.id_list = [] + + self.emo_model = emo_model + + if IS_VIDEO: + for i in range( len(self.vis_models_arr) ): + p1 = self.vis_models_arr[i].split("/")[0] + p2 = self.vis_models_arr[i].split("/")[1] + vevo_semantic_root = os.path.join(dataset_root, "vevo_semantic" , "all" , p1, p2) + self.vevo_semantic_root_list.append( vevo_semantic_root ) + + with open( self.vevo_meta_split_path ) as f: + for line in f: + self.id_list.append(line.strip()) + + self.data_files_chord = [] + self.data_files_emotion = [] + self.data_files_motion = [] + self.data_files_scene_offset = [] + self.data_files_semantic_list = [] + + self.data_files_loudness = [] + self.data_files_note_density = [] + + for i in range(len(self.vis_models_arr)): + self.data_files_semantic_list.append([]) + + for fid in self.id_list: + fpath_chord = os.path.join( self.vevo_chord_root, fid + ".lab" ) + fpath_emotion = os.path.join( self.vevo_emotion_root, fid + ".lab" ) + fpath_motion = os.path.join( self.vevo_motion_root, fid + ".lab" ) + fpath_scene_offset = os.path.join( self.vevo_scene_offset_root, fid + ".lab" ) + + fpath_loudness = os.path.join( self.vevo_loudness_root, fid + ".lab" ) + fpath_note_density = os.path.join( self.vevo_note_density_root, fid + ".lab" ) + + fpath_semantic_list = [] + for vevo_semantic_root in self.vevo_semantic_root_list: + fpath_semantic = os.path.join( vevo_semantic_root, fid + ".npy" ) + fpath_semantic_list.append(fpath_semantic) + + checkFile_semantic = True + for fpath_semantic in fpath_semantic_list: + if not os.path.exists(fpath_semantic): + checkFile_semantic = False + + checkFile_chord = os.path.exists(fpath_chord) + checkFile_emotion = os.path.exists(fpath_emotion) + checkFile_motion = os.path.exists(fpath_motion) + checkFile_scene_offset = os.path.exists(fpath_scene_offset) + + checkFile_loudness = os.path.exists(fpath_loudness) + checkFile_note_density = os.path.exists(fpath_note_density) + + if checkFile_chord and checkFile_emotion and checkFile_motion \ + and checkFile_scene_offset and checkFile_semantic and checkFile_loudness and checkFile_note_density : + + self.data_files_chord.append(fpath_chord) + self.data_files_emotion.append(fpath_emotion) + self.data_files_motion.append(fpath_motion) + self.data_files_scene_offset.append(fpath_scene_offset) + + self.data_files_loudness.append(fpath_loudness) + self.data_files_note_density.append(fpath_note_density) + + if IS_VIDEO: + for i in range(len(self.vis_models_arr)): + self.data_files_semantic_list[i].append( fpath_semantic_list[i] ) + + chordDicPath = os.path.join( dataset_root, "vevo_meta/chord.json") + + chordRootDicPath = os.path.join( dataset_root, "vevo_meta/chord_root.json") + chordAttrDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr.json") + + with open(chordDicPath) as json_file: + self.chordDic = json.load(json_file) + + with open(chordRootDicPath) as json_file: + self.chordRootDic = json.load(json_file) + + with open(chordAttrDicPath) as json_file: + self.chordAttrDic = json.load(json_file) + + def __len__(self): + return len(self.data_files_chord) + + def __getitem__(self, idx): + #### ---- CHORD ----- #### + feature_chord = np.empty(self.max_seq_chord) + feature_chord.fill(CHORD_PAD) + + feature_chordRoot = np.empty(self.max_seq_chord) + feature_chordRoot.fill(CHORD_ROOT_PAD) + feature_chordAttr = np.empty(self.max_seq_chord) + feature_chordAttr.fill(CHORD_ATTR_PAD) + + key = "" + with open(self.data_files_chord[idx], encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") 
+ if line_arr[0] == "key": + key = line_arr[1] + " "+ line_arr[2] + continue + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + chord = line_arr[1] + chordID = self.chordDic[chord] + feature_chord[time] = chordID + chord_arr = chord.split(":") + + if len(chord_arr) == 1: + if chord_arr[0] == "N": + chordRootID = self.chordRootDic["N"] + chordAttrID = self.chordAttrDic["N"] + feature_chordRoot[time] = chordRootID + feature_chordAttr[time] = chordAttrID + else: + chordRootID = self.chordRootDic[chord_arr[0]] + feature_chordRoot[time] = chordRootID + feature_chordAttr[time] = 1 + elif len(chord_arr) == 2: + chordRootID = self.chordRootDic[chord_arr[0]] + chordAttrID = self.chordAttrDic[chord_arr[1]] + feature_chordRoot[time] = chordRootID + feature_chordAttr[time] = chordAttrID + + if "major" in key: + feature_key = torch.tensor([0]) + else: + feature_key = torch.tensor([1]) + + feature_chord = torch.from_numpy(feature_chord) + feature_chord = feature_chord.to(torch.long) + + feature_chordRoot = torch.from_numpy(feature_chordRoot) + feature_chordRoot = feature_chordRoot.to(torch.long) + + feature_chordAttr = torch.from_numpy(feature_chordAttr) + feature_chordAttr = feature_chordAttr.to(torch.long) + + feature_key = feature_key.float() + + x = feature_chord[:self.max_seq_chord-1] + tgt = feature_chord[1:self.max_seq_chord] + + x_root = feature_chordRoot[:self.max_seq_chord-1] + tgt_root = feature_chordRoot[1:self.max_seq_chord] + x_attr = feature_chordAttr[:self.max_seq_chord-1] + tgt_attr = feature_chordAttr[1:self.max_seq_chord] + + if time < self.max_seq_chord: + tgt[time] = CHORD_END + tgt_root[time] = CHORD_ROOT_END + tgt_attr[time] = CHORD_ATTR_END + + #### ---- SCENE OFFSET ----- #### + feature_scene_offset = np.empty(self.max_seq_video) + feature_scene_offset.fill(SCENE_OFFSET_PAD) + with open(self.data_files_scene_offset[idx], encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + sceneID = line_arr[1] + feature_scene_offset[time] = int(sceneID)+1 + + feature_scene_offset = torch.from_numpy(feature_scene_offset) + feature_scene_offset = feature_scene_offset.to(torch.float32) + + #### ---- MOTION ----- #### + feature_motion = np.empty(self.max_seq_video) + feature_motion.fill(MOTION_PAD) + with open(self.data_files_motion[idx], encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + motion = line_arr[1] + feature_motion[time] = float(motion) + + feature_motion = torch.from_numpy(feature_motion) + feature_motion = feature_motion.to(torch.float32) + + #### ---- NOTE_DENSITY ----- #### + feature_note_density = np.empty(self.max_seq_video) + feature_note_density.fill(NOTE_DENSITY_PAD) + with open(self.data_files_note_density[idx], encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + note_density = line_arr[1] + feature_note_density[time] = float(note_density) + + feature_note_density = torch.from_numpy(feature_note_density) + feature_note_density = feature_note_density.to(torch.float32) + + #### ---- LOUDNESS ----- #### + feature_loudness = np.empty(self.max_seq_video) + feature_loudness.fill(LOUDNESS_PAD) + with open(self.data_files_loudness[idx], encoding = 'utf-8') as f: + for line in f: + line 
= line.strip() + line_arr = line.split(" ") + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + loudness = line_arr[1] + feature_loudness[time] = float(loudness) + + feature_loudness = torch.from_numpy(feature_loudness) + feature_loudness = feature_loudness.to(torch.float32) + + #### ---- EMOTION ----- #### + if self.emo_model.startswith("6c"): + feature_emotion = np.empty( (self.max_seq_video, 6)) + else: + feature_emotion = np.empty( (self.max_seq_video, 5)) + + feature_emotion.fill(EMOTION_PAD) + with open(self.data_files_emotion[idx], encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + if line_arr[0] == "time": + continue + time = line_arr[0] + time = int(time) + if time >= self.max_seq_chord: + break + + if len(line_arr) == 7: + emo1, emo2, emo3, emo4, emo5, emo6 = \ + line_arr[1],line_arr[2],line_arr[3],line_arr[4],line_arr[5],line_arr[6] + emoList = [ float(emo1), float(emo2), float(emo3), float(emo4), float(emo5), float(emo6) ] + elif len(line_arr) == 6: + emo1, emo2, emo3, emo4, emo5 = \ + line_arr[1],line_arr[2],line_arr[3],line_arr[4],line_arr[5] + emoList = [ float(emo1), float(emo2), float(emo3), float(emo4), float(emo5) ] + + emoList = np.array(emoList) + feature_emotion[time] = emoList + + feature_emotion = torch.from_numpy(feature_emotion) + feature_emotion = feature_emotion.to(torch.float32) + + feature_emotion_argmax = torch.argmax(feature_emotion, dim=1) + _, max_prob_indices = torch.max(feature_emotion, dim=1) + max_prob_values = torch.gather(feature_emotion, dim=1, index=max_prob_indices.unsqueeze(1)) + max_prob_values = max_prob_values.squeeze() + + # -- emotion to chord + # maj dim sus4 min7 min sus2 aug dim7 maj6 hdim7 7 min6 maj7 + # 0. extcing : [1,0,1,0,0,0,0,0,0,0,1,0,0] + # 1. fearful : [0,1,0,1,0,0,0,1,0,1,0,0,0] + # 2. tense : [0,1,1,1,0,0,0,0,0,0,1,0,0] + # 3. sad : [0,0,0,1,1,1,0,0,0,0,0,0,0] + # 4. relaxing: [1,0,0,0,0,0,0,0,1,0,0,0,1] + # 5. 
neutral : [0,0,0,0,0,0,0,0,0,0,0,0,0] + + a0 = [0]+[1,0,1,0,0,0,0,0,0,0,1,0,0]*12+[0,0] + a1 = [0]+[0,1,0,1,0,0,0,1,0,1,0,0,0]*12+[0,0] + a2 = [0]+[0,1,1,1,0,0,0,0,0,0,1,0,0]*12+[0,0] + a3 = [0]+[0,0,0,1,1,1,0,0,0,0,0,0,0]*12+[0,0] + a4 = [0]+[1,0,0,0,0,0,0,0,1,0,0,0,1]*12+[0,0] + a5 = [0]+[0,0,0,0,0,0,0,0,0,0,0,0,0]*12+[0,0] + + aend = [0]+[0,0,0,0,0,0,0,0,0,0,0,0,0]*12+[1,0] + apad = [0]+[0,0,0,0,0,0,0,0,0,0,0,0,0]*12+[0,1] + + a0_tensor = torch.tensor(a0) + a1_tensor = torch.tensor(a1) + a2_tensor = torch.tensor(a2) + a3_tensor = torch.tensor(a3) + a4_tensor = torch.tensor(a4) + a5_tensor = torch.tensor(a5) + + aend_tensor = torch.tensor(aend) + apad_tensor = torch.tensor(apad) + + mapped_tensor = torch.zeros((300, 159)) + for i, val in enumerate(feature_emotion_argmax): + if feature_chord[i] == CHORD_PAD: + mapped_tensor[i] = apad_tensor + elif feature_chord[i] == CHORD_END: + mapped_tensor[i] = aend_tensor + elif val == 0: + mapped_tensor[i] = a0_tensor + elif val == 1: + mapped_tensor[i] = a1_tensor + elif val == 2: + mapped_tensor[i] = a2_tensor + elif val == 3: + mapped_tensor[i] = a3_tensor + elif val == 4: + mapped_tensor[i] = a4_tensor + elif val == 5: + mapped_tensor[i] = a5_tensor + + # feature emotion : [1, 300, 6] + # y : [299, 159] + # tgt : [299] + # tgt_emo : [299, 159] + # tgt_emo_prob : [299] + + tgt_emotion = mapped_tensor[1:] + tgt_emotion_prob = max_prob_values[1:] + + feature_semantic_list = [] + if self.is_video: + for i in range( len(self.vis_models_arr) ): + video_feature = np.load(self.data_files_semantic_list[i][idx]) + dim_vf = video_feature.shape[1] # 2048 + video_feature_tensor = torch.from_numpy( video_feature ) + + feature_semantic = torch.full((self.max_seq_video, dim_vf,), SEMANTIC_PAD , dtype=torch.float32, device=cpu_device()) + if(video_feature_tensor.shape[0] < self.max_seq_video): + feature_semantic[:video_feature_tensor.shape[0]] = video_feature_tensor + else: + feature_semantic = video_feature_tensor[:self.max_seq_video] + feature_semantic_list.append(feature_semantic) + + return { "x":x, + "tgt":tgt, + "x_root":x_root, + "tgt_root":tgt_root, + "x_attr":x_attr, + "tgt_attr":tgt_attr, + "semanticList": feature_semantic_list, + "key": feature_key, + "scene_offset": feature_scene_offset, + "motion": feature_motion, + "emotion": feature_emotion, + "tgt_emotion" : tgt_emotion, + "tgt_emotion_prob" : tgt_emotion_prob, + "note_density" : feature_note_density, + "loudness" : feature_loudness + } + +def create_vevo_datasets(dataset_root = "./dataset", max_seq_chord=300, max_seq_video=300, vis_models="2d/clip_l14p", emo_model="6c_l14p", split_ver="v1", random_seq=True, is_video=True): + + train_dataset = VevoDataset( + dataset_root = dataset_root, split="train", split_ver=split_ver, + vis_models=vis_models, emo_model =emo_model, max_seq_chord=max_seq_chord, max_seq_video=max_seq_video, + random_seq=random_seq, is_video = is_video ) + + val_dataset = VevoDataset( + dataset_root = dataset_root, split="val", split_ver=split_ver, + vis_models=vis_models, emo_model =emo_model, max_seq_chord=max_seq_chord, max_seq_video=max_seq_video, + random_seq=random_seq, is_video = is_video ) + + test_dataset = VevoDataset( + dataset_root = dataset_root, split="test", split_ver=split_ver, + vis_models=vis_models, emo_model =emo_model, max_seq_chord=max_seq_chord, max_seq_video=max_seq_video, + random_seq=random_seq, is_video = is_video ) + + return train_dataset, val_dataset, test_dataset + +def compute_vevo_accuracy(out, tgt): + softmax = nn.Softmax(dim=-1) + out = 
torch.argmax(softmax(out), dim=-1) + + out = out.flatten() + tgt = tgt.flatten() + + mask = (tgt != CHORD_PAD) + + out = out[mask] + tgt = tgt[mask] + + if(len(tgt) == 0): + return 1.0 + + num_right = (out == tgt) + num_right = torch.sum(num_right).type(TORCH_FLOAT) + + acc = num_right / len(tgt) + + return acc + +def compute_hits_k(out, tgt, k): + softmax = nn.Softmax(dim=-1) + out = softmax(out) + _, topk_indices = torch.topk(out, k, dim=-1) # Get the indices of top-k values + + tgt = tgt.flatten() + + topk_indices = torch.squeeze(topk_indices, dim = 0) + + num_right = 0 + pt = 0 + for i, tlist in enumerate(topk_indices): + if tgt[i] == CHORD_PAD: + num_right += 0 + else: + pt += 1 + if tgt[i].item() in tlist: + num_right += 1 + + # Empty + if len(tgt) == 0: + return 1.0 + + num_right = torch.tensor(num_right, dtype=torch.float32) + hitk = num_right / pt + + return hitk + +def compute_hits_k_root_attr(out_root, out_attr, tgt, k): + softmax = nn.Softmax(dim=-1) + out_root = softmax(out_root) + out_attr = softmax(out_attr) + + tensor_shape = torch.Size([1, 299, 159]) + out = torch.zeros(tensor_shape) + for i in range(out.shape[-1]): + if i == 0 : + out[0, :, i] = out_root[0, :, 0] * out_attr[0, :, 0] + elif i == 157: + out[0, :, i] = out_root[0, :, 13] * out_attr[0, :, 14] + elif i == 158: + out[0, :, i] = out_root[0, :, 14] * out_attr[0, :, 15] + else: + rootindex = int( (i-1)/13 ) + 1 + attrindex = (i-1)%13 + 1 + out[0, :, i] = out_root[0, :, rootindex] * out_attr[0, :, attrindex] + + out = softmax(out) + _, topk_indices = torch.topk(out, k, dim=-1) # Get the indices of top-k values + + tgt = tgt.flatten() + + topk_indices = torch.squeeze(topk_indices, dim = 0) + + num_right = 0 + pt = 0 + for i, tlist in enumerate(topk_indices): + if tgt[i] == CHORD_PAD: + num_right += 0 + else: + pt += 1 + if tgt[i].item() in tlist: + num_right += 1 + + if len(tgt) == 0: + return 1.0 + + num_right = torch.tensor(num_right, dtype=torch.float32) + hitk = num_right / pt + + return hitk + +def compute_vevo_correspondence(out, tgt, tgt_emotion, tgt_emotion_prob, emotion_threshold): + + tgt_emotion = tgt_emotion.squeeze() + tgt_emotion_prob = tgt_emotion_prob.squeeze() + + dataset_root = "./dataset/" + chordRootInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_root_inv.json") + chordAttrInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr_inv.json") + chordAttrDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr.json") + + chordDicPath = os.path.join( dataset_root, "vevo_meta/chord.json") + chordInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_inv.json") + + with open(chordRootInvDicPath) as json_file: + chordRootInvDic = json.load(json_file) + with open(chordAttrDicPath) as json_file: + chordAttrDic = json.load(json_file) + with open(chordAttrInvDicPath) as json_file: + chordAttrInvDic = json.load(json_file) + with open(chordDicPath) as json_file: + chordDic = json.load(json_file) + with open(chordInvDicPath) as json_file: + chordInvDic = json.load(json_file) + + softmax = nn.Softmax(dim=-1) + out = torch.argmax(softmax(out), dim=-1) + out = out.flatten() + + tgt = tgt.flatten() + + num_right = 0 + tgt_emotion_quality = tgt_emotion[:, 0:14] + pt = 0 + for i, out_element in enumerate( out ): + + all_zeros = torch.all(tgt_emotion_quality[i] == 0) + if tgt_emotion[i][-1] == 1 or all_zeros or tgt_emotion_prob[i] < emotion_threshold: + num_right += 0 + else: + pt += 1 + if out_element.item() != CHORD_END and out_element.item() != CHORD_PAD: + gen_chord = chordInvDic[ str( 
out_element.item() ) ] + + chord_arr = gen_chord.split(":") + if len(chord_arr) == 1: + out_quality = 1 + elif len(chord_arr) == 2: + chordAttrID = chordAttrDic[chord_arr[1]] + out_quality = chordAttrID # 0:N, 1:maj ... 13:maj7 + + if tgt_emotion_quality[i][out_quality] == 1: + num_right += 1 + + + if(len(tgt_emotion) == 0): + return 1.0 + + if(pt == 0): + return -1 + + num_right = torch.tensor(num_right, dtype=torch.float32) + acc = num_right / pt + + return acc + +def compute_vevo_correspondence_root_attr(y_root, y_attr, tgt, tgt_emotion, tgt_emotion_prob, emotion_threshold): + + tgt_emotion = tgt_emotion.squeeze() + tgt_emotion_prob = tgt_emotion_prob.squeeze() + + dataset_root = "./dataset/" + chordRootInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_root_inv.json") + chordAttrInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr_inv.json") + chordAttrDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr.json") + + chordDicPath = os.path.join( dataset_root, "vevo_meta/chord.json") + chordInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_inv.json") + + with open(chordRootInvDicPath) as json_file: + chordRootInvDic = json.load(json_file) + with open(chordAttrDicPath) as json_file: + chordAttrDic = json.load(json_file) + with open(chordAttrInvDicPath) as json_file: + chordAttrInvDic = json.load(json_file) + with open(chordDicPath) as json_file: + chordDic = json.load(json_file) + with open(chordInvDicPath) as json_file: + chordInvDic = json.load(json_file) + + softmax = nn.Softmax(dim=-1) + + y_root = torch.argmax(softmax(y_root), dim=-1) + y_attr = torch.argmax(softmax(y_attr), dim=-1) + + y_root = y_root.flatten() + y_attr = y_attr.flatten() + + tgt = tgt.flatten() + y = np.empty( len(tgt) ) + + y.fill(CHORD_PAD) + + for i in range(len(tgt)): + if y_root[i].item() == CHORD_ROOT_PAD or y_attr[i].item() == CHORD_ATTR_PAD: + y[i] = CHORD_PAD + elif y_root[i].item() == CHORD_ROOT_END or y_attr[i].item() == CHORD_ATTR_END: + y[i] = CHORD_END + else: + chordRoot = chordRootInvDic[str(y_root[i].item())] + chordAttr = chordAttrInvDic[str(y_attr[i].item())] + if chordRoot == "N": + y[i] = 0 + else: + if chordAttr == "N" or chordAttr == "maj": + y[i] = chordDic[chordRoot] + else: + chord = chordRoot + ":" + chordAttr + y[i] = chordDic[chord] + + y = torch.from_numpy(y) + y = y.to(torch.long) + y = y.to(get_device()) + y = y.flatten() + + num_right = 0 + tgt_emotion_quality = tgt_emotion[:, 0:14] + pt = 0 + for i, y_element in enumerate( y ): + all_zeros = torch.all(tgt_emotion_quality[i] == 0) + if tgt_emotion[i][-1] == 1 or all_zeros or tgt_emotion_prob[i] < emotion_threshold: + num_right += 0 + else: + pt += 1 + if y_element.item() != CHORD_END and y_element.item() != CHORD_PAD: + gen_chord = chordInvDic[ str( y_element.item() ) ] + chord_arr = gen_chord.split(":") + if len(chord_arr) == 1: + y_quality = 1 + elif len(chord_arr) == 2: + chordAttrID = chordAttrDic[chord_arr[1]] + y_quality = chordAttrID # 0:N, 1:maj ... 
13:maj7 + + if tgt_emotion_quality[i][y_quality] == 1: + num_right += 1 + + if(len(tgt_emotion) == 0): + return 1.0 + + if(pt == 0): + return -1 + + num_right = torch.tensor(num_right, dtype=torch.float32) + acc = num_right / pt + return acc + +def compute_vevo_accuracy_root_attr(y_root, y_attr, tgt): + + dataset_root = "./dataset/" + chordRootInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_root_inv.json") + chordAttrInvDicPath = os.path.join( dataset_root, "vevo_meta/chord_attr_inv.json") + chordDicPath = os.path.join( dataset_root, "vevo_meta/chord.json") + + with open(chordRootInvDicPath) as json_file: + chordRootInvDic = json.load(json_file) + with open(chordAttrInvDicPath) as json_file: + chordAttrInvDic = json.load(json_file) + with open(chordDicPath) as json_file: + chordDic = json.load(json_file) + + softmax = nn.Softmax(dim=-1) + + y_root = torch.argmax(softmax(y_root), dim=-1) + y_attr = torch.argmax(softmax(y_attr), dim=-1) + + y_root = y_root.flatten() + y_attr = y_attr.flatten() + + tgt = tgt.flatten() + + mask = (tgt != CHORD_PAD) + y = np.empty( len(tgt) ) + y.fill(CHORD_PAD) + + for i in range(len(tgt)): + if y_root[i].item() == CHORD_ROOT_PAD or y_attr[i].item() == CHORD_ATTR_PAD: + y[i] = CHORD_PAD + elif y_root[i].item() == CHORD_ROOT_END or y_attr[i].item() == CHORD_ATTR_END: + y[i] = CHORD_END + else: + chordRoot = chordRootInvDic[str(y_root[i].item())] + chordAttr = chordAttrInvDic[str(y_attr[i].item())] + if chordRoot == "N": + y[i] = 0 + else: + if chordAttr == "N" or chordAttr == "maj": + y[i] = chordDic[chordRoot] + else: + chord = chordRoot + ":" + chordAttr + y[i] = chordDic[chord] + + y = torch.from_numpy(y) + y = y.to(torch.long) + y = y.to(get_device()) + + y = y[mask] + tgt = tgt[mask] + + # Empty + if(len(tgt) == 0): + return 1.0 + + num_right = (y == tgt) + num_right = torch.sum(num_right).type(TORCH_FLOAT) + + acc = num_right / len(tgt) + + return acc + diff --git a/dataset/vevo_meta/chord.json b/dataset/vevo_meta/chord.json new file mode 100644 index 0000000000000000000000000000000000000000..cafa2d8e0f8e842773de8b1e9fe9c0313342b4ae --- /dev/null +++ b/dataset/vevo_meta/chord.json @@ -0,0 +1 @@ +{"N": 0, "C": 1, "C:dim": 2, "C:sus4": 3, "C:min7": 4, "C:min": 5, "C:sus2": 6, "C:aug": 7, "C:dim7": 8, "C:maj6": 9, "C:hdim7": 10, "C:7": 11, "C:min6": 12, "C:maj7": 13, "C#": 14, "C#:dim": 15, "C#:sus4": 16, "C#:min7": 17, "C#:min": 18, "C#:sus2": 19, "C#:aug": 20, "C#:dim7": 21, "C#:maj6": 22, "C#:hdim7": 23, "C#:7": 24, "C#:min6": 25, "C#:maj7": 26, "D": 27, "D:dim": 28, "D:sus4": 29, "D:min7": 30, "D:min": 31, "D:sus2": 32, "D:aug": 33, "D:dim7": 34, "D:maj6": 35, "D:hdim7": 36, "D:7": 37, "D:min6": 38, "D:maj7": 39, "D#": 40, "D#:dim": 41, "D#:sus4": 42, "D#:min7": 43, "D#:min": 44, "D#:sus2": 45, "D#:aug": 46, "D#:dim7": 47, "D#:maj6": 48, "D#:hdim7": 49, "D#:7": 50, "D#:min6": 51, "D#:maj7": 52, "E": 53, "E:dim": 54, "E:sus4": 55, "E:min7": 56, "E:min": 57, "E:sus2": 58, "E:aug": 59, "E:dim7": 60, "E:maj6": 61, "E:hdim7": 62, "E:7": 63, "E:min6": 64, "E:maj7": 65, "F": 66, "F:dim": 67, "F:sus4": 68, "F:min7": 69, "F:min": 70, "F:sus2": 71, "F:aug": 72, "F:dim7": 73, "F:maj6": 74, "F:hdim7": 75, "F:7": 76, "F:min6": 77, "F:maj7": 78, "F#": 79, "F#:dim": 80, "F#:sus4": 81, "F#:min7": 82, "F#:min": 83, "F#:sus2": 84, "F#:aug": 85, "F#:dim7": 86, "F#:maj6": 87, "F#:hdim7": 88, "F#:7": 89, "F#:min6": 90, "F#:maj7": 91, "G": 92, "G:dim": 93, "G:sus4": 94, "G:min7": 95, "G:min": 96, "G:sus2": 97, "G:aug": 98, "G:dim7": 99, "G:maj6": 100, "G:hdim7": 
101, "G:7": 102, "G:min6": 103, "G:maj7": 104, "G#": 105, "G#:dim": 106, "G#:sus4": 107, "G#:min7": 108, "G#:min": 109, "G#:sus2": 110, "G#:aug": 111, "G#:dim7": 112, "G#:maj6": 113, "G#:hdim7": 114, "G#:7": 115, "G#:min6": 116, "G#:maj7": 117, "A": 118, "A:dim": 119, "A:sus4": 120, "A:min7": 121, "A:min": 122, "A:sus2": 123, "A:aug": 124, "A:dim7": 125, "A:maj6": 126, "A:hdim7": 127, "A:7": 128, "A:min6": 129, "A:maj7": 130, "A#": 131, "A#:dim": 132, "A#:sus4": 133, "A#:min7": 134, "A#:min": 135, "A#:sus2": 136, "A#:aug": 137, "A#:dim7": 138, "A#:maj6": 139, "A#:hdim7": 140, "A#:7": 141, "A#:min6": 142, "A#:maj7": 143, "B": 144, "B:dim": 145, "B:sus4": 146, "B:min7": 147, "B:min": 148, "B:sus2": 149, "B:aug": 150, "B:dim7": 151, "B:maj6": 152, "B:hdim7": 153, "B:7": 154, "B:min6": 155, "B:maj7": 156} \ No newline at end of file diff --git a/dataset/vevo_meta/chord_attr.json b/dataset/vevo_meta/chord_attr.json new file mode 100644 index 0000000000000000000000000000000000000000..f55cf806aa5f034812a38385f3c6cff21f8e617d --- /dev/null +++ b/dataset/vevo_meta/chord_attr.json @@ -0,0 +1 @@ +{"N": 0, "maj": 1, "dim": 2, "sus4": 3, "min7": 4, "min": 5, "sus2": 6, "aug": 7, "dim7": 8, "maj6": 9, "hdim7": 10, "7": 11, "min6": 12, "maj7": 13} diff --git a/dataset/vevo_meta/chord_attr_inv.json b/dataset/vevo_meta/chord_attr_inv.json new file mode 100644 index 0000000000000000000000000000000000000000..1f355aaa2c26e0c141f64057f17a054a608d4d32 --- /dev/null +++ b/dataset/vevo_meta/chord_attr_inv.json @@ -0,0 +1,16 @@ +{ + "0": "N", + "1": "maj", + "2": "dim", + "3": "sus4", + "4": "min7", + "5": "min", + "6": "sus2", + "7": "aug", + "8": "dim7", + "9": "maj6", + "10": "hdim7", + "11": "7", + "12": "min6", + "13": "maj7" +} diff --git a/dataset/vevo_meta/chord_inv.json b/dataset/vevo_meta/chord_inv.json new file mode 100644 index 0000000000000000000000000000000000000000..b3dc21d8972ff10457557821481a94b16cdf5936 --- /dev/null +++ b/dataset/vevo_meta/chord_inv.json @@ -0,0 +1 @@ +{"0": "N", "1": "C", "2": "C:dim", "3": "C:sus4", "4": "C:min7", "5": "C:min", "6": "C:sus2", "7": "C:aug", "8": "C:dim7", "9": "C:maj6", "10": "C:hdim7", "11": "C:7", "12": "C:min6", "13": "C:maj7", "14": "C#", "15": "C#:dim", "16": "C#:sus4", "17": "C#:min7", "18": "C#:min", "19": "C#:sus2", "20": "C#:aug", "21": "C#:dim7", "22": "C#:maj6", "23": "C#:hdim7", "24": "C#:7", "25": "C#:min6", "26": "C#:maj7", "27": "D", "28": "D:dim", "29": "D:sus4", "30": "D:min7", "31": "D:min", "32": "D:sus2", "33": "D:aug", "34": "D:dim7", "35": "D:maj6", "36": "D:hdim7", "37": "D:7", "38": "D:min6", "39": "D:maj7", "40": "D#", "41": "D#:dim", "42": "D#:sus4", "43": "D#:min7", "44": "D#:min", "45": "D#:sus2", "46": "D#:aug", "47": "D#:dim7", "48": "D#:maj6", "49": "D#:hdim7", "50": "D#:7", "51": "D#:min6", "52": "D#:maj7", "53": "E", "54": "E:dim", "55": "E:sus4", "56": "E:min7", "57": "E:min", "58": "E:sus2", "59": "E:aug", "60": "E:dim7", "61": "E:maj6", "62": "E:hdim7", "63": "E:7", "64": "E:min6", "65": "E:maj7", "66": "F", "67": "F:dim", "68": "F:sus4", "69": "F:min7", "70": "F:min", "71": "F:sus2", "72": "F:aug", "73": "F:dim7", "74": "F:maj6", "75": "F:hdim7", "76": "F:7", "77": "F:min6", "78": "F:maj7", "79": "F#", "80": "F#:dim", "81": "F#:sus4", "82": "F#:min7", "83": "F#:min", "84": "F#:sus2", "85": "F#:aug", "86": "F#:dim7", "87": "F#:maj6", "88": "F#:hdim7", "89": "F#:7", "90": "F#:min6", "91": "F#:maj7", "92": "G", "93": "G:dim", "94": "G:sus4", "95": "G:min7", "96": "G:min", "97": "G:sus2", "98": "G:aug", "99": "G:dim7", "100": 
"G:maj6", "101": "G:hdim7", "102": "G:7", "103": "G:min6", "104": "G:maj7", "105": "G#", "106": "G#:dim", "107": "G#:sus4", "108": "G#:min7", "109": "G#:min", "110": "G#:sus2", "111": "G#:aug", "112": "G#:dim7", "113": "G#:maj6", "114": "G#:hdim7", "115": "G#:7", "116": "G#:min6", "117": "G#:maj7", "118": "A", "119": "A:dim", "120": "A:sus4", "121": "A:min7", "122": "A:min", "123": "A:sus2", "124": "A:aug", "125": "A:dim7", "126": "A:maj6", "127": "A:hdim7", "128": "A:7", "129": "A:min6", "130": "A:maj7", "131": "A#", "132": "A#:dim", "133": "A#:sus4", "134": "A#:min7", "135": "A#:min", "136": "A#:sus2", "137": "A#:aug", "138": "A#:dim7", "139": "A#:maj6", "140": "A#:hdim7", "141": "A#:7", "142": "A#:min6", "143": "A#:maj7", "144": "B", "145": "B:dim", "146": "B:sus4", "147": "B:min7", "148": "B:min", "149": "B:sus2", "150": "B:aug", "151": "B:dim7", "152": "B:maj6", "153": "B:hdim7", "154": "B:7", "155": "B:min6", "156": "B:maj7"} \ No newline at end of file diff --git a/dataset/vevo_meta/chord_root.json b/dataset/vevo_meta/chord_root.json new file mode 100644 index 0000000000000000000000000000000000000000..f80daf81c00ab965bbb09b5a3424bf828b3be1f7 --- /dev/null +++ b/dataset/vevo_meta/chord_root.json @@ -0,0 +1 @@ +{"N": 0, "C": 1, "C#": 2, "D": 3, "D#": 4, "E": 5, "F": 6, "F#": 7, "G": 8, "G#": 9, "A": 10, "A#": 11, "B": 12} \ No newline at end of file diff --git a/dataset/vevo_meta/chord_root_inv.json b/dataset/vevo_meta/chord_root_inv.json new file mode 100644 index 0000000000000000000000000000000000000000..9febc158c8b1aba9899c06b4aec88b4a7e7b6543 --- /dev/null +++ b/dataset/vevo_meta/chord_root_inv.json @@ -0,0 +1,15 @@ +{ + "0": "N", + "1": "C", + "2": "C#", + "3": "D", + "4": "D#", + "5": "E", + "6": "F", + "7": "F#", + "8": "G", + "9": "G#", + "10": "A", + "11": "A#", + "12": "B" +} \ No newline at end of file diff --git a/dataset/vevo_meta/exclude.txt b/dataset/vevo_meta/exclude.txt new file mode 100644 index 0000000000000000000000000000000000000000..13ef0a79b8787de65a0c008583ab7dd8a4a47fd0 --- /dev/null +++ b/dataset/vevo_meta/exclude.txt @@ -0,0 +1 @@ +453 \ No newline at end of file diff --git a/dataset/vevo_meta/idlist.txt b/dataset/vevo_meta/idlist.txt new file mode 100644 index 0000000000000000000000000000000000000000..620ed37e15a6984c0284408eaa45da4b35b8ae38 --- /dev/null +++ b/dataset/vevo_meta/idlist.txt @@ -0,0 +1,748 @@ +001-Luis Fonsi - Despacito ft. Daddy Yankee kJQP7kiw5Fk +002-Mark Ronson - Uptown Funk (Official Video) ft. Bruno Mars OPf0YbXqDm0 +003-Maroon 5 - Sugar (Official Music Video) 09R8_2nJtjg +004-Justin Bieber - Sorry (PURPOSE - The Movement) fRh_vgS2dFE +005-Katy Perry - Roar (Official) CevxZvSJLk8 +006-OneRepublic - Counting Stars (Official Music Video) hT_nvWreIhg +007-Katy Perry - Dark Horse (Official) ft. Juicy J 0KSOMA3QBU0 +008-Crazy Frog - Axel F (Official Video) k85mRPqvMbE +009-Enrique Iglesias - Bailando ft. Descemer Bueno, Gente De Zona (Español) NUsoVlDFqZg +010-Taylor Swift - Shake It Off nfWlot6h_JM +011-J Balvin, Willy William - Mi Gente (Official Video) wnJ6LuUFpMo +012-Shakira - Waka Waka (This Time for Africa) (The Official 2010 FIFA World Cup™ Song) pRpeEdMmmQ0 +013-Adele - Hello YQHsXMglC9A +014-Taylor Swift - Blank Space e-ORhEE9VVg +016-Shakira - Chantaje (Official Video) ft. Maluma 6Mgqbai3fKo +017-Justin Bieber - Baby (Official Music Video) ft. Ludacris kffacxfA7G4 +018-Calvin Harris - This Is What You Came For (Official Video) ft. Rihanna kOkQ4T5WO9E +019-Fifth Harmony - Work from Home (Official Video) ft. 
Ty Dolla $ign 5GL9JoH4Sws +020-Meghan Trainor - All About That Bass 7PCkvCPvDXk +021-Sia - Chandelier (Official Video) 2vjPBrBU-TM +022-Eminem - Love The Way You Lie ft. Rihanna uelHwf8o7_U +023-Ellie Goulding - Love Me Like You Do (Official Video) AJtDXIazrMo +024-Shawn Mendes - Treat You Better lY2yjAdbvdQ +025-Justin Bieber - What Do You Mean (Official Music Video) DK_0jXPuIr0 +026-MAGIC! - Rude (Official Video) PIh2xe4jnpk +027-Luis Fonsi, Demi Lovato - Échame La Culpa (Video Oficial) TyHvyGVs42U +028-Avicii - Wake Me Up (Official Video) IcrbM1l_BoI +029-LMFAO ft. Lauren Bennett, GoonRock - Party Rock Anthem (Official Video) KQ6zr6kCPj8 +030-Imagine Dragons - Believer 7wtfhZwyrcc +031-Becky G, Bad Bunny - Mayores (Official Video) GMFewiplIbw +032-John Legend - All of Me (Official Video) 450p7goxZqg +033-Fifth Harmony - Worth It (Official Video) ft. Kid Ink YBHQbu5rbdQ +035-The Weeknd - Starboy ft. Daft Punk (Official Video) 34Na4j8AVgA +036-Ariana Grande ft. Nicki Minaj - Side To Side (Official Video) ft. Nicki Minaj SXiSVQZLje8 +037-Adele - Rolling in the Deep (Official Music Video) rYEDA3JcQqw +038-Rihanna - Diamonds lWA2pjMjpBs +039-Jennifer Lopez - On The Floor ft. Pitbull t4H_Zoh7G5A +041-Silentó - Watch Me (Whip_Nae Nae) (Official) vjW8wmF5VWc +042-Romeo Santos - Propuesta Indecente (Official Video) QFs3PIZb3js +043-J. Balvin - Ay Vamos (Official Video) TapXs54Ah3E +044-Adele - Someone Like You (Official Music Video) hLQl3WQQoQ0 +045-Drake - Hotline Bling uxpDa-c-4Mc +046-Guns N' Roses - November Rain 8SbUC-UaAxE +047-ZAYN - Dusk Till Dawn (Official Video) ft. Sia tt2k8PGm-TI +048-The Chainsmokers - Don't Let Me Down (Official Video) ft. Daya Io0fBr1XBUA +049-The Weeknd - The Hills (Official Video) yzTuBuRdAyA +050-Imagine Dragons - Thunder fKopy74weus +051-Jessie J, Ariana Grande, Nicki Minaj - Bang Bang (Official Video) 0HDdjwpPM3Y +052-Ricky Martin - Vente Pa' Ca (Official Video) ft. Maluma iOe6dI2JhgU +054-CNCO - Reggaetón Lento (Bailemos) 7jpqqBX-Myw +055-Chino y Nacho - Andas En Mi Cabeza ft. Daddy Yankee (Video Oficial) AMTAQ-AJS4Y +056-Justin Bieber - Love Yourself (Official Music Video) oyEuk8j8imI +057-DJ Khaled - I'm The One ft. Justin Bieber, Quavo, Chance the Rapper, Lil Wayne weeI1G46q0o +058-Eminem - Not Afraid (Official Video) j5-yKhDd64s +059-Calvin Harris - Summer (Official Video) ebXbLfLACGM +060-CAN'T STOP THE FEELING! (from DreamWorks Animation's 'TROLLS') (Official Video) ru0K8uYEZWw +061-Lady Gaga - Bad Romance (Official Music Video) qrO4YZeyl0I +062-Carlos Vives, Sebastián Yatra - Robarte un Beso (Official Video) Mtau4v6foHA +063-Ellie Goulding - Burn (Official Video) CGyEd0aKWZE +064-Calvin Harris & Disciples - How Deep Is Your Love EgqUJOudrcM +065-Carlos Vives, Shakira - La Bicicleta -UV0QGLmYys +066-Taylor Swift - Bad Blood ft. Kendrick Lamar QcIy9NiNbmo +067-Mike Posner - I Took A Pill In Ibiza (Seeb Remix) (Explicit) foE1mO2yM04 +068-Sam Smith - I'm Not The Only One (Official Video) nCkpzqqog4k +069-Rag'n'Bone Man - Human (Official Video) L3wKzyIN1yk +070-Carly Rae Jepsen - Call Me Maybe fWNaR-rxAic +071-Shawn Mendes - Stitches (Official Video) VbfpW0pbvaU +072-Wisin - Escápate Conmigo (Official Video) ft. Ozuna 3X9wEwulYhk +073-Post Malone - Congratulations ft. Quavo SC4xMk98Pdc +074-Nirvana - Smells Like Teen Spirit (Official Music Video) hTWKbfoikeg +075-Gente de Zona - La Gozadera (Official Video) ft. Marc Anthony VMp55KH_3wo +076-Katy Perry - Last Friday Night (T.G.I.F.) (Official Music Video) KlyXNRrsk4A +077-P!nk - Just Give Me A Reason ft. 
Nate Ruess OpQFFLBMEPI +078-Katy Perry - Firework (Official Music Video) QGJuMBdaqIw +079-Imagine Dragons - Radioactive ktvTqknDobU +080-Pitbull - Timber (Official Video) ft. Ke$ha hHUbLv4ThOo +081-French Montana - Unforgettable ft. Swae Lee CTFtOOh47oo +082-50 Cent - In Da Club (Official Music Video) 5qm8PH4xAss +083-Guns N' Roses - Sweet Child O' Mine (Official Music Video) 1w7OgIMMRc4 +084-One Direction - What Makes You Beautiful (Official Video) QJO3ROT-A4E +085-Ariana Grande ft. Iggy Azalea - Problem (Official Video) iS1g8G_njx8 +086-Sam Smith - Too Good At Goodbyes (Official Video) J_ub7Etch2U +087-AronChupa - I'm an Albatraoz _ OFFICIAL VIDEO Bznxx12Ptl0 +088-Taylor Swift - Look What You Made Me Do 3tmd-ClpJxA +089-Chris Jedi - Ahora Dice (Official Video) ft. J. Balvin, Ozuna, Arcángel c73Cu3TQnlg +090-Joey Montana - Picky RqpKDkVzlqU +091-Eminem - Without Me (Official Music Video) YVkUvmDQ3HY +092-Prince Royce - Darte un Beso bdOXnTbyk0g +093-Taylor Swift - You Belong With Me VuNIsY6JdUw +094-Eminem - Rap God (Explicit) XbGs_qK2PQA +095-Don Omar - Danza Kuduro ft. Lucenzo 7zp1TbLFPp8 +096-Maluma - El Perdedor (Official Video) PJniSb91tvo +097-Rihanna - Work (Explicit) ft. Drake HL1UzIK-flA +098-Ricky Martin - La Mordidita (Official Video) ft. Yotuel lBztnahrOFw +099-Beyoncé - Halo bnVUHWCynig +100-The Weeknd - Can't Feel My Face (Official Video) KEI4qSrkPAs +101-Shakira - La La La (Brazil 2014) ft. Carlinhos Brown 7-7knsP2n5w +102-Sia - Elastic Heart feat. Shia LaBeouf & Maddie Ziegler (Official Video) KWZGAExj-es +103-Katy Perry - Bon Appétit (Official) ft. Migos dPI-mRFEIH0 +104-The Cranberries - Zombie (Official Music Video) 6Ejga4kJUts +105-Shakira - Can't Remember to Forget You (Official Video) ft. Rihanna o3mP3mJDL2k +106-Daddy Yankee - Limbo (Video Oficial) 6BTjG-dhf5s +107-Whitney Houston - I Will Always Love You (Official 4K Video) 3JWTaaS7LdU +108-Miley Cyrus - Wrecking Ball (Official Video) My2FRPA3Gf8 +109-Chris Brown - Loyal (Official Video) ft. Lil Wayne, Tyga JXRN_LkCa_o +110-Pitbull - Rain Over Me ft. Marc Anthony SmM0653YvXU +111-Enrique Iglesias - El Perdedor (Pop) ft. Marco Antonio Solís tLcfAnN2QgY +112-J Balvin - 6 AM ft. Farruko (Official Video) yUV9JwiQLog +113-System Of A Down - Chop Suey! (Official HD Video) CSvFpBOe8eY +114-Naughty Boy - La la la ft. Sam Smith (Official Video) 3O1_3zBUKM8 +115-Rick Astley - Never Gonna Give You Up (Official Music Video) dQw4w9WgXcQ +116-Ariana Grande - Break Free ft. Zedd L8eRzOYhLuw +117-Sam Smith - Stay With Me (Official Video) pB-5XG-DbAA +118-Michael Jackson - Billie Jean (Official Video) Zi_XLOBDo_Y +119-Nelly - Dilemma (Official Music Video) ft. Kelly Rowland 8WYHDfJDPDc +120-ZAYN - PILLOWTALK (Official Music Video) C_3d6GntKbk +121-DJ Snake, Lil Jon - Turn Down for What HMUDVMiITOU +122-Katy Perry - Hot N Cold (Official) kTHNpusq654 +123-Iggy Azalea - Fancy ft. Charli XCX (Official Music Video) O-zpOMYRi0w +124-Bon Jovi - It's My Life (Official Music Video) vx2u5uUu3DE +125-Chino & Nacho - Me Voy Enamorando ft. Farruko (Remix) (Official Music Video) 0yr75-gxVtM +126-Marc Anthony - Vivir Mi Vida (Official Video) YXnjy5YlDwk +127-Justin Bieber - Never Say Never (Official Music Video) ft. Jaden Smith _Z5-P9v3F8w +128-Shawn Mendes - There's Nothing Holdin' Me Back dT2owtxkU8k +129-Enrique Iglesias - DUELE EL CORAZON ft. Wisin xFutjZEBTXs +130-DJ Khaled - Wild Thoughts (Official Video) ft. 
Rihanna, Bryson Tiller fyaI4-5849w +131-Maluma - Sin Contrato (Official Video) 9xByMBYDRmY +132-Nicki Minaj - Anaconda LDZX4ooRsWs +133-Maluma - Borro Cassette (Official Video) Xk0wdDTTPA0 +134-AC_DC - Thunderstruck (Official Video) v2AC41dglnM +135-Romeo Santos - Eres Mía 8iPcqtHoR3U +136-Backstreet Boys - I Want It That Way (Official HD Video) 4fndeDfaWCg +137-Shakira - Hips Don't Lie (Official 4K Video) ft. Wyclef Jean DUT5rEU6pqM +138-Camila Cabello - Havana ft. Young Thug BQ0mxQXmLsk +139-Rihanna - We Found Love ft. Calvin Harris tg00YEETFzg +140-J Balvin - Safari ft. Pharrell Williams, BIA, Sky (Official Video) JWESLtAKKlU +141-Rihanna - Stay ft. Mikky Ekko JF8BRvqGCNs +142-Maluma - Cuatro Babys (Official Video) ft. Trap Capos, Noriel, Bryant Myers, Juhn OXq-JP8w5H4 +143-Cyndi Lauper - Girls Just Want To Have Fun (Official Video) PIb6AZdTr-A +144-Evanescence - Bring Me To Life (Official Music Video) 3YxaaGgTQYM +145-Justin Bieber - Beauty And A Beat ft. Nicki Minaj (Official Music Video) Ys7-6_t7OEQ +146-One Direction - Drag Me Down (Official Video) Jwgf3wmiA04 +147-Auli'i Cravalho - How Far I'll Go (from Moana_Official Video) cPAbx5kgCJo +148-Aqua - Barbie Girl (Official Music Video) ZyhrYis509A +149-Dr. Dre ft. Snoop Dogg - Still D.R.E. (Official Video) _CL6n0FJZpk +150-Justin Timberlake - Mirrors (Official Video) uuZE_IRwLNI +151-Katy Perry - Wide Awake (Official Video) k0BWlvnBmIE +152-J Balvin - Si Tu Novio Te Deja Sola ft. Bad Bunny (Official Video) Km4BayZykwE +153-Maroon 5 - One More Night (Official Music Video) fwK7ggA3-bU +154-Imagine Dragons - Demons (Official Video) mWRsgZuwf_8 +155-Ariana Grande - Focus lf_wVfwpfp8 +156-Europe - The Final Countdown (Official Video) 9jK-NcRmVcw +157-Lady Gaga - Poker Face (Official Music Video) bESGLojNYSo +158-Post Malone - rockstar ft. 21 Savage UceaB4D0jpo +159-Ayo & Teo - Rolex (Official Video) lwk5OUII9Vc +160-Thalia - Desde Esa Noche (Premio Lo Nuestro 2016) ft. Maluma 6C_s56iscpQ +161-Cali Y El Dandee - Por Fin Te Encontré ft. Juan Magan, Sebastian Yatra (Video Oficiel) _kxz7WX4mLU +162-One Direction - Story of My Life W-TE_Ys4iwM +163-Miley Cyrus - We Can't Stop (Official Video) LrUvu1mlWco +164-Mike WiLL Made-It - 23 ft. Miley Cyrus, Wiz Khalifa, Juicy J (Official Music Video) bbEoRnaOIbs +165-Scorpions - Wind Of Change (Official Music Video) n4RjJKxsamQ +166-Nicki Minaj - Super Bass 4JipHEz53sU +167-Karol G, Bad Bunny - Ahora Me Llama (Official Video) 4NNRy_Wz16k +168-Tove Lo - Habits (Stay High) - Hippie Sabotage Remix SYM-RJwSGQ8 +169-Harry Styles - Sign of the Times (Official Video) qN4ooNx77u0 +170-The Police - Every Breath You Take (Official Video) OMOGaugKpzs +171-Avicii - Waiting For Love cHHLHGNpCSA +172-Ariana Grande - Into You (Official Video) 1ekZEVeXwek +173-will.i.am - Scream & Shout ft. Britney Spears (Official Music Video) kYtGl1dX5qI +174-Rihanna - What's My Name (Official Music Video) ft. Drake U0CGsw6h60k +175-Katy Perry - Part Of Me (Official) uuwfgXD8qV8 +176-Pitbull - Give Me Everything ft. 
Ne-Yo, Afrojack, Nayer EPo5wWmKEaI +177-Audioslave - Like a Stone (Official Video) 7QU1nvuxaMA +178-HA-ASH - Perdón, Perdón (Primera Fila - Hecho Realidad [En Vivo]) _wL3Pc-EmjA +179-Katy Perry - The One That Got Away (Official Music Video) Ahha3Cqe_fk +180-Nacho, Yandel, Bad Bunny - Báilame (Remix) T7VewKI44rQ +181-Sean Kingston - Beautiful Girls MrTz5xjmso4 +182-LMFAO - Sexy and I Know It (Official Video) wyx6JDQCslE +183-Eminem - When I'm Gone (Official Music Video) 1wYNFfgrXTI +184-Michael Jackson - They Don’t Care About Us (Brazil Version) (Official Video) QNJL6nfu__Q +185-Lorde - Royals (US Version) nlcIKh6sBtc +186-R. City - Locked Away ft. Adam Levine 6GUm5g8SG4o +187-John Newman - Love Me Again CfihYWRWRTQ +188-No Doubt - Don't Speak (Official 4K Music Video) TR3Vdo5etCQ +189-Bon Jovi - Livin' On A Prayer (Official Music Video) lDK9QqIzhwk +190-Stromae - Papaoutai oiKj0Z_Xnjc +191-Rae Sremmurd - Black Beatles ft. Gucci Mane (Official Video) b8m9zhNAgKs +192-Little Mix - Black Magic (Official Video) MkElfR_NPBI +193-Shakira - Perro Fiel (Official Video) ft. Nicky Jam SHq2qrFUlGY +194-Wisin - Adrenalina (Official Video) ft. Jennifer Lopez, Ricky Martin ME2Hufquz0k +195-Beyoncé - Single Ladies (Put a Ring on It) (Video Version) 4m1EFMoRFvY +196-Bonnie Tyler - Total Eclipse of the Heart (Video) lcOxhH8N3Bo +197-Rihanna - Only Girl (In The World) (Official Music Video) pa14VNsdSYM +198-Miley Cyrus - Party In The U.S.A. (Official Video) M11SvDtPBhA +199-Rae Sremmurd - No Type (Official Video) wzMrK-aGCug +200-J. Balvin - Ginza (Official Video) zZjSX01P5dE +201-Justin Bieber - Boyfriend (Official Music Video) 4GuqB1BQVr4 +202-Akon - Smack That (Official Music Video) ft. Eminem bKDdT_nyP54 +203-Rihanna - Man Down sEhy-RXkNo0 +204-Indila - Dernière Danse (Clip Officiel) K5KAc5CoCuk +205-Hoobastank - The Reason (Official Music Video) fV4DiAyExN0 +206-Kendrick Lamar - HUMBLE. tvTRZJ-4EyI +207-Foster The People - Pumped Up Kicks (Official Video) SDTZ7iX4vTQ +208-Khalid - Young Dumb & Broke (Official Video) IPfJnp1guPc +209-Michael Jackson - Thriller (Official Video) sOnqjkJTMaA +210-Pitbull - International Love (Official Video) ft. Chris Brown CdXesX6mYUE +211-Calvin Harris - I Need Your Love (Official Video) ft. Ellie Goulding AtKZKl7Bgu0 +212-Eminem ft. Rihanna - The Monster (Explicit) [Official Video] EHkozMIXZ8w +213-Evanescence - My Immortal (Official Music Video) 5anLPw0Efmo +214-Swedish House Mafia ft. John Martin - Don't You Worry Child (Official Video) 1y6smkh6c-0 +215-George Michael - Careless Whisper (Official Video) izGwDsrQ1eQ +216-Jennifer Lopez - Ain't Your Mama (Official Video) Pgmx7z49OEk +217-Shakira - Me Enamoré (Official Video) sPTn0QEhxds +218-We Are One (Ole Ola) [The Official 2014 FIFA World Cup Song] (Olodum Mix) TGtWWb9emYI +219-AC_DC - Back In Black (Official Video) pAgnJDJN4VA +220-Avicii - The Nights UtF6Jej8yb4 +221-La Adictiva Banda San José de Mesillas - Después de Ti, ¿Quién (Video Oficial) YWu9mB6X9Oc +222-Kygo - Firestone ft. Conrad Sewell (Official Video) 9Sc-ir2UwGU +223-Taylor Swift - Wildest Dreams IdneKLhsWOQ +224-Bon Jovi - Always (Official Music Video) 9BMwcO6_hyA +225-Maroon 5 - Animals (Official Music Video) qpgTC9MDx1o +226-Farruko - Chillax ft. Ky-Mani Marley (Official Video) 7fEQmJ98x_Y +227-Michael Jackson - Beat It (Official Video) oRdxUFDoQe0 +228-Bobby Shmurda - Hot N_gga (Official Music Video) vJwKKKd2ZYE +229-Adele - Send My Love (To Your New Lover) fk4BbF7B29w +230-Robin Thicke - Blurred Lines ft. 
T.I., Pharrell (Official Music Video) yyDUC1LUXSU +231-Calvin Harris - Blame ft. John Newman 6ACl8s_tBzE +232-Jessie J - Price Tag ft. B.o.B qMxX-QOV9tI +233-Katy Perry - This Is How We Do (Official) 7RMQksXpQSk +234-Don Omar - Taboo lRWqYR3e7xE +235-Romeo Santos - Yo También (Official Video) ft. Marc Anthony QBaIMZ8QjcU +236-Alvaro Soler - Sofia (Official Music Video) qaZ0oAh4evU +237-Rihanna - Umbrella (Orange Version) (Official Music Video) ft. JAY-Z CvBfHwUxHIk +238-Farruko, Bad Bunny, Rvssian - Krippy Kush (Official Video) j1_JW7An2l0 +239-Selena Gomez - The Heart Wants What It Wants (Official Video) ij_0p_6qTss +240-Enrique Iglesias, Juan Luis Guerra - Cuando Me Enamoro (Official Music Video) 4DO8GsIYfhQ +241-Zara Larsson - Lush Life tD4HCZe-tew +242-The Verve - Bitter Sweet Symphony (Official Music Video) 1lyu1KKwC74 +243-The Black Eyed Peas - Where Is The Love (Official Music Video) WpYeekQkAdc +244-One Direction - Best Song Ever o_v9MY_FMcw +245-Maroon 5 - Moves Like Jagger ft. Christina Aguilera (Official Music Video) iEPTlhBmwRg +246-Sia - The Greatest GKSRyLdjsPA +247-Akon - Lonely (Official Music Video) 6EEW-9NDM5k +248-Ariana Grande, The Weeknd - Love Me Harder (Official Video) g5qU7p7yOY8 +249-50 Cent - Candy Shop (Official Music Video) ft. Olivia SRcnnId15BA +250-Selena Gomez - Come & Get It n-D1EB74Ckg +251-Meghan Trainor - Like I'm Gonna Lose You (Official Video) ft. John Legend 2-MBfn8XjIU +252-Jonas Blue - Mama ft. William Singe (Official Video) qPTfXwPf_HM +253-One Direction - One Thing Y1xs_xPb46M +254-Mariah Carey - All I Want For Christmas Is You (Official Video) yXQViqx6GMY +255-Jonas Blue - Perfect Strangers ft. JP Cooper (Official Video) Ey_hgKCCYU4 +256-Maroon 5 - Payphone ft. Wiz Khalifa (Explicit) (Official Music Video) KRaWnd3LJfs +257-Simone & Simaria - Loka (Ao Vivo) ft. Anitta UrT0zCmsN8c +258-Future - Low Life (Official Music Video) ft. The Weeknd K_9tX4eHztY +259-Silvestre Dangond, Nicky Jam - Cásate Conmigo (Official Video) cpN78ZjnCZY +261-One Direction - Live While We're Young AbPED9bisSc +262-Lil Wayne - Mirror ft. Bruno Mars (Official Music Video) OZLUa8JUR18 +263-Katy Perry - Chained To The Rhythm (Official) ft. Skip Marley Um7pMggPnug +264-Justin Bieber - One Time (Official Music Video) CHVhwcOg6y8 +265-Dillon Francis, DJ Snake - Get Low (Official Music Video) 12CeaxLiMgE +266-The Weeknd - Earned It (from Fifty Shades Of Grey) (Official Video - Explicit) waU75jdUnYw +267-Taylor Swift - Style -CmadmM5cOk +268-Adele - Set Fire To The Rain (Live at The Royal Albert Hall) Ri7-vnrJD3k +269-Wham! 
- Last Christmas (Official Video) E8gmARGvPlI +270-3 Doors Down - Here Without You (Official Music Video) kPBzTxZQG5Q +271-Shakira - Try Everything (Official Video) c6rP-YP4c5I +272-Guns N' Roses - Paradise City Rbm6GXllBiw +273-MC Hammer - U Can't Touch This (Official Music Video) otCpCn0l4Wo +274-Taylor Swift - We Are Never Ever Getting Back Together WA4iX5D9Z64 +275-The Black Eyed Peas - Pump It (Official Music Video) ZaI2IlHwmgQ +276-Sia - Cheap Thrills (Performance Edit) 31crA53Dgu0 +277-Nelly Furtado - Say It Right (Official Music Video) 6JnGBs88sL0 +278-Britney Spears - ...Baby One More Time (Official Video) C-u5WLJ9Yk4 +279-Banda Los Recoditos - Mi Último Deseo (Video Oficial) cVlAmP-KDT4 +280-Jessie J - Flashlight (from Pitch Perfect 2) (Official Video) DzwkcbTQ7ZE +282-Demi Lovato - Heart Attack (Official Video) AByfaYcOm4A +283-Meghan Trainor - Me Too qDRORgoZxZU +284-Guns N' Roses - Don't Cry zRIbf6JqkNc +285-Bastille - Pompeii (Official Music Video) F90Cw4l-8NY +286-Akon - Right Now (Na Na Na) (Official Video) vIaH35-MLsk +287-Katy Perry - Swish Swish (Official) ft. Nicki Minaj iGk5fR-t5AU +288-Shakira - La Tortura (Official HD Video) ft. Alejandro Sanz Dsp_8Lm1eSk +289-ZAYN, Taylor Swift - I Don’t Wanna Live Forever (Fifty Shades Darker) 7F37r50VUTQ +290-Ariana Grande - Dangerous Woman 9WbCfHutDSE +291-Pitbull - Feel This Moment (Official Video) ft. Christina Aguilera 5jlI4uzZGjU +292-Selena Gomez & The Scene - Love You Like A Love Song EgT_us6AsDg +293-Wisin, Carlos Vives - Nota de Amor (Official Video) ft. Daddy Yankee wZRWpr1G1Qw +294-Beyoncé - Drunk in Love (Explicit) ft. JAY Z p1JPKLa-Ofc +295-Romeo Santos - La Diabla_Mi Santa ft. Tomatito Hz9lhqxl_gQ +296-Maroon 5 - She Will Be Loved (Official Music Video) nIjVuRTm-dc +297-The Black Eyed Peas - My Humps iEe_eraFWWs +298-Duke Dumont - Ocean Drive (Official Music Video) KDxJlW6cxRk +299-Iggy Azalea - Black Widow ft. Rita Ora (Official Music Video) u3u22OYqFGo +300-Justin Bieber - Company (Official Music Video) gdx7gN1UyX0 +301-Anna Kendrick - Cups (Pitch Perfect’s “When I’m Gone”) [Official Video] cmSbXsFE3l8 +302-J Balvin - Bobo (Official Video) 0GvLP2C2w9U +303-Alicia Keys - No One (Official Music Video) rywUS-ohqeE +304-Adele - When We Were Young (Live at The Church Studios) DDWKuo3gXMQ +305-Waka Waka (Esto es Africa) (Cancion Oficial de la Copa Mundial de la FIFA™ Sudafrica 2010) dzsuE5ugxf4 +306-Katy Perry - California Gurls (Official Music Video) ft. Snoop Dogg F57P9C4SAW4 +307-The Pussycat Dolls - Buttons (Official Music Video) ft. Snoop Dogg VCLxJd1d84s +308-Taylor Swift - Love Story 8xg3vE8Ie_E +309-Demi Lovato - Let It Go (from 'Frozen') (Official Video) kHue-HaXXzg +311-Calvin Harris - Outside (Official Video) ft. Ellie Goulding J9NQFACZYEU +312-Shakira - Whenever, Wherever (Official HD Video) weRHyjj34ZE +313-Maroon 5 - What Lovers Do ft. SZA (Official Music Video) 5Wiio4KoGe8 +314-Wisin & Yandel - Follow The Leader ft. Jennifer Lopez Xmap94TcDNs +315-Enrique Iglesias - Loco ft. Romeo Santos RSyUWjftHrs +316-Toni Braxton - Un-Break My Heart (Official HD Video) p2Rch6WvPJE +317-Daddy Yankee - Sígueme y Te Sigo (Video Oficial) EfF9EE6ZR5E +318-Ke$ha - TiK ToK (Official HD Video) iP6XpLQM2Cs +319-Katy Perry - E.T. ft. 
Kanye West (Official Music Video) t5Sd5c4o9UM +320-Meghan Trainor - NO cMTAUr3Nm6I +321-Katy Perry - Unconditionally (Official) XjwZAa2EjKA +322-Taylor Swift - 22 AgFeZr5ptV8 +323-Roxette - It Must Have Been Love (Official Music Video) k2C5TjS2sh4 +324-Capital Cities - Safe And Sound (Official Music Video) 47dtFZ8CFo8 +325-Shakira - Loca (Spanish Version) ft. El Cata XAhTt60W7qo +326-Wisin & Yandel - Algo Me Gusta De Ti ft. Chris Brown, T-Pain 3rgwIp6D3ow +327-Rihanna - Rude Boy (Official Music Video) e82VE8UtW8A +328-Beyoncé - Crazy In Love ft. JAY Z ViwtNLUqkMY +329-Rihanna - Don't Stop The Music yd8jh9QYfEs +330-Meghan Trainor - Dear Future Husband (Official Video) ShlW5plD_40 +331-Eminem - Mockingbird (Official Music Video) S9bCLPwzSC0 +332-A Great Big World, Christina Aguilera - Say Something -2U0Ivkn2Ds +333-Nelly - Just A Dream (Official Music Video) N6O2ncUKvlg +334-Meghan Trainor - Lips Are Movin (Official Music Video) qDc_5zpBj7s +335-Avril Lavigne - Girlfriend Bg59q4puhmg +336-Rihanna - Take A Bow (Official Music Video) J3UjJ4wKLkg +337-The Black Eyed Peas - I Gotta Feeling (Official Music Video) uSD4vsh1zDA +338-Rihanna - Where Have You Been HBxt_v0WF6Y +339-Avicii - Levels _ovdm2yX4MA +340-Eminem - No Love (Explicit Version) ft. Lil Wayne KV2ssT8lzj8 +341-Rachel Platten - Fight Song (Official Video) xo1VInw-SKc +342-LMFAO - Sorry For Party Rocking SkTt9k4Y-a8 +343-Abba - Dancing Queen (Official Music Video Remastered) xFrGuyw1V8s +344-The Black Eyed Peas - The Time (Dirty Bit) (Official Music Video) JwQZQygg3Lk +345-Plan B - Mi Vecinita SB8-YY2DyHI +346-One Direction - You & I _kqQDCxRCzM +347-Jennifer Lopez - Dance Again (Official Video) ft. Pitbull bjgFH01k0gU +348-Britney Spears - Toxic (Official HD Video) LOZuxwVk7TU +349-Chris Brown - With You (Official Video) nmjdaBaZe8Y +351-G-Eazy x Bebe Rexha - Me, Myself & I bSfpSOBD30U +352-Chris Brown - Look at Me Now (Official Video) ft. Lil Wayne, Busta Rhymes 8gyLR4NfMiI +353-Beyoncé - If I Were A Boy AWpsOqh8q0M +354-Beyoncé - Run the World (Girls) (Official Video) VBmMU_iwe6U +355-Rich Gang ft. Young Thug, Rich Homie Quan - Lifestyle (Official Video) nGt_JGHYEO4 +356-One Direction - Kiss You (Official) T4cdfRohhcg +357-Pia Mia - Do It Again ft. Chris Brown, Tyga (Official Music Video) cNw8A5pwbVI +358-HA-ASH - Te Dejo En Libertad (En Vivo) ZxvI1epOAWE +359-Maître Gims - Bella (Clip officiel) rMltoD1jCGI +360-Selena Gomez - Kill Em With Kindness HHP5MKgK0o8 +361-AC_DC - Highway to Hell (Live At River Plate, December 2009) gEPmA3USJdI +362-Imagine Dragons - It's Time sENM2wA_FTg +363-Lana Del Rey - Born To Die (Official Music Video) Bag1gUxuU0g +364-Sean Kingston, Justin Bieber - Eenie Meenie (Video Version) prmmCg5bKxA +365-Alejandro Fernández - Hoy Tengo Ganas De Ti ft. Christina Aguilera (Video Oficial) Z81hsLIY1sQ +366-Prince Royce, Shakira - Deja vu (Official Video) XEvKn-QgAY0 +367-Camila - Mientes (Video) xftFxCYQTdk +368-Dr. Dre ft. Snoop Dogg, Kurupt, Nate Dogg - The Next Episode (Official Video) QZXc39hT8t4 +369-Soulja Boy Tell'em - Crank That (Soulja Boy) (Official Music Video) 8UFIYGkROII +370-The Script - Hall of Fame (Official Video) ft. will.i.am mk48xRzuNvA +371-Zara Larsson, MNEK - Never Forget You GTyN-DB_v5M +372-Mariah Carey - We Belong Together (Official Music Video) 0habxsuXW4g +374-Mr. 
Probz - Waves (Robin Schulz Remix Radio Edit) pUjE9H8QlA4 +375-P!nk - Try (Official Video) yTCDVfMz15M +376-Justin Bieber - I'll Show You (Official Music Video) PfGaX8G0f2E +377-One Direction - Perfect (Official Video) Ho32Oh6b4jc +378-Selena Gomez - Good For You 1TsVjvEkc4s +379-Demi Lovato - Sorry Not Sorry (Official Video) -MsvER1dpjM +380-Plan B - Fanatica Sensual Official Video QvypZSdjO8M +381-Eminem - Stan (Long Version) ft. Dido gOMhN-hfMtY +382-Nicki Minaj - Only ft. Drake, Lil Wayne, Chris Brown zXtsGAkyeIo +383-Foo Fighters - The Pretender SBjQ9tuuTJQ +384-Taylor Swift - I Knew You Were Trouble vNoKguSdy4Y +385-Drake - Started From The Bottom (Explicit) RubBzkZzpUA +386-Rihanna, Kanye West, Paul McCartney - FourFiveSeconds kt0g4dWxEBo +387-Miley Cyrus - Malibu (Official Video) 8j9zMok6two +388-Lady Antebellum - Need You Now eM213aMKTHg +389-Beyoncé - Love On Top (Official Video) Ob7vObnFUJc +390-Guns N' Roses - Welcome To The Jungle o1tj2zJ2Wvg +391-Tyga - Hookah (Official Music Video) ft. Young Thug b-J95fYuVz4 +392-Justin Bieber - As Long As You Love Me ft. Big Sean (Official Music Video) R4em3LKQCAQ +393-J Balvin - Tranquila (Official Video) HWyEEj2pSt0 +394-One Direction - Night Changes syFZfO_wfMQ +395-Farruko - Passion Whine ft. Sean Paul (Official Video) MNmc_XJp5rI +396-Lady Gaga - Alejandro niqrrmev4mA +397-Justin Bieber - Somebody To Love Remix ft. Usher (Official Music Video) SOI4OF7iIr4 +398-J Balvin - Sigo Extrañándote (Official Video) nZ0zbsZOdwg +399-Avril Lavigne - When You're Gone (Official Video) 0G3_kG5FFfQ +400-Desiigner - Panda (Official Music Video) E5ONTXHS2mM +402-The Calling - Wherever You Will Go (Official Video) iAP9AF6DCu4 +403-Nego do Borel - Você Partiu Meu Coração ft. Anitta, Wesley Safadão (Video Oficial) Xp-dKdSUuLk +404-Louis Tomlinson - Back to You (Official Video) ft. Bebe Rexha, Digital Farm Animals -HjpL-Ns6_A +405-Maroon 5 - Maps (Explicit) (Official Music Video) NmugSMBh_iI +406-The Weeknd - Often (NSFW) (Official Video) JPIhUaONiLU +407-Nicki Minaj - Right By My Side (Explicit) ft. Chris Brown he3DJLXbebI +408-Cali Y El Dandee - Yo Te Esperaré _KSyWS8UgA4 +409-Lana Del Rey - Young and Beautiful (Official Music Video) o_1aF54DO60 +411-The Killers - Mr. Brightside (Official Music Video) gGdGFtwCNBE +412-One Direction - One Way Or Another (Teenage Kicks) 36mCEZzzQ3o +413-Lil Wayne - Lollipop ft. Static (Official Music Video) 2IH8tNQAzSs +415-Sam Smith - Lay Me Down (Official Video) HaMq2nn5ac0 +416-Kungs vs Cookin’ on 3 Burners - This Girl (Official Music Video) 2Y6Nne8RvaA +417-Becky G - Shower 50-_oTkmF5I +418-Jennifer Lopez - Papi (Official Video) 6XbIuSLaCnk +419-Selena Gomez - Same Old Love 9h30Bx4Klxg +420-Justin Bieber - Mistletoe (Official Music Video) LUjn3RpkcKY +421-Dr. Dre ft. Eminem, Skylar Grey - I Need A Doctor (Explicit) [Official Video] VA770wpLX-Q +422-Akon - Don't Matter (Official Music Video) JWA5hJl4Dv0 +423-Kelly Clarkson - Because Of You (VIDEO) Ra-Om7UMSJc +424-DNCE - Cake By The Ocean vWaRiD5ym74 +425-Fifth Harmony - All In My Head (Flex) (Official Video) ft. Fetty Wap jsbeemdD2rQ +426-Timbaland - Apologize ft. OneRepublic ZSM3w1v-A_Y +427-Beyoncé, Shakira - Beautiful Liar QrOe2h9RtWI +428-Demi Lovato - Give Your Heart a Break (Official Video) 1zfzka5VwRc +429-Ariana Grande - The Way ft. Mac Miller _sV0S8qWSy0 +430-Beyoncé - Irreplaceable 2EwViQxSJJQ +431-Logic - 1-800-273-8255 ft. Alessia Cara, Khalid (Official Video) Kb24RrHIbFk +432-50 Cent - 21 Questions (Official Music Video) ft. 
Nate Dogg cDMhlvbOFaM +433-Enrique Iglesias - Hero (Official Music Video) koJlIGDImiU +434-Miley Cyrus - The Climb NG2zyeVRcbs +435-Owl City - Fireflies (Official Music Video) psuRGfAaju4 +436-Thalia - Equivocada (Live Version) QPeNUfc8hGk +437-Avril Lavigne - Wish You Were Here (Video) VT1-sitWRtY +439-Eminem - Beautiful (Official Music Video) lgT1AidzRWM +440-Christina Aguilera, Lil' Kim, Mya, Pink - Lady Marmalade (Official Music Video) RQa7SvVCdZk +441-Rihanna - Pour It Up (Explicit) ehcVomMexkY +442-Rihanna - Unfaithful (Official Music Video) rp4UwPZfRis +443-J. Balvin, Jowell & Randy - Bonita (Official Video) SqpvOqRieYY +444-Selena Gomez - Hands To Myself FMlcn-_jpWY +445-One Direction - History (Official Video) yjmp8CoZBIo +446-Calvin Harris - My Way (Official Video) b4Bj7Zb-YD4 +447-Nicki Minaj - Starships (Explicit) SeIJmciN8mo +448-Reik - Creo en Ti snFhcHHdzT0 +449-Kings Of Leon - Sex on Fire (Official Video) RF0HhrwIwp0 +450-Justin Bieber - Love Me (Official Music Video) qdDVtFvJwUc +451-The Black Eyed Peas - Boom Boom Pow (Official Music Video) 4m48GqaOz90 +452-Justin Timberlake - Cry Me A River (Official Video) DksSPZTZES0 +453-Lady Gaga - Telephone ft. Beyoncé (Official Music Video) EVBsypHzF3U +454-Eminem - Like Toy Soldiers (Official Video) lexLAjh8fPA +455-Naughty Boy ft. Beyoncé, Arrow Benjamin - Runnin' (Lose It All) [Official Video] eJSik6ejkr0 +456-Lil Wayne - Love Me ft. Drake, Future (Explicit) (Official Music Video) KY44zvhWhp4 +457-Kelly Clarkson - Stronger (What Doesn't Kill You) [Official Video] Xn676-fLq7I +458-Descendants Cast - Rotten to the Core (from Descendants) (Official Video) zGlLe1w3DJM +459-P!nk - So What (Official Video) FJfFZqTlWrQ +460-Timbaland - The Way I Are (Official Music Video) ft. Keri Hilson, D.O.E., Sebastian U5rLz5AZBIA +461-Vanilla Ice - Ice Ice Baby (Official Music Video) rog8ou-ZepE +462-Bryson Tiller - Don't (Explicit Version) d7cVLE4SaN0 +463-Michael Jackson - The Way You Make Me Feel (Official Video) HzZ_urpj4As +464-Machine Gun Kelly, Camila Cabello - Bad Things (Official Music Video) QpbQ4I3Eidg +465-Eminem - You Don't Know (Official Music Video) ft. 50 Cent, Cashis, Lloyd Banks ngH0fkiNo-g +467-Kanye West - Stronger PsO6ZnUZI0g +468-Bloodhound Gang - The Bad Touch (Official Video) xat1GVnl8-k +469-What Goes Around...Comes Around (Official Video) TOrnUquxtwA +470-Reyli Barba - Amor del Bueno (Video) FUinZg5MC5U +471-Owl City & Carly Rae Jepsen - Good Time (Official Video) H7HmzwI67ec +472-Plan B - Candy 9FWgcBfs5A0 +473-The Black Eyed Peas - Meet Me Halfway (Official Music Video) I7HahVwYpwo +474-Lady Gaga - Judas (Official Music Video) wagn8Wrmzuc +475-Justin Bieber - One Less Lonely Girl (Official Music Video) LXUSaVw3Mvk +476-Lady Gaga - Applause (Official Music Video) pco91kroVgQ +477-Rihanna - Rehab (Official Music Video) ft. Justin Timberlake rJYcmq__nDM +478-Ricardo Montaner - La Gloria de Dios (Video Oficial) ft. Evaluna Montaner LRsgqFu5c1o +479-Maître Gims - Est-ce que tu m'aimes (Clip officiel) 6TpyRE_juyA +480-Michael Jackson - Bad (Shortened Version) dsUXAEzaC3Q +481-Beyoncé - Best Thing I Never Had (Video) FHp2KgyQUFk +482-Shawn Mendes, Camila Cabello - I Know What You Did Last Summer (Official Video) ngORmvyvAaI +483-Drake - Take Care ft. Rihanna -zzP29emgpg +484-One Direction - Steal My Girl UpsKGvPjAgw +485-Selena Gomez - Slow Down (Official) Z8eXaXoUJRQ +486-Jennifer Lopez - Booty ft. 
Iggy Azalea (Official Video) nxtIRArhVD4 +487-Demi Lovato - Cool for the Summer (Official Video) il9nqWw9W3Y +488-Tove Lo - Habits (Stay High) oh2LWWORoiM +489-WALK THE MOON - Shut Up and Dance (Official Video) 6JCLY0Rlx6Q +490-One Direction - Little Things xGPeNN9S0Fg +491-Big Sean - I Don't Fuck With You (Official Music Video) ft. E-40 cZaJYDPY-YQ +492-Enrique Iglesias - No Me Digas Que No (Official Music Video) ft. Wisin & Yandel zyqt2avPkoA +494-Taylor Swift - Everything Has Changed ft. Ed Sheeran w1oM3kQpXRo +495-Britney Spears - Work B_ch (Official Music Video) pt8VYOfr8To +496-Nacho - Bailame a1J44C-PZ3E +497-Axel - Te Voy A Amar KZh60U1PqSE +498-Route 94 - My Love (Official Video) ft. Jess Glynne BS46C2z5lVE +499-Kendji Girac - Andalouse (Clip Officiel) FndmvPkI1Ms +500-Little Mix - Touch (Official Video) gBAfejjUQoA +501-Iggy Azalea - Work (Official Music Video) _zR6ROjoOX0 +502-Wisin & Yandel - Estoy Enamorado whBcmlaSLJM +503-Alicia Keys - Girl on Fire (Official Video) J91ti_MpdHA +504-Avril Lavigne - What The Hell (Official Music Video) tQmEd_UeeIk +505-Zara Larsson - Uncover (Official Music Video) U-PXEe-qeK4 +506-Lady Gaga - Just Dance ft. Colby O'Donis (Official Music Video) ft. Colby O'Donis 2Abk1jAONjw +507-Maluma - La Temperatura (Video) ft. Eli Palacios Tgt6iaSYMEM +508-Akon - Sorry, Blame It On Me (Official Music Video) ynMk2EwRi4Q +509-CNCO, Yandel - Hey DJ (Official Video) X6wQOW9ihDA +510-Selena Gomez & The Scene - Naturally a_YR4dKArgo +511-Eminem - Space Bound (Official Video) JByDbPn6A1o +512-YG - My Nigga ft. Jeezy, Rich Homie Quan (Explicit) (Official Music Video) MSrTnWDTdwI +513-August Alsina - No Love ft. Nicki Minaj nxvm4P0jFKY +514-Farruko - Obsesionado (Official Video) lkN51aqPOzU +515-Rihanna - Hate That I Love You ft. Ne-Yo KMOOr7GEkj8 +516-Madonna - Bitch I'm Madonna ft. Nicki Minaj 7hPMmzKs62w +517-Selena Gomez & The Scene - Who Says BzE1mX4Px0I +518-Ariana Grande - One Last Time (Official) BPgEgaPk62M +519-Calvin Harris - Sweet Nothing (Official Video) ft. Florence Welch 17ozSeGw-fY +520-Maroon 5 - Misery (Official Music Video) 6g6g2mvItp4 +521-Jay-Z & Kanye West - Ni_as In Paris (Explicit) gG_dA32oH44 +523-Beyoncé - Sorry (Video) QxsmWxxouIM +524-The Weeknd - Reminder (Official Video) JZjAg6fK-BQ +525-Pusho - Te Fuiste ft. Ozuna aZOGcaU7q1A +526-Jeremih - Down On Me ft. 50 Cent (Official Music Video) AaXaig_43lU +527-Jordin Sparks, Chris Brown - No Air (Official Video) ft. Chris Brown WBKnpyoFEBo +528-Marc Anthony - Valio La Pena (Salsa Version) Ns9YYSqLxyI +529-Prince Royce - Back It Up (Official Video) ft. Jennifer Lopez, Pitbull 9w9dXWU5nMI +530-Eminem - Cleanin' Out My Closet (Official Music Video) RQ9_TKayu9s +531-Chris Brown - Kiss Kiss ft. T-Pain eNII9PDlFJ0 +532-Avicii vs Nicky Romero - I Could Be The One (Nicktim) bek1y2uiQGA +533-Jessie J - Domino (Official Video) UJtB55MaoD0 +534-Don Omar - Zumba Campaign Video 8HpG0l9cLos +535-Britney Spears - Womanizer (Director's Cut) (Official HD Video) rMqayQ-U74s +536-Demi Lovato - Confident (Official Video) cwLRQn61oUY +537-Usher - DJ Got Us Fallin' In Love (Official Music Video) ft. Pitbull C-dvTjK_07c +538-Beyoncé - Pretty Hurts (Video) LXXQLa-5n5w +539-Akon - I Wanna Love You ft. Snoop Dogg GJzF7H2e3Tw +540-Of Monsters And Men - Little Talks (Official Video) ghb6eDopW8I +541-Enrique Iglesias - I Like It (Official Music Video) X9_n8jakvWU +542-Michael Jackson, Justin Timberlake - Love Never Felt So Good (Official Video) oG08ukJPtR8 +543-Akon - Beautiful (Official Music Video) ft. 
Colby O'Donis, Kardinal Offishall rSOzN0eihsE +544-Farruko - Sunset (Official Video) ft. Shaggy, Nicky Jam ZBMsSPR9QMg +545-Ace Hood - Bugatti (Official Music Video) (Explicit) ft. Future, Rick Ross djE-BLrdDDc +546-Jennifer Lopez - I'm Into You ft. Lil Wayne IgLcQmlN2Xg +547-Calibre 50 - Contigo oeeNs3KInbc +548-will.i.am - Feelin' Myself ft. Miley Cyrus, Wiz Khalifa, French Montana VRuoR--LdqQ +549-Diddy - Dirty Money - Coming Home ft. Skylar Grey (Official Video) k-ImCpNqbJw +550-Bon Jovi - You Give Love A Bad Name (Official Music Video) KrZHPOeOxQQ +551-Chamillionaire - Ridin' (Official Music Video) ft. Krayzie Bone CtwJvgPJ9xw +552-Zedd - Clarity ft. Foxes (Official Music Video) IxxstCcJlsc +553-Justin Bieber - Confident ft. Chance The Rapper (Official Music Video) 47YClVMlthI +554-Lana Del Rey - Blue Jeans JRWox-i6aAk +555-blink-182 - I Miss You (Official Video) s1tAYmMjLdY +556-Fergie - M.I.L.F. $ (Official Music Video) bsUWK-fixiA +557-Taylor Swift - Mine XPBwXKgDTdE +558-Three Days Grace - I Hate Everything About You (Official Video) d8ekz_CSBVg +559-T.I. - About The Money ft. Young Thug (Official Music Video) etfIdtm-OC8 +560-will.i.am - This Is Love ft. Eva Simons (Official Music Video) 9I9Ar6upx34 +561-Kid Ink - Show Me (Explicit) ft. Chris Brown xKkb13IU_DE +562-will.i.am - #thatPOWER ft. Justin Bieber (Official Music Video) DGIgXP9SvB8 +563-Marc Anthony - Ahora Quien (Salsa Version) toLrTToaN0M +564-Future - Where Ya At ft. Drake lw3Or6eqIpI +565-Taylor Swift - …Ready For It wIft-t-MQuE +566-Young Money - Bed Rock (Official Music Video) Ha80ZaecGkQ +567-Romeo Santos - Promise ft. Usher Y3XyWhrZnqE +568-Chris Brown, Tyga - Ayo (Official Video) zKCrSN9oXgQ +569-Hailee Steinfeld - Love Myself (Official Video) bMpFmHSgC4Q +570-Justin Bieber - Never Let You Go (Official Music Video) 3ExWsVFJlFo +571-Rihanna - California King Bed nhBorPm6JjQ +572-Ways to Be Wicked (from Descendants 2) (Official Video) lX6g_cm2rM4 +573-Leona Lewis - Bleeding Love (US Version - Official Video) Vzo-EL_62fQ +574-Labrinth - Beneath Your Beautiful (Official Video) ft. Emeli Sandé bqIxCtEveG8 +575-Marc Anthony - A Quién Quiero Mentirle (Video) GeApuPcMVeQ +576-Chris Brown - Next To You (Official Music Video) ft. Justin Bieber EEuQU6a90Pc +577-Sia - Big Girls Cry (Official Video) 4NhKWZpkw1Q +578-Shakira - She Wolf (Official HD Video) booKP974B0k +579-Jay Sean - Down ft. Lil Wayne (Official Music Video) oUbpGmR1-QM +581-Avicii - Addicted To You Qc9c12q3mrc +582-Pitbull - Hotel Room Service (Official Video) 2up_Eq6r6Ko +583-Lady Gaga - Paparazzi (Official Music Video) d2smz_1L2_0 +584-Chris Brown - Yeah 3x 3mC2ixOAivA +585-Nick Jonas - Close ft. Tove Lo XgJFqVvb2Ws +586-Hailee Steinfeld, Grey - Starving ft. Zedd (Official Video) xwjwCFZpdns +587-Ne-Yo - One In A Million (Official Music Video) 6tpl9LtkRRw +588-Lady Gaga - Born This Way (Official Music Video) wV1FrqwZyKw +589-Chris Brown - Turn Up the Music eQWG8BVeryU +590-Juan Magan - Si No Te Quisiera ft. Belinda, Lapiz Conciente XoNCV9BsU9c +591-Katy Perry - Teenage Dream (Official Music Video) 98WtmW-lfeE +592-Cher Lloyd - Want U Back (US Version) (Official Music Video) LPgvNlrBfb0 +593-Shakira - Addicted to You MntbN1DdEP0 +594-Sebastián Yatra - Alguien Robó ft. Wisin, Nacho EH0Wg8SaITQ +595-Chris Brown - Forever (Official HD Video) 5sMKX22BHeE +596-Snow Patrol - Chasing Cars (Official Video) GemKqzILV4w +597-Lil Wayne - Drop The World ft. Eminem (Official Music Video) ft. 
Eminem ErCAOMi5EGM +598-Miley Cyrus - 7 Things (Official Video) Hr0Wv5DJhuk +599-Matheus & Kauan - O Nosso Santo Bateu – Na Praia Ao Vivo kbCtpDwVCLQ +600-Maejor Ali - Lolly ft. Juicy J, Justin Bieber BiOmXeKyrxo +601-Kings Of Leon - Use Somebody (Official Video) gnhXHvRoUd0 +602-Pitbull - Fireball ft. John Ryan HMqgVXSvwGo +603-Calvin Harris - Feel So Close (Official Video) dGghkjpNCQ8 +604-Carly Rae Jepsen - I Really Like You qV5lzRHrGeg +605-Demi Lovato - Skyscraper (Official Video) r_8ydghbGSg +606-Keri Hilson - Knock You Down (Official Music Video) ft. Kanye West, Ne-Yo p_RqWocthcc +607-Wisin & Yandel - Te Siento SKWxOsbt9gU +608-The Black Eyed Peas - Just Can't Get Enough (Official Music Video) OrTyD7rjBpw +609-Jennifer Lopez - Live It Up ft. Pitbull BofL1AaiTjo +610-Eminem - Just Lose It (Official Music Video) 9dcVOmEQzKA +612-The Black Eyed Peas - Don't Stop The Party (Official Music Video) u9LH_y159sg +613-Tinie Tempah - Written In The Stars ft. Eric Turner YgFyi74DVjc +614-Big Sean - Blessings (Official Explicit Video) ft. Drake, Kanye West M6t47RI4bns +615-Britney Spears - I Wanna Go (Official Video) T-sxSd1uwoU +616-Rihanna - Russian Roulette (Official Music Video) ZQ2nCGawrSY +617-Ellie Goulding - On My Mind (Official Video) H202k7KfZL0 +618-Pitbull - Hey Baby (Drop It To The Floor) ft. T-Pain LefQdEMJP1I +619-Maître Gims - J'me tire (Clip officiel) F_rEHfLgdcY +620-LMFAO - Champagne Showers ft. Natalia Kills UA8rcLvS1BY +621-Nicki Minaj - Pound The Alarm (Explicit) vdrqA93sW-8 +622-Maluma - La Curiosidad 9t7eMteW-Tc +623-Shakira - Rabiosa (English Version) ft. Pitbull a5irTX82olg +624-Rich Homie Quan - Type of Way (Official Video) -KKbdErJkiY +625-P!nk - Just Like Fire (From'Alice Through The Looking Glass' - Official Video) 5Nrv5teMc9Y +626-Rihanna - What Now (Official) b-3BI9AspYc +627-Camila - De Que Me Sirve la Vida 3YhoejhnW8w +628-Jennifer Lopez - Goin' In ft. Flo Rida z5W7DVFKrcs +629-LMFAO ft. Lil Jon - Shots (Official Video) XNtTEibFvlQ +630-Ciara - Like A Boy (Official Video) _HKH7Emy1SY +631-Calvin Harris & Alesso - Under Control (Official Video) ft. Hurts yZqmarGShxg +632-Fifth Harmony - BO$$ (BOSS) Y4JfPlry-iQ +633-Eminem - Berzerk (Official Music Video) (Explicit) ab9176Srb5Y +634-Years & Years - King (Official Video) g_uoH6hJilc +635-Ne-Yo - So Sick (Official Music Video) IxszlJppRQI +636-Selena Gomez & The Scene - A Year Without Rain M8uPvX2te0I +637-Daddy Yankee - Sabado Rebelde ft. Plan B 0nPkXDrL2ZU +638-Kanye West - All Of The Lights ft. Rihanna, Kid Cudi HAfFfqiYLp0 +639-Zedd - Stay The Night ft. Hayley Williams (Official Music Video) i-gyZ35074k +640-Yandel - Como Antes (Official Video) ft. Wisin QeaumjX9DNY +641-Taylor Swift - Back To December QUwxKWT6m7U +642-Romeo Santos - Rival (Official Video) ft. Mario Domm 6vMhhBRj-2Q +643-Henrique & Diego - Suíte 14 (Ao Vivo) ft. Mc Guimê gmvFLIuVAbA +644-Britney Spears - Gimme More (Official HD Video) elueA2rofoo +645-Rihanna - You Da One b3HeLs8Yosw +646-Avicii - Hey Brother 6Cp6mKbRTQY +647-Soulja Boy Tell'em ft. Sammie - Kiss Me Thru The Phone (Official Video) 47Fbo4kU2AU +648-Beyoncé - Partition (Explicit Video) pZ12_E5R3qc +649-Kid Cudi - Pursuit Of Happiness (Official Music Video) ft. MGMT 7xzU9Qqdqww +650-Sigala - Sweet Lovin' ft. Bryn Christopher (Official Video) qj5zT4t7S6c +651-The Game - My Life ft. Lil Wayne (Official Music Video) udxZ9zkDzpo +652-Nicki Minaj - Moment 4 Life (Clean Version) (Official Music Video) ft. Drake D7GW8TYCEG4 +653-Nicki Minaj - High School (Explicit) ft. 
Lil Wayne RnpyRe_7jZA +654-Chino & Nacho - Niña Bonita Oe1fRwgGu5E +655-Far East Movement ft. The Cataracs, DEV - Like A G6 (Official Video) w4s6H4ku6ZY +656-Pitbull, Ne-Yo - Time Of Our Lives bTXJQ5ql5Fw +657-Lorde - Team f2JuxM-snGc +658-Christina Aguilera - Candyman (Official Music Video) -ScjucUV8v0 +659-Katy Perry - I Kissed A Girl (Official) tAp9BKosZXs +660-One Direction - Gotta Be You nvfejaHz-o0 +661-Nicki Minaj - Pills N Potions (Official) f7ld-3nZUxA +662-Mohombi - Bumpy Ride G2RCCDSBEGk +663-Demi Lovato - Neon Lights (Official Video) v9uDwppN5-w +664-The Pussycat Dolls - When I Grow Up (Official Music Video) K0K46C82v9o +665-Chris Brown - Don't Wake Me Up (Official Music Video) QOowQeKyNkQ +666-Christina Aguilera - Hurt (Main Video) wwCykGDEp7M +667-Eminem - We Made You (Official Music Video) RSdKmX2BH7o +668-Taio Cruz - Break Your Heart (Official Video) ft. Ludacris y_SI2EDM6Lo +669-Demi Lovato - Really Don't Care ft. Cher Lloyd (Official Video) OJGUbwVMBeA +670-P!nk - Raise Your Glass (Official Video) XjVNlG5cZyQ +671-Austin Mahone - Mmm Yeah ft. Pitbull MMAppa1cAVo +672-Avril Lavigne - Smile (Official Music Video) KagvExF-ijc +673-Little Mix - Wings (Official Video) cOQDsmEqVt8 +674-Brandon Beal - Twerk It Like Miley - Produced by Hedegaard ft. Christopher PLE57UZievU +675-El Bebeto - No Te Creas Tan Importante (Video Oficial) nMv2PeG-2mc +676-Eminem - My Name Is (Official Music Video) sNPnbI1arSE +677-Justin Bieber - All That Matters JC2yu2a9sHk +678-The Wanted - Glad You Came 2ggzxInyzVE +679-Maluma - Addicted (Official Music Video) pMIHC_cItd4 +680-Pitbull - Fun (Official Video) ft. Chris Brown jKbR7u8J5PU +681-Thalia - Desde Esa Noche (Official Video) ft. Maluma CkyBXdXkMr8 +682-Michael Jackson - You Rock My World (Official Video) 1-7ABIM2qjU +683-The Band Perry - If I Die Young (Official Video) 7NJqUN9TClM +684-Alessia Cara - Here (Official Video) UKp2CrfmVfw +685-Güliz Ayla - Olmazsan Olmaz j-T4hRJNFJI +686-Rihanna - Disturbia E1mU6h4Xdxc +687-Beyoncé - Diva rNM5HW13_O8 +688-Bridgit Mendler - Ready or Not (Official Video) dPKG1-3LXBs +689-Nicki Minaj - Beez In The Trap (Explicit) ft. 2 Chainz EmZvOhHF85I +691-Snoop Dogg - 'Sweat' Snoop Dogg vs David Guetta (Remix) [Official Video] KnEXrbAQyIo +692-Olly Murs - Troublemaker ft. Flo Rida 4aQDOUbErNg +693-Ciara ft. Ludacris - Ride (Official Video) Lp6W4aK1sbs +694-Chris Brown - Don't Judge Me z29nI8RQV0U +695-Kendrick Lamar - LOYALTY. ft. Rihanna Dlh-dzB2U4Y +696-Chris Brown - Love More (Explicit) ft. Nicki Minaj Tff2oE31Mlw +697-Christina Aguilera - Your Body (Official Music Video) (Clean Version) 6cfCgLgiFDM +699-MIKA - Popular Song ft. Ariana Grande nmcdLOjGVzw +700-Britney Spears - Till The World Ends (Official Video) qzU9OrZlKb8 +701-Zendaya - Replay cyLE48i4XY0 +702-Shontelle - Impossible (Official Video) NWdrO4BoCu8 +703-Nicki Minaj, Cassie - The Boys (Explicit) kXFcr6oy5dk +704-Miley Cyrus - Can't Be Tamed (Official Video) sjSG6z_13-Q +705-Little Mix - Move (Official Video) RwD4eJGxPc4 +706-Selena Gomez - Tell Me Something I Don't Know _RRyniZG0Jo +707-Britney Spears - Circus (Official HD Video) lVhJ_A8XUgc +708-Avril Lavigne - Here's to Never Growing Up sXd2WxoOP5g +709-Lady Gaga - LoveGame (Official Music Video) 1mB0tP1I-14 +710-Chris Brown - Gimme That (remix) ft. Lil Wayne 3yl-5FOZcr0 +711-Beyoncé - Sweet Dreams JlxByc0-V40 +712-Leona Lewis - Bleeding Love (Official Video) 7_weSk0BonM +713-Taio Cruz - Dynamite (Official Video) VUjdiDeJ0xg +714-Tinashe - 2 On (Explicit) ft. 
SchoolBoy Q -s7TCuCpB5c +715-Natalie La Rose - Somebody ft. Jeremih 8zqdo_Umd5c +717-Usher - OMG ft. will.i.am 1RnPB76mjxI +718-Taylor Swift - Our Song Jb2stN7kH28 +719-Lil Wayne - How To Love (Official Music Video) y8Gf4-eT3w0 +720-Nicole Scherzinger - Right There ft. 50 Cent t-vTaktsUSw +721-OneRepublic - Good Life (Official Music Video) jZhQOvvV45w +722-Britney Spears, Iggy Azalea - Pretty Girls (Official Video) uV2uebhnqOw +723-Ellie Goulding - Lights (Official Video) 0NKUpo_xKyQ +724-Miley Cyrus - Adore You (Official Video) W1tzURKYFNs +725-Kanye West - Heartless Co0tTeuUVhU +726-Rihanna - Te Amo (Official Music Video) Oe4Ic7fHWf8 +727-Ariana Grande - Baby I bJuWlMFToNo +728-Vanessa Hudgens - Say Ok (Official Music Video) F5VvvVxuKko +729-DJ Khaled - I'm On One (Explicit Version) ft. Drake, Rick Ross, Lil Wayne Z09lYqdxqzo +730-Demi Lovato - Made in the USA (Official Video) z3zdIHDTbg0 +731-Train - Drive By (Official Music Video) oxqnFJ3lp5k +732-Eminem - The Way I Am (Official Music Video) mQvteoFiMlg +733-Timbaland - Carry Out (Official Music Video) ft. Justin Timberlake NRdHsuuXxfk +734-Daddy Yankee - La Noche De Los Dos ft. Natalia Jiménez GDBaeQ5JPuU +735-Justin Bieber - U Smile (Official Music Video) r2ozuCXpVJY +736-Ke$ha - Die Young (Official) NOubzHCUt48 +737-Nick Jonas - Jealous yw04QD1LaB0 +739-Ariana Grande - Right There ft. Big Sean fhcpubAVdmc +740-Selena Gomez & The Scene - Hit The Lights 8c2ahBlTPz0 +741-Eminem - Survival (Explicit) NlmezywdxPI +742-Miley Cyrus - Who Owns My Heart (Official Video) iVbQxC2c3-8 +743-Rihanna - Cheers (Drink To That) ZR0v0i63PQ4 +744-Sigala - Easy Love (Official Music Video) ozx898ADTxM +745-Farruko - Besas Tan Bien (Official Video) E-kkX2UuBcg +746-OneRepublic - All The Right Moves (Official Music Video) qrOeGCJdZe4 +747-Enrique Iglesias, Usher - Dirty Dancer ft. Lil Wayne vHJAUuicC0Q +748-Austin Mahone - What About Love (Official Video) 2PEG82Udb90 +749-Rihanna - Hard (Official Music Video) ft. Jeezy Xcwd_Nz6Zog +750-Lady Gaga - Perfect Illusion (Official Music Video) Xn599R0ZBwg +752-MGMT - Electric Feel (Official HD Video) MmZexg8sxyk +753-'Weird Al' Yankovic - White & Nerdy (Official Music Video) N9qYF9DZPdw +754-Taylor Swift - White Horse D1Xr-JFLxik +755-Miley Cyrus - When I Look At You (Official Video) 8wxOVn99FTE +756-Ne-Yo - Let Me Love You (Until You Learn To Love Yourself) (Official Music Video) crrOl0egI00 +757-Cher Lloyd - Oath (Official Music Video) ft. Becky G Cqz713hhz1Y +758-Timbaland - If We Ever Meet Again ft. Katy Perry (Official Music Video) KDKva-s_khY +759-'Watch Me' from Disney Channel's 'Shake It Up' (Official Video) PPNMGYOm1aM +761-Taio Cruz - Hangover (Official Video) ft. Flo Rida dLhFDYQHDQY +762-Daddy Yankee - Ven Conmigo ft. Prince Royce ZEInlYjVFzk +765-Demi Lovato - La La Land (Official Music Video) nmjO1p9Oxrk +766-Selena Gomez & The Scene - Round & Round UfcvO2t8Ntg +767-Britney Spears - Hold It Against Me (Official Video) -Edv8Onsrgg +768-Far East Movement - Turn Up The Love ft. Cover Drive UqXVgAmqBOs +769-Justin Bieber - Pray (Official Music Video) o9tJW9MDs2M +770-Drake - Find Your Love (Official Music Video) Xyv4Bjja8yc +772-Nicki Minaj - Va Va Voom (Explicit) 3U72hzeBLOw +773-will.i.am, Nicki Minaj - Check It Out (Official Music Video) pqky5B179nM +774-Nicki Minaj - Stupid Hoe (Explicit) T6j4f8cHBIM +775-Jennifer Lopez ft. 
French Montana - I Luh Ya Papi (Explicit) [Official Video] c4oiEhf9M04
diff --git a/dataset/vevo_meta/top_chord.txt b/dataset/vevo_meta/top_chord.txt
new file mode 100644
index 0000000000000000000000000000000000000000..26456cc45487d0d14cc23aae16e163403dc01995
--- /dev/null
+++ b/dataset/vevo_meta/top_chord.txt
@@ -0,0 +1,30 @@
+C 1 32576
+A:min 122 31898
+F 66 22538
+G 92 22137
+E:min 57 7935
+D:min 31 6457
+D 27 3973
+A:min7 121 3846
+A 118 3606
+E 53 2613
+D:min7 30 2598
+F:maj7 78 2530
+A# 131 1854
+E:min7 56 1695
+E:7 63 1396
+G:7 102 1321
+C:maj7 13 1039
+C:7 11 791
+D:7 37 697
+G:min 96 685
+C:min 5 684
+B:min 148 528
+F:min 70 474
+B 144 459
+D# 40 459
+G# 105 452
+A:7 128 391
+F:7 76 384
+G:sus4 94 384
+G:min7 95 277
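A note on how these vocabulary files fit together: each row of top_chord.txt appears to read `chord_name chord_id count` (so "C 1 32576" says the C major triad, chord id 1, occurs 32,576 times in the dataset), and the flat ids in chord.json encode a root id (1-12, chord_root.json) and an attribute id (1-13, chord_attr.json) with a stride of 13. A minimal sketch that checks this identity, assuming the JSON files from this diff are on disk under dataset/vevo_meta/:

import json

with open("dataset/vevo_meta/chord.json") as f:
    chord = json.load(f)
with open("dataset/vevo_meta/chord_root.json") as f:
    root = json.load(f)
with open("dataset/vevo_meta/chord_attr.json") as f:
    attr = json.load(f)

# For every chord except the no-chord symbol "N":
#   chord_id = (root_id - 1) * 13 + attr_id
for name, cid in chord.items():
    if name == "N":
        continue
    r, _, a = name.partition(":")
    assert cid == (root[r] - 1) * 13 + attr[a or "maj"]  # a bare root like "G" is a major triad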
diff --git a/default_sound_font.sf2 b/default_sound_font.sf2
new file mode 100644
index 0000000000000000000000000000000000000000..14b4bfccc13d330c811e8a2b4630d314173e40fe
--- /dev/null
+++ b/default_sound_font.sf2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74594e8f4250680adf590507a306655a299935343583256f3b722c48a1bc1cb0
+size 148398306
diff --git a/model/__pycache__/music_transformer.cpython-37.pyc b/model/__pycache__/music_transformer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c092bb158323b32021470593d5d8c0041adb85c
Binary files /dev/null and b/model/__pycache__/music_transformer.cpython-37.pyc differ
diff --git a/model/__pycache__/positional_encoding.cpython-37.pyc b/model/__pycache__/positional_encoding.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..43d4430de58959375ba40dc1250804bf7fd81868
Binary files /dev/null and b/model/__pycache__/positional_encoding.cpython-37.pyc differ
diff --git a/model/__pycache__/positional_encoding.cpython-38.pyc b/model/__pycache__/positional_encoding.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a418d963e1a78f4e87f310189c714cd49776b1e
Binary files /dev/null and b/model/__pycache__/positional_encoding.cpython-38.pyc differ
diff --git a/model/__pycache__/rpr.cpython-37.pyc b/model/__pycache__/rpr.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4deeff2fa2df843f3927bf9280e008b82e76ae20
Binary files /dev/null and b/model/__pycache__/rpr.cpython-37.pyc differ
diff --git a/model/__pycache__/rpr.cpython-38.pyc b/model/__pycache__/rpr.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3418d5d9daa7e7553a735bad69e6dfe1cb3917a9
Binary files /dev/null and b/model/__pycache__/rpr.cpython-38.pyc differ
diff --git a/model/__pycache__/video_music_transformer.cpython-37.pyc b/model/__pycache__/video_music_transformer.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8b4b3efb1730d7849ae97674a26859debe332cc9
Binary files /dev/null and b/model/__pycache__/video_music_transformer.cpython-37.pyc differ
diff --git a/model/__pycache__/video_music_transformer.cpython-38.pyc b/model/__pycache__/video_music_transformer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9cf86a1bfae9baf9d0733a0e96b79845a95acb7b
Binary files /dev/null and b/model/__pycache__/video_music_transformer.cpython-38.pyc differ
diff --git a/model/__pycache__/video_regression.cpython-37.pyc b/model/__pycache__/video_regression.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5e4dd9d485e65d7310362da2cceb6bb3b7d5482b
Binary files /dev/null and b/model/__pycache__/video_regression.cpython-37.pyc differ
diff --git a/model/__pycache__/video_regression.cpython-38.pyc b/model/__pycache__/video_regression.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e4ff92ea3fd9606ff5515b9132657ba899ca62c1
Binary files /dev/null and b/model/__pycache__/video_regression.cpython-38.pyc differ
diff --git a/model/loss.py b/model/loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..62ff3d5d39a13ced021e1b9ca27973804a3262e7
--- /dev/null
+++ b/model/loss.py
@@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.modules.loss import _Loss
+
+# Borrowed from https://github.com/jason9693/MusicTransformer-pytorch/blob/5f183374833ff6b7e17f3a24e3594dedd93a5fe5/custom/criterion.py#L28
+class SmoothCrossEntropyLoss(_Loss):
+    """
+    https://arxiv.org/abs/1512.00567
+    """
+    __constants__ = ['label_smoothing', 'vocab_size', 'ignore_index', 'reduction']
+
+    def __init__(self, label_smoothing, vocab_size, ignore_index=-100, reduction='mean', is_logits=True):
+        assert 0.0 <= label_smoothing <= 1.0
+        super().__init__(reduction=reduction)
+
+        self.label_smoothing = label_smoothing
+        self.vocab_size = vocab_size
+        self.ignore_index = ignore_index
+        self.input_is_logits = is_logits
+
+    def forward(self, input, target):
+        """
+        Args:
+            input: [B * T, V]
+            target: [B * T]
+        Returns:
+            cross entropy: [1]
+        """
+        mask = (target == self.ignore_index).unsqueeze(-1)
+        q = F.one_hot(target.long(), self.vocab_size).type(torch.float32)
+        u = 1.0 / self.vocab_size
+        q_prime = (1.0 - self.label_smoothing) * q + self.label_smoothing * u
+        q_prime = q_prime.masked_fill(mask, 0)
+
+        ce = self.cross_entropy_with_logits(q_prime, input)
+        if self.reduction == 'mean':
+            lengths = torch.sum(target != self.ignore_index)
+            return ce.sum() / lengths
+        elif self.reduction == 'sum':
+            return ce.sum()
+        else:
+            raise NotImplementedError
+
+    def cross_entropy_with_logits(self, p, q):
+        return -torch.sum(p * (q - q.logsumexp(dim=-1, keepdim=True)), dim=-1)
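Since loss.py is new in this PR, here is a minimal usage sketch of SmoothCrossEntropyLoss. Shapes follow its docstring (logits [B * T, V], integer targets [B * T]); the vocabulary size and batch numbers below are illustrative, not taken from the repo's constants, which would pass CHORD_SIZE here:

import torch
from model.loss import SmoothCrossEntropyLoss

V = 159  # illustrative vocabulary size (the repo would use CHORD_SIZE)
criterion = SmoothCrossEntropyLoss(label_smoothing=0.1, vocab_size=V)

logits = torch.randn(8 * 300, V, requires_grad=True)  # [B * T, V], raw logits
targets = torch.randint(0, V, (8 * 300,))             # [B * T], class indices

loss = criterion(logits, targets)  # scalar: mean over non-ignored positions
loss.backward()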
diff --git a/model/music_transformer.py b/model/music_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..633b2b335c098ae2c19d023a5ece8424e559034c
--- /dev/null
+++ b/model/music_transformer.py
@@ -0,0 +1,177 @@
+import torch
+import torch.nn as nn
+from torch.nn.modules.normalization import LayerNorm
+import random
+
+from utilities.constants import *
+from utilities.device import get_device
+
+from .positional_encoding import PositionalEncoding
+from .rpr import TransformerEncoderRPR, TransformerEncoderLayerRPR
+import json
+# MusicTransformer
+class MusicTransformer(nn.Module):
+    def __init__(self, n_layers=6, num_heads=8, d_model=512, dim_feedforward=1024,
+                 dropout=0.1, max_sequence_midi=2048, max_sequence_chord=300, rpr=False):
+        super(MusicTransformer, self).__init__()
+
+        self.dummy = DummyDecoder()
+        self.nlayers = n_layers
+        self.nhead = num_heads
+        self.d_model = d_model
+        self.d_ff = dim_feedforward
+        self.dropout = dropout
+        self.max_seq_midi = max_sequence_midi
+        self.max_seq_chord = max_sequence_chord
+        self.rpr = rpr
+
+        # Input embedding for video and music features
+        self.embedding = nn.Embedding(CHORD_SIZE, self.d_model)
+
+        # self.embedding_key = nn.Embedding(1, self.d_model)
+        self.embedding_root = nn.Embedding(CHORD_ROOT_SIZE, self.d_model)
+        self.embedding_attr = nn.Embedding(CHORD_ATTR_SIZE, self.d_model)
+
+        self.positional_encoding = PositionalEncoding(self.d_model, self.dropout, self.max_seq_chord)
+        self.Linear_chord = nn.Linear(self.d_model+1, self.d_model)
+
+        # Base transformer
+        if(not self.rpr):
+            self.transformer = nn.Transformer(
+                d_model=self.d_model, nhead=self.nhead, num_encoder_layers=self.nlayers,
+                num_decoder_layers=0, dropout=self.dropout, # activation=self.ff_activ,
+                dim_feedforward=self.d_ff, custom_decoder=self.dummy
+            )
+        # RPR Transformer
+        else:
+            encoder_norm = LayerNorm(self.d_model)
+            encoder_layer = TransformerEncoderLayerRPR(self.d_model, self.nhead, self.d_ff, self.dropout, er_len=self.max_seq_chord)
+
+            encoder = TransformerEncoderRPR(encoder_layer, self.nlayers, encoder_norm)
+            self.transformer = nn.Transformer(
+                d_model=self.d_model, nhead=self.nhead, num_encoder_layers=self.nlayers,
+                num_decoder_layers=0, dropout=self.dropout, # activation=self.ff_activ,
+                dim_feedforward=self.d_ff, custom_decoder=self.dummy, custom_encoder=encoder
+            )
+        # Final output is a softmaxed linear layer
+        self.Wout = nn.Linear(self.d_model, CHORD_SIZE)
+        self.Wout_root = nn.Linear(self.d_model, CHORD_ROOT_SIZE)
+        self.Wout_attr = nn.Linear(self.d_model, CHORD_ATTR_SIZE)
+        self.softmax = nn.Softmax(dim=-1)
+
+    # forward
+    def forward(self, x, x_root, x_attr, feature_key, mask=True):
+        if(mask is True):
+            mask = self.transformer.generate_square_subsequent_mask(x.shape[1]).to(get_device())
+        else:
+            mask = None
+
+        ### Chord + Key (DECODER) ###
+        # x = self.embedding(x)
+
+        x_root = self.embedding_root(x_root)
+        x_attr = self.embedding_attr(x_attr)
+        x = x_root + x_attr
+
+        feature_key_padded = torch.full((x.shape[0], x.shape[1], 1), feature_key.item())
+        feature_key_padded = feature_key_padded.to(get_device())
+        x = torch.cat([x, feature_key_padded], dim=-1)
+        xf = self.Linear_chord(x)
+
+        ### POSITIONAL ENCODING ###
+        xf = xf.permute(1,0,2) # -> (max_seq-1, batch_size, d_model)
+        xf = self.positional_encoding(xf)
+
+        ### TRANSFORMER ###
+        x_out = self.transformer(src=xf, tgt=xf, tgt_mask=mask)
+        x_out = x_out.permute(1,0,2)
+
+        if IS_SEPERATED:
+            y_root = self.Wout_root(x_out)
+            y_attr = self.Wout_attr(x_out)
+            del mask
+            return y_root, y_attr
+        else:
+            y = self.Wout(x_out)
+            del mask
+            return y
+
+    # generate
+    def generate(self, feature_key=None, primer=None, primer_root=None, primer_attr=None, target_seq_length=300, beam=0, beam_chance=1.0):
+        assert (not self.training), "Cannot generate while in training mode"
+
+        with open('dataset/vevo_meta/chord_inv.json') as json_file:
+            chordInvDic = json.load(json_file)
+        with open('dataset/vevo_meta/chord_root.json') as json_file:
+            chordRootDic = json.load(json_file)
+        with open('dataset/vevo_meta/chord_attr.json') as json_file:
+            chordAttrDic = json.load(json_file)
+
+        print("Generating sequence of max length:", target_seq_length)
+        gen_seq = torch.full((1,target_seq_length), CHORD_PAD, dtype=TORCH_LABEL_TYPE, device=get_device())
+        gen_seq_root = torch.full((1,target_seq_length), CHORD_ROOT_PAD, dtype=TORCH_LABEL_TYPE, device=get_device())
+        gen_seq_attr = torch.full((1,target_seq_length), CHORD_ATTR_PAD, dtype=TORCH_LABEL_TYPE, device=get_device())
+
+        num_primer = len(primer)
+
+        gen_seq[..., :num_primer] = primer.type(TORCH_LABEL_TYPE).to(get_device())
+        gen_seq_root[..., :num_primer] = primer_root.type(TORCH_LABEL_TYPE).to(get_device())
+
+        gen_seq_attr[..., :num_primer] = primer_attr.type(TORCH_LABEL_TYPE).to(get_device())
+
+        cur_i = num_primer
+        while(cur_i < target_seq_length):
+            # gen_seq_batch = gen_seq.clone()
+            # y = self.softmax(self.forward(gen_seq[..., :cur_i]))[..., :CHORD_END]
+            y = self.softmax( self.forward( gen_seq[..., :cur_i], gen_seq_root[..., :cur_i], gen_seq_attr[..., :cur_i], feature_key) )[..., :CHORD_END]
+            token_probs = y[:, cur_i-1, :]
+            if(beam == 0):
+                beam_ran = 2.0
+            else:
+                beam_ran = random.uniform(0,1)
+            if(beam_ran <= beam_chance):
+                token_probs = token_probs.flatten()
+                top_res, top_i = torch.topk(token_probs, beam)
+                beam_rows = top_i // CHORD_SIZE
+                beam_cols = top_i % CHORD_SIZE
+                gen_seq = gen_seq[beam_rows, :]
+                gen_seq[..., cur_i] = beam_cols
+            else:
+                distrib = torch.distributions.categorical.Categorical(probs=token_probs)
+                next_token = distrib.sample()
+                #print("next token:",next_token)
+                gen_seq[:, cur_i] = next_token
+                gen_chord = chordInvDic[ str( next_token.item() ) ]
+
+                chord_arr = gen_chord.split(":")
+                if len(chord_arr) == 1:
+                    chordRootID = chordRootDic[chord_arr[0]]
+                    chordAttrID = 1
+                    chordRootID = torch.tensor([chordRootID]).to(get_device())
+                    chordAttrID = torch.tensor([chordAttrID]).to(get_device())
+                    gen_seq_root[:, cur_i] = chordRootID
+                    gen_seq_attr[:, cur_i] = chordAttrID
+                elif len(chord_arr) == 2:
+                    chordRootID = chordRootDic[chord_arr[0]]
+                    chordAttrID = chordAttrDic[chord_arr[1]]
+                    chordRootID = torch.tensor([chordRootID]).to(get_device())
+                    chordAttrID = torch.tensor([chordAttrID]).to(get_device())
+                    gen_seq_root[:, cur_i] = chordRootID
+                    gen_seq_attr[:, cur_i] = chordAttrID
+
+                # Let the transformer decide to end if it wants to
+                if(next_token == CHORD_END):
+                    print("Model called end of sequence at:", cur_i, "/", target_seq_length)
+                    break
+
+            cur_i += 1
+            if(cur_i % 50 == 0):
+                print(cur_i, "/", target_seq_length)
+        return gen_seq[:, :cur_i]
+
+class DummyDecoder(nn.Module):
+    def __init__(self):
+        super(DummyDecoder, self).__init__()
+    def forward(self, tgt, memory, tgt_mask, memory_mask,tgt_key_padding_mask,memory_key_padding_mask):
+        return memory
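The len(chord_arr) branching inside generate() above is just the inverse of the vocabulary layout: a sampled flat chord id is mapped back to a name via chord_inv.json and then split into the (root id, attribute id) pair that feeds the next forward pass. A standalone sketch of that decode step, assuming the same dataset/vevo_meta JSON files:

import json

with open("dataset/vevo_meta/chord_inv.json") as f:
    chord_inv = json.load(f)
with open("dataset/vevo_meta/chord_root.json") as f:
    chord_root = json.load(f)
with open("dataset/vevo_meta/chord_attr.json") as f:
    chord_attr = json.load(f)

def split_chord_id(token: int):
    """Map a flat chord id to (root_id, attr_id), mirroring generate()."""
    name = chord_inv[str(token)]
    root, _, attr = name.partition(":")
    if not attr:                        # bare root like "G": a major triad
        return chord_root[root], 1
    return chord_root[root], chord_attr[attr]

print(split_chord_id(122))  # "A:min" -> (10, 5)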
self.softmax( self.forward( gen_seq[..., :cur_i], gen_seq_root[..., :cur_i], gen_seq_attr[..., :cur_i], feature_key) )[..., :CHORD_END] + + token_probs = y[:, cur_i-1, :] + if(beam == 0): + beam_ran = 2.0 + else: + beam_ran = random.uniform(0,1) + if(beam_ran <= beam_chance): + token_probs = token_probs.flatten() + top_res, top_i = torch.topk(token_probs, beam) + beam_rows = top_i // CHORD_SIZE + beam_cols = top_i % CHORD_SIZE + gen_seq = gen_seq[beam_rows, :] + gen_seq[..., cur_i] = beam_cols + else: + distrib = torch.distributions.categorical.Categorical(probs=token_probs) + next_token = distrib.sample() + #print("next token:",next_token) + gen_seq[:, cur_i] = next_token + gen_chord = chordInvDic[ str( next_token.item() ) ] + + chord_arr = gen_chord.split(":") + if len(chord_arr) == 1: + chordRootID = chordRootDic[chord_arr[0]] + chordAttrID = 1 + chordRootID = torch.tensor([chordRootID]).to(get_device()) + chordAttrID = torch.tensor([chordAttrID]).to(get_device()) + gen_seq_root[:, cur_i] = chordRootID + gen_seq_attr[:, cur_i] = chordAttrID + elif len(chord_arr) == 2: + chordRootID = chordRootDic[chord_arr[0]] + chordAttrID = chordAttrDic[chord_arr[1]] + chordRootID = torch.tensor([chordRootID]).to(get_device()) + chordAttrID = torch.tensor([chordAttrID]).to(get_device()) + gen_seq_root[:, cur_i] = chordRootID + gen_seq_attr[:, cur_i] = chordAttrID + + # Let the transformer decide to end if it wants to + if(next_token == CHORD_END): + print("Model called end of sequence at:", cur_i, "/", target_seq_length) + break + + cur_i += 1 + if(cur_i % 50 == 0): + print(cur_i, "/", target_seq_length) + return gen_seq[:, :cur_i] + +class DummyDecoder(nn.Module): + def __init__(self): + super(DummyDecoder, self).__init__() + def forward(self, tgt, memory, tgt_mask, memory_mask,tgt_key_padding_mask,memory_key_padding_mask): + return memory diff --git a/model/positional_encoding.py b/model/positional_encoding.py new file mode 100644 index 0000000000000000000000000000000000000000..5820a69d721c879cfcdb21db63b9aef83b98f2bb --- /dev/null +++ b/model/positional_encoding.py @@ -0,0 +1,23 @@ +import torch +import torch.nn as nn +import math + +# PositionalEncoding +# Taken from https://pytorch.org/tutorials/beginner/transformer_tutorial.html +class PositionalEncoding(nn.Module): + + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + x = x + self.pe[:x.size(0), :] + return self.dropout(x) diff --git a/model/rpr.py b/model/rpr.py new file mode 100644 index 0000000000000000000000000000000000000000..1573451715f8cbcdb8834bc11f7372441d843d95 --- /dev/null +++ b/model/rpr.py @@ -0,0 +1,455 @@ +import torch +import torch.nn as nn + +from torch.nn import functional as F +from torch.nn.parameter import Parameter +from torch.nn import Module +from torch.nn.modules.transformer import _get_clones +from torch.nn.modules.linear import Linear +from torch.nn.modules.dropout import Dropout +from torch.nn.modules.normalization import LayerNorm +from torch.nn.init import * + +from torch.nn.functional import linear, softmax, dropout +from torch.nn 
import MultiheadAttention +from typing import Optional + +class TransformerDecoderRPR(Module): + def __init__(self, decoder_layer, num_layers, norm=None): + super(TransformerDecoderRPR, self).__init__() + self.layers = _get_clones(decoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None): + output = tgt + for mod in self.layers: + output = mod(output, memory, tgt_mask=tgt_mask, + memory_mask=memory_mask, + tgt_key_padding_mask=tgt_key_padding_mask, + memory_key_padding_mask=memory_key_padding_mask) + + if self.norm is not None: + output = self.norm(output) + + return output + +class TransformerDecoderLayerRPR(Module): + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, er_len=None): + super(TransformerDecoderLayerRPR, self).__init__() + + self.self_attn = MultiheadAttentionRPR(d_model, nhead, dropout=dropout, er_len=er_len) + self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.norm3 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.dropout2 = Dropout(dropout) + self.dropout3 = Dropout(dropout) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, + tgt_key_padding_mask=None, memory_key_padding_mask=None): + tgt2 = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask, + key_padding_mask=tgt_key_padding_mask)[0] + tgt = tgt + self.dropout1(tgt2) + tgt = self.norm1(tgt) + + tgt2 = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask, + key_padding_mask=memory_key_padding_mask)[0] + + tgt = tgt + self.dropout2(tgt2) + tgt = self.norm2(tgt) + tgt2 = self.linear2(self.dropout(F.relu(self.linear1(tgt)))) + tgt = tgt + self.dropout3(tgt2) + tgt = self.norm3(tgt) + return tgt + +# TransformerEncoderRPR (only for music transformer) +class TransformerEncoderRPR(Module): + def __init__(self, encoder_layer, num_layers, norm=None): + super(TransformerEncoderRPR, self).__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + self.norm = norm + def forward(self, src, mask=None, src_key_padding_mask=None): + output = src + for i in range(self.num_layers): + output = self.layers[i](output, src_mask=mask, + src_key_padding_mask=src_key_padding_mask) + if self.norm: + output = self.norm(output) + return output + +# TransformerEncoderLayerRPR (only for music transformer) +class TransformerEncoderLayerRPR(Module): + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, er_len=None): + super(TransformerEncoderLayerRPR, self).__init__() + self.self_attn = MultiheadAttentionRPR(d_model, nhead, dropout=dropout, er_len=er_len) + # Implementation of Feedforward model + self.linear1 = Linear(d_model, dim_feedforward) + self.dropout = Dropout(dropout) + self.linear2 = Linear(dim_feedforward, d_model) + self.norm1 = LayerNorm(d_model) + self.norm2 = LayerNorm(d_model) + self.dropout1 = Dropout(dropout) + self.dropout2 = Dropout(dropout) + def forward(self, src, src_mask=None, src_key_padding_mask=None): + src2 = self.self_attn(src, src, src, attn_mask=src_mask, + key_padding_mask=src_key_padding_mask)[0] + src = src + self.dropout1(src2) + src = self.norm1(src) + src2 = 
self.linear2(self.dropout(F.relu(self.linear1(src)))) + src = src + self.dropout2(src2) + src = self.norm2(src) + return src + +# MultiheadAttentionRPR +class MultiheadAttentionRPR(Module): + def __init__(self, embed_dim, num_heads, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, er_len=None): + super(MultiheadAttentionRPR, self).__init__() + self.embed_dim = embed_dim + self.kdim = kdim if kdim is not None else embed_dim + self.vdim = vdim if vdim is not None else embed_dim + self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim + + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" + + self.in_proj_weight = Parameter(torch.empty(3 * embed_dim, embed_dim)) + + if self._qkv_same_embed_dim is False: + self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim)) + self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim)) + + if bias: + self.in_proj_bias = Parameter(torch.empty(3 * embed_dim)) + else: + self.register_parameter('in_proj_bias', None) + self.out_proj = Linear(embed_dim, embed_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.empty(1, 1, embed_dim)) + self.bias_v = Parameter(torch.empty(1, 1, embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + # Adding RPR embedding matrix + if(er_len is not None): + self.Er = Parameter(torch.rand((er_len, self.head_dim), dtype=torch.float32)) + else: + self.Er = None + + self._reset_parameters() + + def _reset_parameters(self): + if self._qkv_same_embed_dim: + xavier_uniform_(self.in_proj_weight) + else: + xavier_uniform_(self.q_proj_weight) + xavier_uniform_(self.k_proj_weight) + xavier_uniform_(self.v_proj_weight) + + if self.in_proj_bias is not None: + constant_(self.in_proj_bias, 0.) + constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + xavier_normal_(self.bias_k) + if self.bias_v is not None: + xavier_normal_(self.bias_v) + + def forward(self, query, key, value, key_padding_mask=None, + need_weights=True, attn_mask=None): + + if hasattr(self, '_qkv_same_embed_dim') and self._qkv_same_embed_dim is False: + + return multi_head_attention_forward_rpr( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask, use_separate_proj_weight=True, + q_proj_weight=self.q_proj_weight, k_proj_weight=self.k_proj_weight, + v_proj_weight=self.v_proj_weight, rpr_mat=self.Er) + else: + if not hasattr(self, '_qkv_same_embed_dim'): + warnings.warn('A new version of MultiheadAttention module has been implemented. 
\ + Please re-train your model with the new module', + UserWarning) + + return multi_head_attention_forward_rpr( + query, key, value, self.embed_dim, self.num_heads, + self.in_proj_weight, self.in_proj_bias, + self.bias_k, self.bias_v, self.add_zero_attn, + self.dropout, self.out_proj.weight, self.out_proj.bias, + training=self.training, + key_padding_mask=key_padding_mask, need_weights=need_weights, + attn_mask=attn_mask, rpr_mat=self.Er) + +# multi_head_attention_forward_rpr +def multi_head_attention_forward_rpr(query, # type: Tensor + key, # type: Tensor + value, # type: Tensor + embed_dim_to_check, # type: int + num_heads, # type: int + in_proj_weight, # type: Tensor + in_proj_bias, # type: Tensor + bias_k, # type: Optional[Tensor] + bias_v, # type: Optional[Tensor] + add_zero_attn, # type: bool + dropout_p, # type: float + out_proj_weight, # type: Tensor + out_proj_bias, # type: Tensor + training=True, # type: bool + key_padding_mask=None, # type: Optional[Tensor] + need_weights=True, # type: bool + attn_mask=None, # type: Optional[Tensor] + use_separate_proj_weight=False, # type: bool + q_proj_weight=None, # type: Optional[Tensor] + k_proj_weight=None, # type: Optional[Tensor] + v_proj_weight=None, # type: Optional[Tensor] + static_k=None, # type: Optional[Tensor] + static_v=None, # type: Optional[Tensor] + rpr_mat=None + ): + """ + ---------- + Author: Pytorch + Modified: Damon Gwinn + ---------- + For Relative Position Representation support (https://arxiv.org/abs/1803.02155) + https://pytorch.org/docs/1.2.0/_modules/torch/nn/functional.html + Modification to take RPR embedding matrix and perform skew optimized RPR (https://arxiv.org/abs/1809.04281) + ---------- + """ + # type: (...) -> Tuple[Tensor, Optional[Tensor]] + + qkv_same = torch.equal(query, key) and torch.equal(key, value) + kv_same = torch.equal(key, value) + + tgt_len, bsz, embed_dim = query.size() + assert embed_dim == embed_dim_to_check + assert list(query.size()) == [tgt_len, bsz, embed_dim] + assert key.size() == value.size() + + head_dim = embed_dim // num_heads + assert head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + scaling = float(head_dim) ** -0.5 + + if use_separate_proj_weight is not True: + if qkv_same: + # self-attention + q, k, v = linear(query, in_proj_weight, in_proj_bias).chunk(3, dim=-1) + + elif kv_same: + # encoder-decoder attention + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = linear(query, _w, _b) + + if key is None: + assert value is None + k = None + v = None + else: + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + k, v = linear(key, _w, _b).chunk(2, dim=-1) + + else: + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 0 + _end = embed_dim + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + q = linear(query, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = embed_dim + _end = embed_dim * 2 + _w = in_proj_weight[_start:_end, :] + if _b is not None: + _b = _b[_start:_end] + k = linear(key, _w, _b) + + # This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias + _start = 
embed_dim * 2 + _end = None + _w = in_proj_weight[_start:, :] + if _b is not None: + _b = _b[_start:] + v = linear(value, _w, _b) + else: + q_proj_weight_non_opt = torch.jit._unwrap_optional(q_proj_weight) + len1, len2 = q_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == query.size(-1) + + k_proj_weight_non_opt = torch.jit._unwrap_optional(k_proj_weight) + len1, len2 = k_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == key.size(-1) + + v_proj_weight_non_opt = torch.jit._unwrap_optional(v_proj_weight) + len1, len2 = v_proj_weight_non_opt.size() + assert len1 == embed_dim and len2 == value.size(-1) + + if in_proj_bias is not None: + q = linear(query, q_proj_weight_non_opt, in_proj_bias[0:embed_dim]) + k = linear(key, k_proj_weight_non_opt, in_proj_bias[embed_dim:(embed_dim * 2)]) + v = linear(value, v_proj_weight_non_opt, in_proj_bias[(embed_dim * 2):]) + else: + q = linear(query, q_proj_weight_non_opt, in_proj_bias) + k = linear(key, k_proj_weight_non_opt, in_proj_bias) + v = linear(value, v_proj_weight_non_opt, in_proj_bias) + q = q * scaling + + if bias_k is not None and bias_v is not None: + if static_k is None and static_v is None: + k = torch.cat([k, bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, + torch.zeros((attn_mask.size(0), 1), + dtype=attn_mask.dtype, + device=attn_mask.device)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1), + dtype=key_padding_mask.dtype, + device=key_padding_mask.device)], dim=1) + else: + assert static_k is None, "bias cannot be added to static key." + assert static_v is None, "bias cannot be added to static value." 
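+    # (Editor's note, illustrative comments, not part of the original file:
+    #  from here on q, k and v are reshaped to (bsz * num_heads, seq_len, head_dim)
+    #  so that all heads can be batched through torch.bmm; when an Er matrix is
+    #  supplied, the relative-position logits are added to the attention scores
+    #  via the _skew trick from https://arxiv.org/abs/1809.04281, see below.)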
+ else: + assert bias_k is None + assert bias_v is None + + q = q.contiguous().view(tgt_len, bsz * num_heads, head_dim).transpose(0, 1) + if k is not None: + k = k.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * num_heads, head_dim).transpose(0, 1) + + if static_k is not None: + assert static_k.size(0) == bsz * num_heads + assert static_k.size(2) == head_dim + k = static_k + + if static_v is not None: + assert static_v.size(0) == bsz * num_heads + assert static_v.size(2) == head_dim + v = static_v + + src_len = k.size(1) + + if key_padding_mask is not None: + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + if add_zero_attn: + src_len += 1 + k = torch.cat([k, torch.zeros((k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device)], dim=1) + v = torch.cat([v, torch.zeros((v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device)], dim=1) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, torch.zeros((attn_mask.size(0), 1), + dtype=attn_mask.dtype, + device=attn_mask.device)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros((key_padding_mask.size(0), 1), + dtype=key_padding_mask.dtype, + device=key_padding_mask.device)], dim=1) + + attn_output_weights = torch.bmm(q, k.transpose(1, 2)) + assert list(attn_output_weights.size()) == [bsz * num_heads, tgt_len, src_len] + + ######### ADDITION OF RPR ########### + if(rpr_mat is not None): + rpr_mat = _get_valid_embedding(rpr_mat, q.shape[1], k.shape[1]) + qe = torch.einsum("hld,md->hlm", q, rpr_mat) + srel = _skew(qe) + attn_output_weights += srel + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + attn_output_weights += attn_mask + + if key_padding_mask is not None: + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + attn_output_weights = attn_output_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_output_weights = attn_output_weights.view(bsz * num_heads, tgt_len, src_len) + + attn_output_weights = softmax( + attn_output_weights, dim=-1) + + attn_output_weights = dropout(attn_output_weights, p=dropout_p, training=training) + + attn_output = torch.bmm(attn_output_weights, v) + assert list(attn_output.size()) == [bsz * num_heads, tgt_len, head_dim] + attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) + attn_output = linear(attn_output, out_proj_weight, out_proj_bias) + + if need_weights: + # average attention weights over heads + attn_output_weights = attn_output_weights.view(bsz, num_heads, tgt_len, src_len) + return attn_output, attn_output_weights.sum(dim=1) / num_heads + else: + return attn_output, None + +def _get_valid_embedding(Er, len_q, len_k): + """ + ---------- + Author: Damon Gwinn + ---------- + Gets valid embeddings based on max length of RPR attention + ---------- + """ + + len_e = Er.shape[0] + start = max(0, len_e - len_q) + return Er[start:, :] + +def _skew(qe): + """ + ---------- + Author: Damon Gwinn + ---------- + Performs the skew optimized RPR computation (https://arxiv.org/abs/1809.04281) + ---------- + """ + sz = qe.shape[1] + mask = (torch.triu(torch.ones(sz, sz).to(qe.device)) == 1).float().flip(0) + + qe = mask * qe + qe = F.pad(qe, (1,0, 0,0, 0,0)) + qe = torch.reshape(qe, (qe.shape[0], qe.shape[2], qe.shape[1])) + + srel = qe[:, 1:, :] + return srel diff --git a/model/video_music_transformer.py 
b/model/video_music_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..146e8ba81c1e26d3b70e1f72287cc8a4793ff8a4 --- /dev/null +++ b/model/video_music_transformer.py @@ -0,0 +1,205 @@ +import torch +import torch.nn as nn +from torch.nn.modules.normalization import LayerNorm +import random +import numpy as np +from utilities.constants import * +from utilities.device import get_device +from .positional_encoding import PositionalEncoding +from .rpr import TransformerDecoderRPR, TransformerDecoderLayerRPR +from datetime import datetime +import json + + +class VideoMusicTransformer(nn.Module): + def __init__(self, n_layers=6, num_heads=8, d_model=512, dim_feedforward=1024, + dropout=0.1, max_sequence_midi =2048, max_sequence_video=300, max_sequence_chord=300, total_vf_dim = 0, rpr=False): + super(VideoMusicTransformer, self).__init__() + self.nlayers = n_layers + self.nhead = num_heads + self.d_model = d_model + self.d_ff = dim_feedforward + self.dropout = dropout + self.max_seq_midi = max_sequence_midi + self.max_seq_video = max_sequence_video + self.max_seq_chord = max_sequence_chord + self.rpr = rpr + + # Input embedding for video and music features + self.embedding = nn.Embedding(CHORD_SIZE, self.d_model) + self.embedding_root = nn.Embedding(CHORD_ROOT_SIZE, self.d_model) + self.embedding_attr = nn.Embedding(CHORD_ATTR_SIZE, self.d_model) + + self.total_vf_dim = total_vf_dim + self.Linear_vis = nn.Linear(self.total_vf_dim, self.d_model) + self.Linear_chord = nn.Linear(self.d_model+1, self.d_model) + + # Positional encoding + self.positional_encoding = PositionalEncoding(self.d_model, self.dropout, self.max_seq_chord) + self.positional_encoding_video = PositionalEncoding(self.d_model, self.dropout, self.max_seq_video) + + # Add condition (minor or major) + self.condition_linear = nn.Linear(1, self.d_model) + + # Base transformer + if(not self.rpr): + self.transformer = nn.Transformer( + d_model=self.d_model, nhead=self.nhead, num_encoder_layers=self.nlayers, + num_decoder_layers=self.nlayers, dropout=self.dropout, # activation=self.ff_activ, + dim_feedforward=self.d_ff + ) + # RPR Transformer + else: + decoder_norm = LayerNorm(self.d_model) + decoder_layer = TransformerDecoderLayerRPR(self.d_model, self.nhead, self.d_ff, self.dropout, er_len=self.max_seq_chord) + decoder = TransformerDecoderRPR(decoder_layer, self.nlayers, decoder_norm) + self.transformer = nn.Transformer( + d_model=self.d_model, nhead=self.nhead, num_encoder_layers=self.nlayers, + num_decoder_layers=self.nlayers, dropout=self.dropout, # activation=self.ff_activ, + dim_feedforward=self.d_ff, custom_decoder=decoder + ) + + self.Wout = nn.Linear(self.d_model, CHORD_SIZE) + self.Wout_root = nn.Linear(self.d_model, CHORD_ROOT_SIZE) + self.Wout_attr = nn.Linear(self.d_model, CHORD_ATTR_SIZE) + self.softmax = nn.Softmax(dim=-1) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + def forward(self, x, x_root, x_attr, feature_semantic_list, feature_key, feature_scene_offset, feature_motion, feature_emotion, mask=True): + if(mask is True): + mask = self.transformer.generate_square_subsequent_mask(x.shape[1]).to(self.device) + else: + mask = None + + x_root = self.embedding_root(x_root) + x_attr = self.embedding_attr(x_attr) + x = x_root + x_attr + + feature_key_padded = torch.full((x.shape[0], x.shape[1], 1), feature_key.item()) + feature_key_padded = feature_key_padded.to(self.device) + x = torch.cat([x, feature_key_padded], dim=-1) + + xf = self.Linear_chord(x) + + ### 
Video (SemanticList + SceneOffset + Motion + Emotion) (ENCODER) ### + vf_concat = feature_semantic_list[0].float() + + for i in range(1, len(feature_semantic_list)): + vf_concat = torch.cat( (vf_concat, feature_semantic_list[i].float()), dim=2) + + vf_concat = torch.cat([vf_concat, feature_scene_offset.unsqueeze(-1).float()], dim=-1) # -> (max_seq_video, batch_size, d_model+1) + vf_concat = torch.cat([vf_concat, feature_motion.unsqueeze(-1).float()], dim=-1) # -> (max_seq_video, batch_size, d_model+1) + vf_concat = torch.cat([vf_concat, feature_emotion.float()], dim=-1) # -> (max_seq_video, batch_size, d_model+1) + vf = self.Linear_vis(vf_concat) + + ### POSITIONAL ENCODING ### + + xf = xf.permute(1,0,2) # -> (max_seq-1, batch_size, d_model) + vf = vf.permute(1,0,2) # -> (max_seq_video, batch_size, d_model) + + xf = self.positional_encoding(xf) + vf = self.positional_encoding_video(vf) + + ### TRANSFORMER ### + x_out = self.transformer(src=vf, tgt=xf, tgt_mask=mask) + x_out = x_out.permute(1,0,2) + + if IS_SEPERATED: + y_root = self.Wout_root(x_out) + y_attr = self.Wout_attr(x_out) + del mask + return y_root, y_attr + else: + y = self.Wout(x_out) + del mask + return y + + def generate(self, feature_semantic_list = [], feature_key=None, feature_scene_offset=None, feature_motion=None, feature_emotion=None, + primer=None, primer_root=None, primer_attr=None, target_seq_length=300, beam=0, + beam_chance=1.0, max_conseq_N = 0, max_conseq_chord = 2): + + assert (not self.training), "Cannot generate while in training mode" + print("Generating sequence of max length:", target_seq_length) + + with open('dataset/vevo_meta/chord_inv.json') as json_file: + chordInvDic = json.load(json_file) + with open('dataset/vevo_meta/chord_root.json') as json_file: + chordRootDic = json.load(json_file) + with open('dataset/vevo_meta/chord_attr.json') as json_file: + chordAttrDic = json.load(json_file) + + gen_seq = torch.full((1,target_seq_length), CHORD_PAD, dtype=TORCH_LABEL_TYPE, device=self.device) + gen_seq_root = torch.full((1,target_seq_length), CHORD_ROOT_PAD, dtype=TORCH_LABEL_TYPE, device=self.device) + gen_seq_attr = torch.full((1,target_seq_length), CHORD_ATTR_PAD, dtype=TORCH_LABEL_TYPE, device=self.device) + + num_primer = len(primer) + gen_seq[..., :num_primer] = primer.type(TORCH_LABEL_TYPE).to(self.device) + gen_seq_root[..., :num_primer] = primer_root.type(TORCH_LABEL_TYPE).to(self.device) + gen_seq_attr[..., :num_primer] = primer_attr.type(TORCH_LABEL_TYPE).to(self.device) + + cur_i = num_primer + while(cur_i < target_seq_length): + y = self.softmax( self.forward( gen_seq[..., :cur_i], gen_seq_root[..., :cur_i], gen_seq_attr[..., :cur_i], + feature_semantic_list, feature_key, feature_scene_offset, feature_motion, feature_emotion) )[..., :CHORD_END] + + token_probs = y[:, cur_i-1, :] + if(beam == 0): + beam_ran = 2.0 + else: + beam_ran = random.uniform(0,1) + if(beam_ran <= beam_chance): + token_probs = token_probs.flatten() + top_res, top_i = torch.topk(token_probs, beam) + beam_rows = top_i // CHORD_SIZE + beam_cols = top_i % CHORD_SIZE + gen_seq = gen_seq[beam_rows, :] + gen_seq[..., cur_i] = beam_cols + else: + # token_probs.shape : [1, 157] + # 0: N, 1: C, ... 
, 156: B:maj7 + # 157 chordEnd 158 padding + if max_conseq_N == 0: + token_probs[0][0] = 0.0 + isMaxChord = True + if cur_i >= max_conseq_chord : + preChord = gen_seq[0][cur_i-1].item() + for k in range (1, max_conseq_chord): + if preChord != gen_seq[0][cur_i-1-k].item(): + isMaxChord = False + else: + isMaxChord = False + + if isMaxChord: + preChord = gen_seq[0][cur_i-1].item() + token_probs[0][preChord] = 0.0 + + distrib = torch.distributions.categorical.Categorical(probs=token_probs) + next_token = distrib.sample() + gen_seq[:, cur_i] = next_token + gen_chord = chordInvDic[ str( next_token.item() ) ] + + chord_arr = gen_chord.split(":") + if len(chord_arr) == 1: + chordRootID = chordRootDic[chord_arr[0]] + chordAttrID = 1 + chordRootID = torch.tensor([chordRootID]).to(self.device) + chordAttrID = torch.tensor([chordAttrID]).to(self.device) + gen_seq_root[:, cur_i] = chordRootID + gen_seq_attr[:, cur_i] = chordAttrID + elif len(chord_arr) == 2: + chordRootID = chordRootDic[chord_arr[0]] + chordAttrID = chordAttrDic[chord_arr[1]] + chordRootID = torch.tensor([chordRootID]).to(self.device) + chordAttrID = torch.tensor([chordAttrID]).to(self.device) + gen_seq_root[:, cur_i] = chordRootID + gen_seq_attr[:, cur_i] = chordAttrID + + # Let the transformer decide to end if it wants to + if(next_token == CHORD_END): + print("Model called end of sequence at:", cur_i, "/", target_seq_length) + break + cur_i += 1 + if(cur_i % 50 == 0): + print(cur_i, "/", target_seq_length) + return gen_seq[:, :cur_i] + diff --git a/model/video_regression.py b/model/video_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..2aaf4968ed76b5170afd3a2ba06bcfd76a129dd0 --- /dev/null +++ b/model/video_regression.py @@ -0,0 +1,64 @@ +import torch +import torch.nn as nn +from torch.nn.modules.normalization import LayerNorm +import random +import numpy as np +from utilities.constants import * +from utilities.device import get_device +from datetime import datetime + +import torch.nn.functional as F + +class VideoRegression(nn.Module): + def __init__(self, n_layers=2, d_model=64, dropout=0.1, max_sequence_video=300, total_vf_dim = 0, regModel="bilstm"): + super(VideoRegression, self).__init__() + self.nlayers = n_layers + self.d_model = d_model + self.dropout = dropout + self.max_seq_video = max_sequence_video + self.total_vf_dim = total_vf_dim + self.regModel = regModel + + self.bilstm = nn.LSTM(self.total_vf_dim, self.d_model, self.nlayers, bidirectional=True) + self.bigru = nn.GRU(self.total_vf_dim, self.d_model, self.nlayers, bidirectional=True) + self.bifc = nn.Linear(self.d_model * 2, 2) + + self.lstm = nn.LSTM(self.total_vf_dim, self.d_model, self.nlayers) + self.gru = nn.GRU(self.total_vf_dim, self.d_model, self.nlayers) + self.fc = nn.Linear(self.d_model, 2) + + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + + + def forward(self, feature_semantic_list, feature_scene_offset, feature_motion, feature_emotion): + ### Video (SemanticList + SceneOffset + Motion + Emotion) (ENCODER) ### + vf_concat = feature_semantic_list[0].float() + for i in range(1, len(feature_semantic_list)): + vf_concat = torch.cat( (vf_concat, feature_semantic_list[i].float()), dim=2) + + vf_concat = torch.cat([vf_concat, feature_scene_offset.unsqueeze(-1).float()], dim=-1) + vf_concat = torch.cat([vf_concat, feature_motion.unsqueeze(-1).float()], dim=-1) + vf_concat = torch.cat([vf_concat, feature_emotion.float()], dim=-1) + + vf_concat = vf_concat.permute(1,0,2) + vf_concat = 
F.dropout(vf_concat, p=self.dropout, training=self.training) + + if self.regModel == "bilstm": + out, _ = self.bilstm(vf_concat) + out = out.permute(1,0,2) + out = self.bifc(out) + elif self.regModel == "bigru": + out, _ = self.bigru(vf_concat) + out = out.permute(1,0,2) + out = self.bifc(out) + elif self.regModel == "lstm": + out, _ = self.lstm(vf_concat) + out = out.permute(1,0,2) + out = self.fc(out) + elif self.regModel == "gru": + out, _ = self.gru(vf_concat) + out = out.permute(1,0,2) + out = self.fc(out) + return out + diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..61b2c6ce9ffb278c58aadd9cfe68cec1ae3ba1aa --- /dev/null +++ b/packages.txt @@ -0,0 +1,2 @@ +ffmpeg +fluidsynth \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab0c3868c755f2078a1e8e382b979cd54c792751 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,24 @@ +torchvision==0.9.0 +torch==1.8.0 +clip @ git+https://github.com/openai/CLIP.git +Cython==3.0.5 +numpy==1.19.5 +coloredlogs==15.0.1 +ffmpeg_python==0.2.0 +ftfy==6.1.1 +matplotlib==3.5.3 +midi2audio==0.1.1 +MIDIUtil==1.2.1 +moviepy==1.0.3 +music21==7.3.3 +opencv_python==4.7.0.72 +pandas==1.3.5 +Pillow==8.4.0 +pretty_midi==0.2.9 +pydub==0.25.1 +regex==2022.10.31 +scenedetect==0.6.1 +scikit_learn==1.0.2 +scipy==1.7.3 +gradio==4.7.1 +pyfluidsynth diff --git a/saved_models/AMT/README.md b/saved_models/AMT/README.md new file mode 100644 index 0000000000000000000000000000000000000000..84647b908c040cd170233e1b0c1289fd238dfa49 --- /dev/null +++ b/saved_models/AMT/README.md @@ -0,0 +1 @@ +put pickle files in this directory diff --git a/saved_models/AMT/best_loss_weights.pickle b/saved_models/AMT/best_loss_weights.pickle new file mode 100644 index 0000000000000000000000000000000000000000..25d01b2be9250cc3af42111c82e7d9b2234203f6 --- /dev/null +++ b/saved_models/AMT/best_loss_weights.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859f0fac92c6d4ac84446983cd138ca8d625a41e1854edbd86ea29a14f0aad28 +size 131375779 diff --git a/saved_models/AMT/best_rmse_weights.pickle b/saved_models/AMT/best_rmse_weights.pickle new file mode 100644 index 0000000000000000000000000000000000000000..f6f6e3936b81fff9856628aa8d79b5c8292ca264 --- /dev/null +++ b/saved_models/AMT/best_rmse_weights.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3680851df4f8bb7902539bc10b3025eaa7162410826c164b4aec4d44a8c19818 +size 5463439 diff --git a/third_party/midi_processor/__pycache__/processor.cpython-37.pyc b/third_party/midi_processor/__pycache__/processor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..772264dd0765aa59c39565285b5ba9da3d17753c Binary files /dev/null and b/third_party/midi_processor/__pycache__/processor.cpython-37.pyc differ diff --git a/third_party/midi_processor/__pycache__/processor.cpython-38.pyc b/third_party/midi_processor/__pycache__/processor.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3ae0acfff0e1fbcac7a233fbfef4811ef831b9b Binary files /dev/null and b/third_party/midi_processor/__pycache__/processor.cpython-38.pyc differ diff --git a/third_party/midi_processor/processor.py b/third_party/midi_processor/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..c6531853b3888c0b50e20fc72ca207dfba5cf49a --- /dev/null +++ b/third_party/midi_processor/processor.py @@ -0,0 +1,261 @@ 
+import pretty_midi
+
+RANGE_NOTE_ON = 128
+RANGE_NOTE_OFF = 128
+RANGE_VEL = 32
+RANGE_TIME_SHIFT = 100
+
+START_IDX = {
+    'note_on': 0,
+    'note_off': RANGE_NOTE_ON,
+    'time_shift': RANGE_NOTE_ON + RANGE_NOTE_OFF,
+    'velocity': RANGE_NOTE_ON + RANGE_NOTE_OFF + RANGE_TIME_SHIFT
+}
+
+class SustainAdapter:
+    def __init__(self, time, type):
+        self.start = time
+        self.type = type
+
+
+class SustainDownManager:
+    def __init__(self, start, end):
+        self.start = start
+        self.end = end
+        self.managed_notes = []
+        self._note_dict = {}  # key: pitch, value: note.start
+
+    def add_managed_note(self, note: pretty_midi.Note):
+        self.managed_notes.append(note)
+
+    def transposition_notes(self):
+        for note in reversed(self.managed_notes):
+            try:
+                note.end = self._note_dict[note.pitch]
+            except KeyError:
+                note.end = max(self.end, note.end)
+            self._note_dict[note.pitch] = note.start
+
+
+# Divided note by note_on, note_off
+class SplitNote:
+    def __init__(self, type, time, value, velocity):
+        ## type: note_on, note_off
+        self.type = type
+        self.time = time
+        self.velocity = velocity
+        self.value = value
+
+    def __repr__(self):
+        return '<[SNote] time: {} type: {}, value: {}, velocity: {}>'\
+            .format(self.time, self.type, self.value, self.velocity)
+
+
+class Event:
+    def __init__(self, event_type, value):
+        self.type = event_type
+        self.value = value
+
+    def __repr__(self):
+        return '<Event type: {}, value: {}>'.format(self.type, self.value)
+
+    def to_int(self):
+        return START_IDX[self.type] + self.value
+
+    @staticmethod
+    def from_int(int_value):
+        info = Event._type_check(int_value)
+        return Event(info['type'], info['value'])
+
+    @staticmethod
+    def _type_check(int_value):
+        range_note_on = range(0, RANGE_NOTE_ON)
+        range_note_off = range(RANGE_NOTE_ON, RANGE_NOTE_ON+RANGE_NOTE_OFF)
+        range_time_shift = range(RANGE_NOTE_ON+RANGE_NOTE_OFF, RANGE_NOTE_ON+RANGE_NOTE_OFF+RANGE_TIME_SHIFT)
+
+        valid_value = int_value
+
+        if int_value in range_note_on:
+            return {'type': 'note_on', 'value': valid_value}
+        elif int_value in range_note_off:
+            valid_value -= RANGE_NOTE_ON
+            return {'type': 'note_off', 'value': valid_value}
+        elif int_value in range_time_shift:
+            valid_value -= (RANGE_NOTE_ON + RANGE_NOTE_OFF)
+            return {'type': 'time_shift', 'value': valid_value}
+        else:
+            valid_value -= (RANGE_NOTE_ON + RANGE_NOTE_OFF + RANGE_TIME_SHIFT)
+            return {'type': 'velocity', 'value': valid_value}
+
+
+def _divide_note(notes):
+    result_array = []
+    notes.sort(key=lambda x: x.start)
+
+    for note in notes:
+        on = SplitNote('note_on', note.start, note.pitch, note.velocity)
+        off = SplitNote('note_off', note.end, note.pitch, None)
+        result_array += [on, off]
+    return result_array
+
+
+def _merge_note(snote_sequence):
+    note_on_dict = {}
+    result_array = []
+
+    for snote in snote_sequence:
+        # print(note_on_dict)
+        if snote.type == 'note_on':
+            note_on_dict[snote.value] = snote
+        elif snote.type == 'note_off':
+            try:
+                on = note_on_dict[snote.value]
+                off = snote
+                if off.time - on.time == 0:
+                    continue
+                result = pretty_midi.Note(on.velocity, snote.value, on.time, off.time)
+                result_array.append(result)
+            except KeyError:
+                # note_off without a matching note_on; drop it
+                print('info removed pitch: {}'.format(snote.value))
+    return result_array
+
+
+def _snote2events(snote: SplitNote, prev_vel: int):
+    result = []
+    if snote.velocity is not None:
+        modified_velocity = snote.velocity // 4
+        if prev_vel != modified_velocity:
+            result.append(Event(event_type='velocity', value=modified_velocity))
+    result.append(Event(event_type=snote.type, value=snote.value))
+    return result
+
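+
+# --- Editor's note (illustrative, not part of the original file) ---
+# Event.to_int() flattens the four event families into one 388-token
+# vocabulary using the START_IDX offsets above, e.g.:
+#   Event('note_on', 60).to_int()    ->   0 + 60 =  60   (middle C pressed)
+#   Event('note_off', 60).to_int()   -> 128 + 60 = 188   (middle C released)
+#   Event('time_shift', 10).to_int() -> 256 + 10 = 266   (advance (10+1)/100 = 0.11 s)
+#   Event('velocity', 20).to_int()   -> 356 + 20 = 376   (MIDI velocity bucket 20*4 = 80)
+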
+def _event_seq2snote_seq(event_sequence):
+    timeline = 0
+    velocity = 0
+    snote_seq = []
+
+    for event in event_sequence:
+        if event.type == 'time_shift':
+            timeline += ((event.value+1) / 100)
+        elif event.type == 'velocity':
+            velocity = event.value * 4
+        else:
+            snote = SplitNote(event.type, timeline, event.value, velocity)
+            snote_seq.append(snote)
+    return snote_seq
+
+
+def _make_time_sift_events(prev_time, post_time):
+    time_interval = int(round((post_time - prev_time) * 100))
+    results = []
+    while time_interval >= RANGE_TIME_SHIFT:
+        results.append(Event(event_type='time_shift', value=RANGE_TIME_SHIFT-1))
+        time_interval -= RANGE_TIME_SHIFT
+    if time_interval == 0:
+        return results
+    else:
+        return results + [Event(event_type='time_shift', value=time_interval-1)]
+
+
+def _control_preprocess(ctrl_changes):
+    sustains = []
+
+    manager = None
+    for ctrl in ctrl_changes:
+        if ctrl.value >= 64 and manager is None:
+            # sustain down
+            manager = SustainDownManager(start=ctrl.time, end=None)
+        elif ctrl.value < 64 and manager is not None:
+            # sustain up
+            manager.end = ctrl.time
+            sustains.append(manager)
+            manager = None
+        elif ctrl.value < 64 and len(sustains) > 0:
+            sustains[-1].end = ctrl.time
+    return sustains
+
+
+def _note_preprocess(susteins, notes):
+    note_stream = []
+
+    if susteins:  # if the midi file has sustain controls
+        for sustain in susteins:
+            for note_idx, note in enumerate(notes):
+                if note.start < sustain.start:
+                    note_stream.append(note)
+                elif note.start > sustain.end:
+                    notes = notes[note_idx:]
+                    sustain.transposition_notes()
+                    break
+                else:
+                    sustain.add_managed_note(note)
+
+        for sustain in susteins:
+            note_stream += sustain.managed_notes
+
+    else:  # else, just push everything into note stream
+        for note_idx, note in enumerate(notes):
+            note_stream.append(note)
+
+    note_stream.sort(key=lambda x: x.start)
+    return note_stream
+
+
+def encode_midi(file_path):
+    events = []
+    notes = []
+    mid = pretty_midi.PrettyMIDI(midi_file=file_path)
+
+    for inst in mid.instruments:
+        inst_notes = inst.notes
+        # ctrl.number is the number of sustain control.
If you want to know abour the number type of control, + # see https://www.midi.org/specifications-old/item/table-3-control-change-messages-data-bytes-2 + ctrls = _control_preprocess([ctrl for ctrl in inst.control_changes if ctrl.number == 64]) + notes += _note_preprocess(ctrls, inst_notes) + + dnotes = _divide_note(notes) + # print(dnotes) + dnotes.sort(key=lambda x: x.time) + # print('sorted:') + # print(dnotes) + cur_time = 0 + cur_vel = 0 + for snote in dnotes: + events += _make_time_sift_events(prev_time=cur_time, post_time=snote.time) + events += _snote2events(snote=snote, prev_vel=cur_vel) + # events += _make_time_sift_events(prev_time=cur_time, post_time=snote.time) + cur_time = snote.time + cur_vel = snote.velocity + + return [e.to_int() for e in events] + +def decode_midi(idx_array, file_path=None): + event_sequence = [Event.from_int(idx) for idx in idx_array] + # print(event_sequence) + snote_seq = _event_seq2snote_seq(event_sequence) + note_seq = _merge_note(snote_seq) + note_seq.sort(key=lambda x:x.start) + + mid = pretty_midi.PrettyMIDI() + # if want to change instument, see https://www.midi.org/specifications/item/gm-level-1-sound-set + instument = pretty_midi.Instrument(1, False, "Developed By Jaeyong Kang") + instument.notes = note_seq + + mid.instruments.append(instument) + if file_path is not None: + mid.write(file_path) + return mid + +# if __name__ == '__main__': +# encoded = encode_midi('bin/ADIG04.mid') +# print(encoded) +# decided = decode_midi(encoded,file_path='bin/test.mid') + +# ins = pretty_midi.PrettyMIDI('bin/ADIG04.mid') +# print(ins) +# print(ins.instruments[0]) +# for i in ins.instruments: +# print(i.control_changes) +# print(i.notes) + diff --git a/utilities/__init__.py b/utilities/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utilities/__pycache__/__init__.cpython-37.pyc b/utilities/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1c917f3725cc057a73fb1c8c56035d3f72f3df3 Binary files /dev/null and b/utilities/__pycache__/__init__.cpython-37.pyc differ diff --git a/utilities/__pycache__/__init__.cpython-38.pyc b/utilities/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..413f568d22a4d6720f04f2fd21bac47445825776 Binary files /dev/null and b/utilities/__pycache__/__init__.cpython-38.pyc differ diff --git a/utilities/__pycache__/argument_funcs.cpython-37.pyc b/utilities/__pycache__/argument_funcs.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3d8762b8a135a44f00bb9317ba234f5528c3154 Binary files /dev/null and b/utilities/__pycache__/argument_funcs.cpython-37.pyc differ diff --git a/utilities/__pycache__/chord_to_midi.cpython-37.pyc b/utilities/__pycache__/chord_to_midi.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29aea4e5b863c4c6cb365b6ac95a75c2ed930d2c Binary files /dev/null and b/utilities/__pycache__/chord_to_midi.cpython-37.pyc differ diff --git a/utilities/__pycache__/chord_to_midi.cpython-38.pyc b/utilities/__pycache__/chord_to_midi.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f66382aa7a37bf413c1f523e77cd0d5b17ad32a Binary files /dev/null and b/utilities/__pycache__/chord_to_midi.cpython-38.pyc differ diff --git a/utilities/__pycache__/constants.cpython-37.pyc b/utilities/__pycache__/constants.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..152e8575802dc8868fa801e9fba3364833e38729 Binary files /dev/null and b/utilities/__pycache__/constants.cpython-37.pyc differ diff --git a/utilities/__pycache__/constants.cpython-38.pyc b/utilities/__pycache__/constants.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc612d3513fae337d93d7d0dbc3234e792e663ed Binary files /dev/null and b/utilities/__pycache__/constants.cpython-38.pyc differ diff --git a/utilities/__pycache__/device.cpython-37.pyc b/utilities/__pycache__/device.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b3105df0fb1ed22b15ba7e1dc8b77112dbeba3c Binary files /dev/null and b/utilities/__pycache__/device.cpython-37.pyc differ diff --git a/utilities/__pycache__/device.cpython-38.pyc b/utilities/__pycache__/device.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9cc932b1e2d496ed28d13778dd032e4ec6d5c5f8 Binary files /dev/null and b/utilities/__pycache__/device.cpython-38.pyc differ diff --git a/utilities/argument_funcs.py b/utilities/argument_funcs.py new file mode 100644 index 0000000000000000000000000000000000000000..8aea43e4d23f0bc550cacd5a364a119af22d7999 --- /dev/null +++ b/utilities/argument_funcs.py @@ -0,0 +1,275 @@ +import argparse +from .constants import * + +version = VERSION +split_ver = SPLIT_VER +split_path = "split_" + split_ver + +def parse_train_args(): + parser = argparse.ArgumentParser() + + parser.add_argument("-dataset_dir", type=str, default="./dataset/", help="Folder of VEVO dataset") + + parser.add_argument("-input_dir_music", type=str, default="./dataset/vevo_chord/" + MUSIC_TYPE, help="Folder of video CNN feature files") + parser.add_argument("-input_dir_video", type=str, default="./dataset/vevo_vis", help="Folder of video CNN feature files") + + parser.add_argument("-output_dir", type=str, default="./saved_models", help="Folder to save model weights. Saves one every epoch") + + parser.add_argument("-weight_modulus", type=int, default=1, help="How often to save epoch weights (ex: value of 10 means save every 10 epochs)") + parser.add_argument("-print_modulus", type=int, default=1, help="How often to print train results for a batch (batch loss, learn rate, etc.)") + parser.add_argument("-n_workers", type=int, default=1, help="Number of threads for the dataloader") + parser.add_argument("--force_cpu", action="store_true", help="Forces model to run on a cpu even when gpu is available") + parser.add_argument("--no_tensorboard", action="store_true", help="Turns off tensorboard result reporting") + parser.add_argument("-continue_weights", type=str, default=None, help="Model weights to continue training based on") + parser.add_argument("-continue_epoch", type=int, default=None, help="Epoch the continue_weights model was at") + parser.add_argument("-lr", type=float, default=None, help="Constant learn rate. 
Leave as None for a custom scheduler.") + parser.add_argument("-ce_smoothing", type=float, default=None, help="Smoothing parameter for smoothed cross entropy loss (defaults to no smoothing)") + parser.add_argument("-batch_size", type=int, default=1, help="Batch size to use") + parser.add_argument("-epochs", type=int, default=5, help="Number of epochs to use") + + parser.add_argument("-max_sequence_midi", type=int, default=2048, help="Maximum midi sequence to consider") + parser.add_argument("-max_sequence_video", type=int, default=300, help="Maximum video sequence to consider") + parser.add_argument("-max_sequence_chord", type=int, default=300, help="Maximum video sequence to consider") + + parser.add_argument("-n_layers", type=int, default=6, help="Number of decoder layers to use") + parser.add_argument("-num_heads", type=int, default=8, help="Number of heads to use for multi-head attention") + parser.add_argument("-d_model", type=int, default=512, help="Dimension of the model (output dim of embedding layers, etc.)") + parser.add_argument("-dim_feedforward", type=int, default=1024, help="Dimension of the feedforward layer") + parser.add_argument("-dropout", type=float, default=0.1, help="Dropout rate") + + parser.add_argument("-is_video", type=bool, default=IS_VIDEO, help="MusicTransformer or VideoMusicTransformer") + + if IS_VIDEO: + parser.add_argument("-vis_models", type=str, default=VIS_MODELS_SORTED, help="...") + else: + parser.add_argument("-vis_models", type=str, default="", help="...") + + parser.add_argument("-emo_model", type=str, default="6c_l14p", help="...") + parser.add_argument("-rpr", type=bool, default=RPR, help="...") + return parser.parse_args() + +def print_train_args(args): + print(SEPERATOR) + + print("dataset_dir:", args.dataset_dir ) + + print("input_dir_music:", args.input_dir_music) + print("input_dir_video:", args.input_dir_video) + + print("output_dir:", args.output_dir) + + print("weight_modulus:", args.weight_modulus) + print("print_modulus:", args.print_modulus) + print("") + print("n_workers:", args.n_workers) + print("force_cpu:", args.force_cpu) + print("tensorboard:", not args.no_tensorboard) + print("") + print("continue_weights:", args.continue_weights) + print("continue_epoch:", args.continue_epoch) + print("") + print("lr:", args.lr) + print("ce_smoothing:", args.ce_smoothing) + print("batch_size:", args.batch_size) + print("epochs:", args.epochs) + print("") + print("rpr:", args.rpr) + + print("max_sequence_midi:", args.max_sequence_midi) + print("max_sequence_video:", args.max_sequence_video) + print("max_sequence_chord:", args.max_sequence_chord) + + print("n_layers:", args.n_layers) + print("num_heads:", args.num_heads) + print("d_model:", args.d_model) + print("") + print("dim_feedforward:", args.dim_feedforward) + print("dropout:", args.dropout) + print("is_video:", args.is_video) + + print(SEPERATOR) + print("") + +def parse_eval_args(): + if IS_VIDEO: + modelpath = "./saved_models/AMT/best_acc_weights.pickle" + # modelpath = "./saved_models/"+version+ "/"+VIS_MODELS_PATH+"/results/best_loss_weights.pickle" + else: + modelpath = "./saved_models/"+version+ "/no_video/results/best_acc_weights.pickle" + + parser = argparse.ArgumentParser() + + parser.add_argument("-dataset_dir", type=str, default="./dataset/", help="Folder of VEVO dataset") + + parser.add_argument("-input_dir_music", type=str, default="./dataset/vevo_chord/" + MUSIC_TYPE, help="Folder of video CNN feature files") + parser.add_argument("-input_dir_video", type=str, 
default="./dataset/vevo_vis", help="Folder of video CNN feature files") + + parser.add_argument("-model_weights", type=str, default= modelpath, help="Pickled model weights file saved with torch.save and model.state_dict()") + + parser.add_argument("-n_workers", type=int, default=1, help="Number of threads for the dataloader") + parser.add_argument("--force_cpu", action="store_true", help="Forces model to run on a cpu even when gpu is available") + parser.add_argument("-batch_size", type=int, default=1, help="Batch size to use") + + parser.add_argument("-max_sequence_midi", type=int, default=2048, help="Maximum midi sequence to consider") + parser.add_argument("-max_sequence_video", type=int, default=300, help="Maximum video sequence to consider") + parser.add_argument("-max_sequence_chord", type=int, default=300, help="Maximum video sequence to consider") + + parser.add_argument("-n_layers", type=int, default=6, help="Number of decoder layers to use") + parser.add_argument("-num_heads", type=int, default=8, help="Number of heads to use for multi-head attention") + parser.add_argument("-d_model", type=int, default=512, help="Dimension of the model (output dim of embedding layers, etc.)") + parser.add_argument("-dim_feedforward", type=int, default=1024, help="Dimension of the feedforward layer") + + parser.add_argument("-is_video", type=bool, default=IS_VIDEO, help="MusicTransformer or VideoMusicTransformer") + + if IS_VIDEO: + parser.add_argument("-vis_models", type=str, default=VIS_MODELS_SORTED, help="...") + else: + parser.add_argument("-vis_models", type=str, default="", help="...") + + parser.add_argument("-emo_model", type=str, default="6c_l14p", help="...") + parser.add_argument("-rpr", type=bool, default=RPR, help="...") + return parser.parse_args() + +def print_eval_args(args): + print(SEPERATOR) + print("input_dir_music:", args.input_dir_music) + print("input_dir_video:", args.input_dir_video) + + print("model_weights:", args.model_weights) + print("n_workers:", args.n_workers) + print("force_cpu:", args.force_cpu) + print("") + print("batch_size:", args.batch_size) + print("") + print("rpr:", args.rpr) + + print("max_sequence_midi:", args.max_sequence_midi) + print("max_sequence_video:", args.max_sequence_video) + print("max_sequence_chord:", args.max_sequence_chord) + + print("n_layers:", args.n_layers) + print("num_heads:", args.num_heads) + print("d_model:", args.d_model) + print("") + print("dim_feedforward:", args.dim_feedforward) + print(SEPERATOR) + print("") + +# parse_generate_args +def parse_generate_args(): + parser = argparse.ArgumentParser() + outputpath = "./output_vevo/"+version + if IS_VIDEO: + modelpath = "./saved_models/AMT/best_loss_weights.pickle" + modelpathReg = "./saved_models/AMT/best_rmse_weights.pickle" + # modelpath = "./saved_models/"+version+ "/"+VIS_MODELS_PATH+"/results/best_acc_weights.pickle" + # modelpathReg = "./saved_models/"+version+ "/"+VIS_MODELS_PATH+"/results_regression_bigru/best_rmse_weights.pickle" + else: + modelpath = "./saved_models/"+version+ "/no_video/results/best_loss_weights.pickle" + modelpathReg = None + + parser.add_argument("-dataset_dir", type=str, default="./dataset/", help="Folder of VEVO dataset") + + parser.add_argument("-input_dir_music", type=str, default="./dataset/vevo_chord/" + MUSIC_TYPE, help="Folder of video CNN feature files") + parser.add_argument("-input_dir_video", type=str, default="./dataset/vevo_vis", help="Folder of video CNN feature files") + + parser.add_argument("-output_dir", type=str, default= 
+# parse_generate_args
+def parse_generate_args():
+    parser = argparse.ArgumentParser()
+    outputpath = "./output_vevo/"+version
+    if IS_VIDEO:
+        modelpath = "./saved_models/AMT/best_loss_weights.pickle"
+        modelpathReg = "./saved_models/AMT/best_rmse_weights.pickle"
+        # modelpath = "./saved_models/"+version+ "/"+VIS_MODELS_PATH+"/results/best_acc_weights.pickle"
+        # modelpathReg = "./saved_models/"+version+ "/"+VIS_MODELS_PATH+"/results_regression_bigru/best_rmse_weights.pickle"
+    else:
+        modelpath = "./saved_models/"+version+ "/no_video/results/best_loss_weights.pickle"
+        modelpathReg = None
+
+    parser.add_argument("-dataset_dir", type=str, default="./dataset/", help="Folder of VEVO dataset")
+
+    parser.add_argument("-input_dir_music", type=str, default="./dataset/vevo_chord/" + MUSIC_TYPE, help="Folder of video CNN feature files")
+    parser.add_argument("-input_dir_video", type=str, default="./dataset/vevo_vis", help="Folder of video CNN feature files")
+
+    parser.add_argument("-output_dir", type=str, default= outputpath, help="Folder to write generated midi to")
+
+    parser.add_argument("-primer_file", type=str, default=None, help="File path or integer index to the evaluation dataset. Default is to select a random index.")
+    parser.add_argument("--force_cpu", action="store_true", help="Forces model to run on a cpu even when gpu is available")
+
+    parser.add_argument("-target_seq_length_midi", type=int, default=1024, help="Target length you'd like the midi to be")
+    parser.add_argument("-target_seq_length_chord", type=int, default=300, help="Target length you'd like the chord sequence to be")
+
+    parser.add_argument("-num_prime_midi", type=int, default=256, help="Amount of messages to prime the generator with")
+    parser.add_argument("-num_prime_chord", type=int, default=30, help="Amount of messages to prime the generator with")
+    parser.add_argument("-model_weights", type=str, default=modelpath, help="Pickled model weights file saved with torch.save and model.state_dict()")
+    parser.add_argument("-modelReg_weights", type=str, default=modelpathReg, help="Pickled model weights file saved with torch.save and model.state_dict()")
+
+    parser.add_argument("-beam", type=int, default=0, help="Beam search k. 0 for random probability sample and 1 for greedy")
+
+    parser.add_argument("-max_sequence_midi", type=int, default=2048, help="Maximum midi sequence to consider")
+    parser.add_argument("-max_sequence_video", type=int, default=300, help="Maximum video sequence to consider")
+    parser.add_argument("-max_sequence_chord", type=int, default=300, help="Maximum chord sequence to consider")
+
+    parser.add_argument("-n_layers", type=int, default=6, help="Number of decoder layers to use")
+    parser.add_argument("-num_heads", type=int, default=8, help="Number of heads to use for multi-head attention")
+    parser.add_argument("-d_model", type=int, default=512, help="Dimension of the model (output dim of embedding layers, etc.)")
+    parser.add_argument("-dim_feedforward", type=int, default=1024, help="Dimension of the feedforward layer")
+
+    parser.add_argument("-is_video", type=bool, default=IS_VIDEO, help="MusicTransformer or VideoMusicTransformer")
+
+    if IS_VIDEO:
+        parser.add_argument("-vis_models", type=str, default=VIS_MODELS_SORTED, help="...")
+    else:
+        parser.add_argument("-vis_models", type=str, default="", help="...")
+
+    parser.add_argument("-emo_model", type=str, default="6c_l14p", help="...")
+    parser.add_argument("-rpr", type=bool, default=RPR, help="...")
+    parser.add_argument("-test_id", type=str, default=None, help="Test video id to generate for")
+
+    return parser.parse_args()
+
+def print_generate_args(args):
+
+    print(SEPERATOR)
+    print("input_dir_music:", args.input_dir_music)
+    print("input_dir_video:", args.input_dir_video)
+
+    print("output_dir:", args.output_dir)
+    print("primer_file:", args.primer_file)
+    print("force_cpu:", args.force_cpu)
+    print("")
+
+    print("target_seq_length_midi:", args.target_seq_length_midi)
+    print("target_seq_length_chord:", args.target_seq_length_chord)
+
+    print("num_prime_midi:", args.num_prime_midi)
+    print("num_prime_chord:", args.num_prime_chord)
+
+    print("model_weights:", args.model_weights)
+    print("beam:", args.beam)
+    print("")
+    print("rpr:", args.rpr)
+
+    print("max_sequence_midi:", args.max_sequence_midi)
+    print("max_sequence_video:", args.max_sequence_video)
+    print("max_sequence_chord:", args.max_sequence_chord)
+
+
+    print("n_layers:", args.n_layers)
+    print("num_heads:", args.num_heads)
+    print("d_model:", args.d_model)
+    print("")
+
print("dim_feedforward:", args.dim_feedforward) + print("") + print("test_id:", args.test_id) + + print(SEPERATOR) + print("") + +# write_model_params +def write_model_params(args, output_file): + o_stream = open(output_file, "w") + + o_stream.write("rpr: " + str(args.rpr) + "\n") + o_stream.write("lr: " + str(args.lr) + "\n") + o_stream.write("ce_smoothing: " + str(args.ce_smoothing) + "\n") + o_stream.write("batch_size: " + str(args.batch_size) + "\n") + + o_stream.write("max_sequence_midi: " + str(args.max_sequence_midi) + "\n") + o_stream.write("max_sequence_video: " + str(args.max_sequence_video) + "\n") + o_stream.write("max_sequence_chord: " + str(args.max_sequence_chord) + "\n") + + o_stream.write("n_layers: " + str(args.n_layers) + "\n") + o_stream.write("num_heads: " + str(args.num_heads) + "\n") + o_stream.write("d_model: " + str(args.d_model) + "\n") + o_stream.write("dim_feedforward: " + str(args.dim_feedforward) + "\n") + o_stream.write("dropout: " + str(args.dropout) + "\n") + + o_stream.write("is_video: " + str(args.is_video) + "\n") + o_stream.write("vis_models: " + str(args.vis_models) + "\n") + o_stream.write("input_dir_music: " + str(args.input_dir_music) + "\n") + o_stream.write("input_dir_video: " + str(args.input_dir_video) + "\n") + + o_stream.close() diff --git a/utilities/chord_to_midi.py b/utilities/chord_to_midi.py new file mode 100644 index 0000000000000000000000000000000000000000..393a43e039905a4d39f71d10371b796280713c9d --- /dev/null +++ b/utilities/chord_to_midi.py @@ -0,0 +1,316 @@ +# ezchord - convert complex chord names to midi notes + +import sys +import math +import argparse +from enum import Enum, auto +from midiutil import MIDIFile + +class Mode(Enum): + DIM = auto() + MIN = auto() + MAJ = auto() + DOM = auto() + AUG = auto() + SUS2 = auto() + SUS = auto() + FIVE = auto() + +TEXT_TO_MODE = { + "maj": Mode.MAJ, + "dim": Mode.DIM, + "o": Mode.DIM, + "min": Mode.MIN, + "m": Mode.MIN, + "-": Mode.MIN, + "aug": Mode.AUG, + "+": Mode.AUG, + "sus2": Mode.SUS2, + "sus": Mode.SUS, + "5": Mode.FIVE, + "five": Mode.FIVE +} + +MODE_TO_SHIFT = { + Mode.MAJ: {3:0, 5:0}, + Mode.DOM: {3:0, 5:0}, + Mode.DIM: {3:-1, 5:-1}, + Mode.MIN: {3:-1, 5:0}, + Mode.AUG: {3:0, 5:1}, + Mode.SUS2: {3:-2, 5:0}, + Mode.SUS: {3:1, 5:0}, + Mode.FIVE: {3:3, 5:0}, +} + +NOTE_TO_PITCH = { + "a": 9, + "b": 11, + "c": 12, + "d": 14, + "e": 16, + "f": 17, + "g": 19 +} + +PITCH_TO_NOTE = {} + +for note, pitch in NOTE_TO_PITCH.items(): + PITCH_TO_NOTE[pitch] = note + +RM_TO_PITCH = { + "vii": 11, + "iii": 4, + "vi": 9, + "iv": 5, + "ii": 2, + "i": 0, + "v": 7 +} + +ACC_TO_SHIFT = { + "b": -1, + "#": 1 +} + +SCALE_DEGREE_SHIFT = { + 1: 0, + 2: 2, + 3: 4, + 4: 5, + 5: 7, + 6: 9, + 7: 11 +} + +def getNumber(string): + numStr = "" + + for char in string: + if char.isdigit(): + numStr += char + + if len(numStr) > 0: + return int(numStr) + + return + +def textToPitch(text, key = "c", voice = True): + text = text.lower() + isLetter = text[0] in NOTE_TO_PITCH.keys() + + if isLetter: + pitch = NOTE_TO_PITCH[text[0]] + else: + for rm in RM_TO_PITCH.keys(): + if rm in text: + pitch = RM_TO_PITCH[rm] + textToPitch(key) + isRomanNumeral = True + break + + for i in range(1 if isLetter else 0, len(text)): + if text[i] in ACC_TO_SHIFT.keys(): + pitch += ACC_TO_SHIFT[text[i]] + + return pitch + +def pitchToText(pitch): + octave = math.floor(pitch / 12) + pitch = pitch % 12 + pitch = pitch + (12 if pitch < 9 else 0) + accidental = "" + + if not (pitch in PITCH_TO_NOTE.keys()): + pitch = (pitch + 1) % 12 + pitch 
= pitch + (12 if pitch < 9 else 0) + accidental = "b" + + return PITCH_TO_NOTE[pitch].upper() + accidental + str(octave) + +def degreeToShift(deg): + return SCALE_DEGREE_SHIFT[(deg - 1) % 7 + 1] + math.floor(deg / 8) * 12 + +def voice(chords): + center = 0 + voiced_chords = [] + chord_ct = 0 + pChord = None + + for i, currChord in enumerate(chords): + + if len(currChord) == 0: + voiced_chords.append( [] ) + continue + else: + if chord_ct == 0: + voiced_chords.append( currChord ) + chord_ct += 1 + center = currChord[1] + 3 + pChord = currChord + continue + + prevChord = pChord + voiced_chord = [] + + for i_, currNote in enumerate(currChord): + # Skip bass note + if i_ == 0: + prevNote = prevChord[0] + if abs(currNote - prevNote) > 7: + if currNote < prevNote and abs(currNote + 12 - prevNote) < abs(currNote - prevNote): + bestVoicing = currNote + 12 + elif currNote > prevNote and abs(currNote - 12 - prevNote) < abs(currNote - prevNote): + bestVoicing = currNote - 12 + else: + bestVoicing = currNote + + voiced_chord.append(bestVoicing) + continue + + bestNeighbor = None + allowance = -1 + + while bestNeighbor == None: + allowance += 1 + for i__, prevNote in enumerate(prevChord): + if i__ == 0: + continue + + if ( + abs(currNote - prevNote) % 12 == allowance + or abs(currNote - prevNote) % 12 == 12 - allowance + ): + bestNeighbor = prevNote + break + + if currNote <= bestNeighbor: + bestVoicing = currNote + math.floor((bestNeighbor - currNote + 6) / 12) * 12 + else: + bestVoicing = currNote + math.ceil((bestNeighbor - currNote - 6) / 12) * 12 + + bestVoicing = bestVoicing if (abs(bestVoicing - center) <= 8 or allowance > 2) else currNote + voiced_chord.append(bestVoicing) + + + voiced_chord.sort() + voiced_chords.append(voiced_chord) + pChord = voiced_chord + + return voiced_chords + +class Chord: + def __init__(self, string): + self.string = string + self.degrees = {} + + string += " " + self.split = [] + sect = "" + + notes = list(NOTE_TO_PITCH.keys()) + rms = list(RM_TO_PITCH.keys()) + accs = list(ACC_TO_SHIFT.keys()) + modes = list(TEXT_TO_MODE.keys()) + + rootAdded = False + modeAdded = False + + isRomanNumeral = False + isSlashChord = False + isMaj7 = False + + for i in range(0, len(string) - 1): + sect += string[i] + currChar = string[i].lower() + nextChar = string[i+1].lower() + + rootFound = not rootAdded and (currChar in notes+rms+accs and not nextChar in rms+accs) + modeFound = False + numFound = (currChar.isdigit() and not nextChar.isdigit()) + + if ( + (i == len(string) - 2) + or rootFound + or numFound + or nextChar == "/" + or currChar == ")" + ): + if rootFound: + self.root = sect + rootAdded = True + + isRomanNumeral = self.root in rms + elif sect[0] == "/": + # case for 6/9 chords + if sect[1] == "9": + self.degrees[9] = 0 + else: + isSlashChord = True + self.bassnote = sect[1:len(sect)] + else: + if not modeAdded: + for mode in modes: + modeFound = mode in sect[0:len(mode)] + if modeFound: + self.mode = TEXT_TO_MODE[mode] + modeAdded = True + break + + if not modeAdded: + if not isRomanNumeral and str(getNumber(sect)) == sect: + self.mode = Mode.DOM + modeFound = True + modeAdded = True + + deg = getNumber(sect) + if deg != None: + shift = 0 + + for char in sect: + if char == "#": + shift += 1 + elif char == "b": + shift -= 1 + + if (not modeFound) or deg % 2 == 0: + self.degrees[deg] = shift + elif deg >= 7: + for i in range(7, deg+1): + if i % 2 != 0: + self.degrees[i] = shift + + self.split.append(sect) + sect = "" + + if not modeAdded: + # Case for minor roman numeral 
chords + if self.root in rms and self.root == self.root.lower(): + self.mode = Mode.MIN + else: + self.mode = Mode.DOM + + if not isSlashChord: + self.bassnote = self.root + + for sect in self.split: + isMaj7 = ("maj" in sect) or isMaj7 + + if (7 in self.degrees.keys()) and not isMaj7: + self.degrees[7] = -1 + + def getMIDI(self, key="c", octave=4): + notes = {} + + notes[0] = textToPitch(self.bassnote, key) - 12 + + root = textToPitch(self.root, key) + notes[1] = root + notes[3] = root + degreeToShift(3) + MODE_TO_SHIFT[self.mode][3] + notes[5] = root + degreeToShift(5) + MODE_TO_SHIFT[self.mode][5] + + for deg in self.degrees.keys(): + notes[deg] = root + degreeToShift(deg) + self.degrees[deg] + + for deg in notes.keys(): + notes[deg] += 12 * octave + + return list(notes.values()) diff --git a/utilities/constants.py b/utilities/constants.py new file mode 100644 index 0000000000000000000000000000000000000000..5c4a44d1f6aef4d591494a7bcbb014579f8014d7 --- /dev/null +++ b/utilities/constants.py @@ -0,0 +1,97 @@ +import torch +from third_party.midi_processor.processor import RANGE_NOTE_ON, RANGE_NOTE_OFF, RANGE_VEL, RANGE_TIME_SHIFT + +#Proposed (AMT l0.4) +# VERSION = "v27_video_rpr_nosep_l0.4" +VERSION = "AMT" + +#Best Baseline (MT) +# VERSION = "v27_novideo_rpr_nosep" + +IS_SEPERATED = False # True : seperated chord quality and root output +RPR = True +IS_VIDEO = True + +GEN_MODEL = "Video Music Transformer" +# LSTM +# Transformer +# Music Transformer +# Video Music Transformer + +LOSS_LAMBDA = 0.4 # lamda * chord + ( 1-lamda ) * emotion + +EMOTION_THRESHOLD = 0.80 + +VIS_MODELS = "2d/clip_l14p" +SPLIT_VER = "v1" + +MUSIC_TYPE = "lab_v2_norm" +# - midi_prep +# - lab +# - lab_v2 +# - lab_v2_norm +# ----------------------------------------- # + +VIS_ABBR_DIC = { + "2d/clip_l14p" : "clip_l14p", # NEW +} + +vis_arr = VIS_MODELS.split(" ") +vis_arr.sort() +vis_abbr_path = "" +for v in vis_arr: + vis_abbr_path = vis_abbr_path + "_" + VIS_ABBR_DIC[v] +vis_abbr_path = vis_abbr_path[1:] + +VIS_MODELS_PATH = vis_abbr_path +VIS_MODELS_SORTED = " ".join(vis_arr) + +# CHORD +CHORD_END = 157 +CHORD_PAD = CHORD_END + 1 +CHORD_SIZE = CHORD_PAD + 1 + +# CHORD_ROOT +CHORD_ROOT_END = 13 +CHORD_ROOT_PAD = CHORD_ROOT_END + 1 +CHORD_ROOT_SIZE = CHORD_ROOT_PAD + 1 + +# CHORD_ATTR +CHORD_ATTR_END = 14 +CHORD_ATTR_PAD = CHORD_ATTR_END + 1 +CHORD_ATTR_SIZE = CHORD_ATTR_PAD + 1 + +# SEMANTIC +SEMANTIC_PAD = 0.0 + +# SCENE_OFFSET +SCENE_OFFSET_PAD = 0.0 + +# MOTION +MOTION_PAD = 0.0 + +# EMOTION +EMOTION_PAD = 0.0 + +# NOTE_DENSITY +NOTE_DENSITY_PAD = 0.0 + +# LOUDNESS +LOUDNESS_PAD = 0.0 + +# OTHER +SEPERATOR = "=========================" +ADAM_BETA_1 = 0.9 +ADAM_BETA_2 = 0.98 +ADAM_EPSILON = 10e-9 +LR_DEFAULT_START = 1.0 +SCHEDULER_WARMUP_STEPS = 4000 +TORCH_FLOAT = torch.float32 +TORCH_INT = torch.int32 +TORCH_LABEL_TYPE = torch.long +PREPEND_ZEROS_WIDTH = 4 + +# MIDI +TOKEN_END = RANGE_NOTE_ON + RANGE_NOTE_OFF + RANGE_VEL + RANGE_TIME_SHIFT +TOKEN_PAD = TOKEN_END + 1 +VOCAB_SIZE = TOKEN_PAD + 1 diff --git a/utilities/device.py b/utilities/device.py new file mode 100755 index 0000000000000000000000000000000000000000..61f0cf29ef9c1698842ef9ebcda48581fa165c34 --- /dev/null +++ b/utilities/device.py @@ -0,0 +1,67 @@ +# For all things related to devices +#### ONLY USE PROVIDED FUNCTIONS, DO NOT USE GLOBAL CONSTANTS #### + +import torch + +TORCH_CPU_DEVICE = torch.device("cpu") + +if(torch.cuda.device_count() > 0): + TORCH_CUDA_DEVICE = torch.device("cuda:0") +else: + print("----- WARNING: CUDA devices not detected. 
This will cause the model to run very slowly! -----") + print("") + TORCH_CUDA_DEVICE = None + +USE_CUDA = False + +# use_cuda +def use_cuda(cuda_bool): + """ + ---------- + Author: Damon Gwinn + ---------- + Sets whether to use CUDA (if available), or use the CPU (not recommended) + ---------- + """ + + global USE_CUDA + USE_CUDA = cuda_bool + +# get_device +def get_device(): + """ + ---------- + Author: Damon Gwinn + ---------- + Grabs the default device. The default device is CUDA if available and use_cuda is not False, CPU otherwise. + ---------- + """ + + if((not USE_CUDA) or (TORCH_CUDA_DEVICE is None)): + return TORCH_CPU_DEVICE + else: + return TORCH_CUDA_DEVICE + +# cuda_device +def cuda_device(): + """ + ---------- + Author: Damon Gwinn + ---------- + Grabs the CUDA device (may be None if CUDA is not available) + ---------- + """ + + return TORCH_CUDA_DEVICE + +# cpu_device +def cpu_device(): + """ + ---------- + Author: Damon Gwinn + ---------- + Grabs the CPU device + ---------- + """ + + return TORCH_CPU_DEVICE diff --git a/utilities/lr_scheduling.py b/utilities/lr_scheduling.py new file mode 100644 index 0000000000000000000000000000000000000000..6620a03c8d06c7b4dd3b2467db5dfa2b1ac5b9a5 --- /dev/null +++ b/utilities/lr_scheduling.py @@ -0,0 +1,58 @@ +import math + +# LrStepTracker +class LrStepTracker: + """ + ---------- + Author: Ryan Marshall + Modified: Damon Gwinn + ---------- + Class for a custom learning rate scheduler (to be used by torch.optim.lr_scheduler.LambdaLR). + + The learning rate for each step (batch), given the warmup steps, is: + lr = [ 1/sqrt(d_model) ] * min[ 1/sqrt(step) , step * (warmup_steps)^-1.5 ] + + This is from Attention Is All You Need (https://arxiv.org/abs/1706.03762) + ---------- + """ + + def __init__(self, model_dim=512, warmup_steps=4000, init_steps=0): + # Store Values + self.warmup_steps = warmup_steps + self.model_dim = model_dim + self.init_steps = init_steps + + # Begin Calculations + self.invsqrt_dim = (1 / math.sqrt(model_dim)) + self.invsqrt_warmup = (1 / (warmup_steps * math.sqrt(warmup_steps))) + + # step + def step(self, step): + """ + ---------- + Author: Ryan Marshall + Modified: Damon Gwinn + ---------- + Method to pass to LambdaLR. Increments the step and computes the new learning rate.
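+
+ For example, with the default model_dim=512 and warmup_steps=4000 the two branches agree at the peak, step 4000: lr = (1/sqrt(512)) * (1/sqrt(4000)) ≈ 7.0e-4. Before the peak the rate grows linearly with the step count; afterwards it decays as 1/sqrt(step).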
+ ---------- + """ + + step += self.init_steps + if(step <= self.warmup_steps): + return self.invsqrt_dim * self.invsqrt_warmup * step + else: + invsqrt_step = (1 / math.sqrt(step)) + return self.invsqrt_dim * invsqrt_step + +# get_lr +def get_lr(optimizer): + """ + ---------- + Author: Damon Gwinn + ---------- + Hack to get the current learn rate of the model + ---------- + """ + + for param_group in optimizer.param_groups: + return param_group['lr'] diff --git a/utilities/preprocessing.py b/utilities/preprocessing.py new file mode 100755 index 0000000000000000000000000000000000000000..e0c59e653f39aad11928d223e0087b4e33e78423 --- /dev/null +++ b/utilities/preprocessing.py @@ -0,0 +1,39 @@ +import torch as th + +class Normalize(object): + + def __init__(self, mean, std): + self.mean = th.FloatTensor(mean).view(1, 3, 1, 1) + self.std = th.FloatTensor(std).view(1, 3, 1, 1) + + def __call__(self, tensor): + tensor = (tensor - self.mean) / (self.std + 1e-8) + return tensor + +class Preprocessing(object): + + def __init__(self, type): + self.type = type + if type == '2d': + self.norm = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + elif type == '3d': + self.norm = Normalize(mean=[110.6, 103.2, 96.3], std=[1.0, 1.0, 1.0]) + + def _zero_pad(self, tensor, size): + n = size - len(tensor) % size + if n == size: + return tensor + else: + z = th.zeros(n, tensor.shape[1], tensor.shape[2], tensor.shape[3]) + return th.cat((tensor, z), 0) + + def __call__(self, tensor): + if self.type == '2d': + tensor = tensor / 255.0 + tensor = self.norm(tensor) + elif self.type == '3d': + tensor = self._zero_pad(tensor, 16) + tensor = self.norm(tensor) + tensor = tensor.view(-1, 16, 3, 112, 112) + tensor = tensor.transpose(1, 2) + return tensor diff --git a/utilities/run_model_regression.py b/utilities/run_model_regression.py new file mode 100644 index 0000000000000000000000000000000000000000..e6eb1deaf004390dc32a2d80167df032166a9f5d --- /dev/null +++ b/utilities/run_model_regression.py @@ -0,0 +1,120 @@ +import torch +import time + +from .constants import * +from utilities.device import get_device +from .lr_scheduling import get_lr +import torch.nn.functional as F + +def train_epoch(cur_epoch, model, dataloader, loss, opt, lr_scheduler=None, print_modulus=1): + out = -1 + model.train() + for batch_num, batch in enumerate(dataloader): + time_before = time.time() + opt.zero_grad() + + feature_semantic_list = [] + for feature_semantic in batch["semanticList"]: + feature_semantic_list.append( feature_semantic.to(get_device()) ) + + feature_scene_offset = batch["scene_offset"].to(get_device()) + feature_motion = batch["motion"].to(get_device()) + feature_emotion = batch["emotion"].to(get_device()) + + feature_note_density = batch["note_density"].to(get_device()) + feature_loudness = batch["loudness"].to(get_device()) + + y = model( + feature_semantic_list, + feature_scene_offset, + feature_motion, + feature_emotion) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + + feature_loudness = feature_loudness.flatten().reshape(-1,1) # (300, 1) + feature_note_density = feature_note_density.flatten().reshape(-1,1) # (300, 1) + feature_combined = torch.cat((feature_note_density, feature_loudness), dim=1) # (300, 2) + + out = loss.forward(y, feature_combined) + out.backward() + opt.step() + + if(lr_scheduler is not None): + lr_scheduler.step() + time_after = time.time() + time_took = time_after - time_before + + if((batch_num+1) % print_modulus == 0): + print(SEPERATOR) + print("Epoch", cur_epoch, " Batch", 
batch_num+1, "/", len(dataloader)) + print("LR:", get_lr(opt)) + print("Train loss:", float(out)) + print("") + print("Time (s):", time_took) + print(SEPERATOR) + print("") + return + +def eval_model(model, dataloader, loss): + model.eval() + + avg_rmse = -1 + avg_loss = -1 + avg_rmse_note_density = -1 + avg_rmse_loudness = -1 + with torch.set_grad_enabled(False): + n_test = len(dataloader) + + sum_loss = 0.0 + + sum_rmse = 0.0 + sum_rmse_note_density = 0.0 + sum_rmse_loudness = 0.0 + + for batch in dataloader: + feature_semantic_list = [] + for feature_semantic in batch["semanticList"]: + feature_semantic_list.append( feature_semantic.to(get_device()) ) + + feature_scene_offset = batch["scene_offset"].to(get_device()) + feature_motion = batch["motion"].to(get_device()) + feature_emotion = batch["emotion"].to(get_device()) + feature_loudness = batch["loudness"].to(get_device()) + feature_note_density = batch["note_density"].to(get_device()) + + y = model( + feature_semantic_list, + feature_scene_offset, + feature_motion, + feature_emotion) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + + feature_loudness = feature_loudness.flatten().reshape(-1,1) # (300, 1) + feature_note_density = feature_note_density.flatten().reshape(-1,1) # (300, 1) + feature_combined = torch.cat((feature_note_density, feature_loudness), dim=1) # (300, 2) + + mse = F.mse_loss(y, feature_combined) + rmse = torch.sqrt(mse) + sum_rmse += float(rmse) + + y_note_density, y_loudness = torch.split(y, split_size_or_sections=1, dim=1) + + mse_note_density = F.mse_loss(y_note_density, feature_note_density) + rmse_note_density = torch.sqrt(mse_note_density) + sum_rmse_note_density += float(rmse_note_density) + + mse_loudness = F.mse_loss(y_loudness, feature_loudness) + rmse_loudness = torch.sqrt(mse_loudness) + sum_rmse_loudness += float(rmse_loudness) + + out = loss.forward(y, feature_combined) + sum_loss += float(out) + + avg_loss = sum_loss / n_test + avg_rmse = sum_rmse / n_test + avg_rmse_note_density = sum_rmse_note_density / n_test + avg_rmse_loudness = sum_rmse_loudness / n_test + + return avg_loss, avg_rmse, avg_rmse_note_density, avg_rmse_loudness diff --git a/utilities/run_model_vevo.py b/utilities/run_model_vevo.py new file mode 100644 index 0000000000000000000000000000000000000000..7b8349d4e25355e75b8543699754fe86752f050f --- /dev/null +++ b/utilities/run_model_vevo.py @@ -0,0 +1,525 @@ +import torch +import time + +from .constants import * +from utilities.device import get_device +from .lr_scheduling import get_lr +import numpy as np +import matplotlib.pyplot as plt +from sklearn.metrics import confusion_matrix +import json + +from dataset.vevo_dataset import compute_vevo_accuracy, compute_vevo_correspondence, compute_hits_k, compute_hits_k_root_attr, compute_vevo_accuracy_root_attr, compute_vevo_correspondence_root_attr + +def train_epoch(cur_epoch, model, dataloader, + train_loss_func, train_loss_emotion_func, + opt, lr_scheduler=None, print_modulus=1, isVideo=True): + + loss_chord = -1 + loss_emotion = -1 + model.train() + for batch_num, batch in enumerate(dataloader): + time_before = time.time() + opt.zero_grad() + + x = batch["x"].to(get_device()) + tgt = batch["tgt"].to(get_device()) + x_root = batch["x_root"].to(get_device()) + tgt_root = batch["tgt_root"].to(get_device()) + x_attr = batch["x_attr"].to(get_device()) + tgt_attr = batch["tgt_attr"].to(get_device()) + tgt_emotion = batch["tgt_emotion"].to(get_device()) + tgt_emotion_prob = batch["tgt_emotion_prob"].to(get_device()) + + feature_semantic_list = 
[] + for feature_semantic in batch["semanticList"]: + feature_semantic_list.append( feature_semantic.to(get_device()) ) + + feature_key = batch["key"].to(get_device()) + feature_scene_offset = batch["scene_offset"].to(get_device()) + feature_motion = batch["motion"].to(get_device()) + feature_emotion = batch["emotion"].to(get_device()) + + if isVideo: + # use VideoMusicTransformer + if IS_SEPERATED: + y_root, y_attr = model(x, + x_root, + x_attr, + feature_semantic_list, + feature_key, + feature_scene_offset, + feature_motion, + feature_emotion) + + y_root = y_root.reshape(y_root.shape[0] * y_root.shape[1], -1) + y_attr = y_attr.reshape(y_attr.shape[0] * y_attr.shape[1], -1) + + tgt_root = tgt_root.flatten() + tgt_attr = tgt_attr.flatten() + + tgt_emotion = tgt_emotion.squeeze() + + loss_chord_root = train_loss_func.forward(y_root, tgt_root) + loss_chord_attr = train_loss_func.forward(y_attr, tgt_attr) + loss_chord = loss_chord_root + loss_chord_attr + + first_14 = tgt_emotion[:, :14] + last_2 = tgt_emotion[:, -2:] + tgt_emotion_attr = torch.cat((first_14, last_2), dim=1) + + loss_emotion = train_loss_emotion_func.forward(y_attr, tgt_emotion_attr) + + total_loss = LOSS_LAMBDA * loss_chord + (1-LOSS_LAMBDA) * loss_emotion + total_loss.backward() + opt.step() + if(lr_scheduler is not None): + lr_scheduler.step() + + else: + #videomusic tran nosep + y = model(x, + x_root, + x_attr, + feature_semantic_list, + feature_key, + feature_scene_offset, + feature_motion, + feature_emotion) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + tgt = tgt.flatten() + tgt_emotion = tgt_emotion.squeeze() + loss_chord = train_loss_func.forward(y, tgt) + loss_emotion = train_loss_emotion_func.forward(y, tgt_emotion) + total_loss = LOSS_LAMBDA * loss_chord + (1-LOSS_LAMBDA) * loss_emotion + total_loss.backward() + opt.step() + if(lr_scheduler is not None): + lr_scheduler.step() + + else: + # music transformer + if IS_SEPERATED: + y_root, y_attr = model(x, + x_root, + x_attr, + feature_key) + + y_root = y_root.reshape(y_root.shape[0] * y_root.shape[1], -1) + y_attr = y_attr.reshape(y_attr.shape[0] * y_attr.shape[1], -1) + + tgt_root = tgt_root.flatten() + tgt_attr = tgt_attr.flatten() + + tgt_emotion = tgt_emotion.squeeze() + + loss_chord_root = train_loss_func.forward(y_root, tgt_root) + loss_chord_attr = train_loss_func.forward(y_attr, tgt_attr) + + loss_chord = loss_chord_root + loss_chord_attr + loss_emotion = -1 + + total_loss = loss_chord + total_loss.backward() + opt.step() + if(lr_scheduler is not None): + lr_scheduler.step() + else: + # use MusicTransformer (no sep) + y = model(x, + x_root, + x_attr, + feature_key) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + tgt = tgt.flatten() + + loss_chord = train_loss_func.forward(y, tgt) + loss_emotion = -1 + + total_loss = loss_chord + total_loss.backward() + + opt.step() + + if(lr_scheduler is not None): + lr_scheduler.step() + + time_after = time.time() + time_took = time_after - time_before + + if((batch_num+1) % print_modulus == 0): + print(SEPERATOR) + print("Epoch", cur_epoch, " Batch", batch_num+1, "/", len(dataloader)) + print("LR:", get_lr(opt)) + print("Train loss (total):", float(total_loss)) + print("Train loss (chord):", float(loss_chord)) + print("Train loss (emotion):", float(loss_emotion)) + print("") + print("Time (s):", time_took) + print(SEPERATOR) + print("") + return + +def eval_model(model, dataloader, + eval_loss_func, eval_loss_emotion_func, + isVideo = True, isGenConfusionMatrix=False): + model.eval() + avg_acc = -1 + avg_cor = -1 + 
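+ # NOTE (editor's illustrative sketch, not repository code): train_epoch
+ # above optimizes the lambda-weighted two-term objective
+ #   total_loss = LOSS_LAMBDA * loss_chord + (1 - LOSS_LAMBDA) * loss_emotion
+ # with LOSS_LAMBDA = 0.4 from utilities/constants.py. The commented
+ # snippet below reproduces that combination on dummy tensors; the
+ # CrossEntropyLoss / BCEWithLogitsLoss pairing and the shapes are
+ # assumptions for illustration only, since the actual train_loss_func and
+ # train_loss_emotion_func are supplied by the caller.
+ #
+ #   import torch
+ #   import torch.nn as nn
+ #   ce = nn.CrossEntropyLoss(ignore_index=CHORD_PAD)    # chord tokens
+ #   bce = nn.BCEWithLogitsLoss()                        # emotion probs
+ #   y = torch.randn(300, CHORD_SIZE)                    # flattened logits
+ #   tgt = torch.randint(0, CHORD_SIZE, (300,))          # chord targets
+ #   tgt_emotion = torch.rand(300, CHORD_SIZE)           # emotion targets
+ #   total_loss = LOSS_LAMBDA * ce(y, tgt) + (1 - LOSS_LAMBDA) * bce(y, tgt_emotion)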
avg_acc_cor = -1 + + avg_h1 = -1 + avg_h3 = -1 + avg_h5 = -1 + + avg_loss_chord = -1 + avg_loss_emotion = -1 + avg_total_loss = -1 + + true_labels = [] + true_root_labels = [] + true_attr_labels = [] + + pred_labels = [] + pred_root_labels = [] + pred_attr_labels = [] + + with torch.set_grad_enabled(False): + n_test = len(dataloader) + n_test_cor = 0 + + sum_loss_chord = 0.0 + sum_loss_emotion = 0.0 + sum_total_loss = 0.0 + + sum_acc = 0.0 + sum_cor = 0.0 + + sum_h1 = 0.0 + sum_h3 = 0.0 + sum_h5 = 0.0 + + for batch in dataloader: + x = batch["x"].to(get_device()) + tgt = batch["tgt"].to(get_device()) + x_root = batch["x_root"].to(get_device()) + tgt_root = batch["tgt_root"].to(get_device()) + x_attr = batch["x_attr"].to(get_device()) + tgt_attr = batch["tgt_attr"].to(get_device()) + tgt_emotion = batch["tgt_emotion"].to(get_device()) + tgt_emotion_prob = batch["tgt_emotion_prob"].to(get_device()) + + feature_semantic_list = [] + for feature_semantic in batch["semanticList"]: + feature_semantic_list.append( feature_semantic.to(get_device()) ) + + feature_key = batch["key"].to(get_device()) + feature_scene_offset = batch["scene_offset"].to(get_device()) + feature_motion = batch["motion"].to(get_device()) + feature_emotion = batch["emotion"].to(get_device()) + + if isVideo: + if IS_SEPERATED: + y_root, y_attr = model(x, + x_root, + x_attr, + feature_semantic_list, + feature_key, + feature_scene_offset, + feature_motion, + feature_emotion) + + sum_acc += float(compute_vevo_accuracy_root_attr(y_root, y_attr, tgt)) + cor = float(compute_vevo_correspondence_root_attr(y_root, y_attr, tgt, tgt_emotion, tgt_emotion_prob, EMOTION_THRESHOLD)) + if cor >= 0 : + n_test_cor +=1 + sum_cor += cor + + sum_h1 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,1)) + sum_h3 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,3)) + sum_h5 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,5)) + + y_root = y_root.reshape(y_root.shape[0] * y_root.shape[1], -1) + y_attr = y_attr.reshape(y_attr.shape[0] * y_attr.shape[1], -1) + + tgt_root = tgt_root.flatten() + tgt_attr = tgt_attr.flatten() + tgt_emotion = tgt_emotion.squeeze() + + loss_chord_root = eval_loss_func.forward(y_root, tgt_root) + loss_chord_attr = eval_loss_func.forward(y_attr, tgt_attr) + loss_chord = loss_chord_root + loss_chord_attr + + first_14 = tgt_emotion[:, :14] + last_2 = tgt_emotion[:, -2:] + tgt_emotion_attr = torch.cat((first_14, last_2), dim=1) + + loss_emotion = eval_loss_emotion_func.forward(y_attr, tgt_emotion_attr) + total_loss = LOSS_LAMBDA * loss_chord + (1-LOSS_LAMBDA) * loss_emotion + + sum_loss_chord += float(loss_chord) + sum_loss_emotion += float(loss_emotion) + sum_total_loss += float(total_loss) + else: + y= model(x, + x_root, + x_attr, + feature_semantic_list, + feature_key, + feature_scene_offset, + feature_motion, + feature_emotion) + + sum_acc += float(compute_vevo_accuracy(y, tgt )) + cor = float(compute_vevo_correspondence(y, tgt, tgt_emotion, tgt_emotion_prob, EMOTION_THRESHOLD)) + if cor >= 0 : + n_test_cor +=1 + sum_cor += cor + + sum_h1 += float(compute_hits_k(y, tgt,1)) + sum_h3 += float(compute_hits_k(y, tgt,3)) + sum_h5 += float(compute_hits_k(y, tgt,5)) + + y = y.reshape(y.shape[0] * y.shape[1], -1) + + tgt = tgt.flatten() + tgt_root = tgt_root.flatten() + tgt_attr = tgt_attr.flatten() + + tgt_emotion = tgt_emotion.squeeze() + + loss_chord = eval_loss_func.forward(y, tgt) + loss_emotion = eval_loss_emotion_func.forward(y, tgt_emotion) + total_loss = LOSS_LAMBDA * loss_chord + (1-LOSS_LAMBDA) * loss_emotion 
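+ # NOTE (editor's illustrative sketch, not repository code): the metric
+ # helpers compute_vevo_accuracy / compute_hits_k used above come from
+ # dataset.vevo_dataset, which is outside this diff. A minimal Hits@k of
+ # the same flavor could look like this; the padding handling is an
+ # assumption, not the project's definition:
+ #
+ #   def sketch_hits_k(y, tgt, k, pad_id=CHORD_PAD):
+ #       # y: (batch, seq, CHORD_SIZE) logits; tgt: (batch, seq) chord ids
+ #       topk = y.topk(k, dim=-1).indices            # (batch, seq, k)
+ #       hit = (topk == tgt.unsqueeze(-1)).any(-1)   # target in top-k?
+ #       mask = tgt != pad_id                        # skip padded positions
+ #       return hit[mask].float().mean()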
+ + sum_loss_chord += float(loss_chord) + sum_loss_emotion += float(loss_emotion) + sum_total_loss += float(total_loss) + + if isGenConfusionMatrix: + pred = y.argmax(dim=1).detach().cpu().numpy() + pred_root = [] + pred_attr = [] + + for i in pred: + if i == 0: + pred_root.append(0) + pred_attr.append(0) + elif i == 157: + pred_root.append(CHORD_ROOT_END) + pred_attr.append(CHORD_ATTR_END) + elif i == 158: + pred_root.append(CHORD_ROOT_PAD) + pred_attr.append(CHORD_ATTR_PAD) + else: + rootindex = int( (i-1)/13 ) + 1 + attrindex = (i-1)%13 + 1 + pred_root.append(rootindex) + pred_attr.append(attrindex) + + pred_root = np.array(pred_root) + pred_attr = np.array(pred_attr) + + true = tgt.detach().cpu().numpy() + true_root = tgt_root.detach().cpu().numpy() + true_attr = tgt_attr.detach().cpu().numpy() + + pred_labels.extend(pred) + pred_root_labels.extend(pred_root) + pred_attr_labels.extend(pred_attr) + + true_labels.extend(true) + true_root_labels.extend(true_root) + true_attr_labels.extend(true_attr) + else: + if IS_SEPERATED: + y_root, y_attr = model(x, + x_root, + x_attr, + feature_key) + + sum_acc += float(compute_vevo_accuracy_root_attr(y_root, y_attr, tgt)) + cor = float(compute_vevo_correspondence_root_attr(y_root, y_attr, tgt, tgt_emotion, tgt_emotion_prob, EMOTION_THRESHOLD)) + if cor >= 0 : + n_test_cor +=1 + sum_cor += cor + + sum_h1 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,1)) + sum_h3 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,3)) + sum_h5 += float(compute_hits_k_root_attr(y_root, y_attr, tgt,5)) + + y_root = y_root.reshape(y_root.shape[0] * y_root.shape[1], -1) + y_attr = y_attr.reshape(y_attr.shape[0] * y_attr.shape[1], -1) + + tgt_root = tgt_root.flatten() + tgt_attr = tgt_attr.flatten() + tgt_emotion = tgt_emotion.squeeze() + + loss_chord_root = eval_loss_func.forward(y_root, tgt_root) + loss_chord_attr = eval_loss_func.forward(y_attr, tgt_attr) + loss_chord = loss_chord_root + loss_chord_attr + + first_14 = tgt_emotion[:, :14] + last_2 = tgt_emotion[:, -2:] + tgt_emotion_attr = torch.cat((first_14, last_2), dim=1) + loss_emotion = eval_loss_emotion_func.forward(y_attr, tgt_emotion_attr) + + total_loss = LOSS_LAMBDA * loss_chord + (1-LOSS_LAMBDA) * loss_emotion + + sum_loss_chord += float(loss_chord) + sum_loss_emotion += float(loss_emotion) + sum_total_loss += float(total_loss) + else: + # use MusicTransformer no sep + y = model(x, + x_root, + x_attr, + feature_key) + + sum_acc += float(compute_vevo_accuracy(y, tgt )) + cor = float(compute_vevo_correspondence(y, tgt, tgt_emotion, tgt_emotion_prob, EMOTION_THRESHOLD)) + + if cor >= 0 : + n_test_cor +=1 + sum_cor += cor + + sum_h1 += float(compute_hits_k(y, tgt,1)) + sum_h3 += float(compute_hits_k(y, tgt,3)) + sum_h5 += float(compute_hits_k(y, tgt,5)) + + tgt_emotion = tgt_emotion.squeeze() + + y = y.reshape(y.shape[0] * y.shape[1], -1) + tgt = tgt.flatten() + loss_chord = eval_loss_func.forward(y, tgt) + loss_emotion = eval_loss_emotion_func.forward(y, tgt_emotion) + total_loss = loss_chord + + sum_loss_chord += float(loss_chord) + sum_loss_emotion += float(loss_emotion) + sum_total_loss += float(total_loss) + + avg_loss_chord = sum_loss_chord / n_test + avg_loss_emotion = sum_loss_emotion / n_test + avg_total_loss = sum_total_loss / n_test + + avg_acc = sum_acc / n_test + avg_cor = sum_cor / n_test_cor + + avg_h1 = sum_h1 / n_test + avg_h3 = sum_h3 / n_test + avg_h5 = sum_h5 / n_test + + avg_acc_cor = (avg_acc + avg_cor)/ 2.0 + + if isGenConfusionMatrix: + chordInvDicPath = 
"./dataset/vevo_meta/chord_inv.json" + chordRootInvDicPath = "./dataset/vevo_meta/chord_root_inv.json" + chordAttrInvDicPath = "./dataset/vevo_meta/chord_attr_inv.json" + + with open(chordInvDicPath) as json_file: + chordInvDic = json.load(json_file) + with open(chordRootInvDicPath) as json_file: + chordRootInvDic = json.load(json_file) + with open(chordAttrInvDicPath) as json_file: + chordAttrInvDic = json.load(json_file) + + # Confusion matrix (CHORD) + topChordList = [] + with open("./dataset/vevo_meta/top_chord.txt", encoding = 'utf-8') as f: + for line in f: + line = line.strip() + line_arr = line.split(" ") + if len(line_arr) == 3 : + chordID = line_arr[1] + topChordList.append( int(chordID) ) + topChordList = np.array(topChordList) + topChordList = topChordList[:10] + mask = np.isin(true_labels, topChordList) + true_labels = np.array(true_labels)[mask] + pred_labels = np.array(pred_labels)[mask] + + conf_matrix = confusion_matrix(true_labels, pred_labels, labels=topChordList) + label_names = [ chordInvDic[str(label_id)] for label_id in topChordList ] + + plt.figure(figsize=(8, 6)) + plt.imshow(conf_matrix, cmap=plt.cm.Blues) + plt.title("Confusion Matrix") + plt.colorbar() + tick_marks = np.arange(len(topChordList)) + plt.xticks(tick_marks, label_names, rotation=45) + plt.yticks(tick_marks, label_names) + thresh = conf_matrix.max() / 2.0 + for i in range(conf_matrix.shape[0]): + for j in range(conf_matrix.shape[1]): + plt.text(j, i, format(conf_matrix[i, j], 'd'), + ha="center", va="center", + color="white" if conf_matrix[i, j] > thresh else "black") + plt.ylabel('True label') + plt.xlabel('Predicted label') + plt.tight_layout() + plt.savefig("confusion_matrix.png") + plt.show() + + # Confusion matrix (CHORD ROOT) + chordRootList = np.arange(1, 13) + conf_matrix = confusion_matrix(true_root_labels, pred_root_labels, labels= chordRootList ) + + label_names = [ chordRootInvDic[str(label_id)] for label_id in chordRootList ] + + plt.figure(figsize=(8, 6)) + plt.imshow(conf_matrix, cmap=plt.cm.Blues) + plt.title("Confusion Matrix (Chord root)") + plt.colorbar() + tick_marks = np.arange(len(chordRootList)) + plt.xticks(tick_marks, label_names, rotation=45) + plt.yticks(tick_marks, label_names) + thresh = conf_matrix.max() / 2.0 + for i in range(conf_matrix.shape[0]): + for j in range(conf_matrix.shape[1]): + plt.text(j, i, format(conf_matrix[i, j], 'd'), + ha="center", va="center", + color="white" if conf_matrix[i, j] > thresh else "black") + plt.ylabel('True label') + plt.xlabel('Predicted label') + plt.tight_layout() + plt.savefig("confusion_matrix_root.png") + plt.show() + + # Confusion matrix (CHORD ATTR) + chordAttrList = np.arange(1, 14) + conf_matrix = confusion_matrix(true_attr_labels, pred_attr_labels, labels= chordAttrList ) + + label_names = [ chordAttrInvDic[str(label_id)] for label_id in chordAttrList ] + + plt.figure(figsize=(8, 6)) + plt.imshow(conf_matrix, cmap=plt.cm.Blues) + plt.title("Confusion Matrix (Chord quality)") + plt.colorbar() + tick_marks = np.arange(len(chordAttrList)) + plt.xticks(tick_marks, label_names, rotation=45) + plt.yticks(tick_marks, label_names) + thresh = conf_matrix.max() / 2.0 + for i in range(conf_matrix.shape[0]): + for j in range(conf_matrix.shape[1]): + plt.text(j, i, format(conf_matrix[i, j], 'd'), + ha="center", va="center", + color="white" if conf_matrix[i, j] > thresh else "black") + plt.ylabel('True label') + plt.xlabel('Predicted label') + plt.tight_layout() + plt.savefig("confusion_matrix_quality.png") + plt.show() + + return { 
"avg_total_loss" : avg_total_loss, + "avg_loss_chord" : avg_loss_chord, + "avg_loss_emotion": avg_loss_emotion, + "avg_acc" : avg_acc, + "avg_cor" : avg_cor, + "avg_acc_cor" : avg_acc_cor, + "avg_h1" : avg_h1, + "avg_h3" : avg_h3, + "avg_h5" : avg_h5 } + diff --git a/utilities/video_loader.py b/utilities/video_loader.py new file mode 100755 index 0000000000000000000000000000000000000000..9261585228a28c563ba974fa49ce02da2a548572 --- /dev/null +++ b/utilities/video_loader.py @@ -0,0 +1,83 @@ +import torch as th +from torch.utils.data import Dataset +import pandas as pd +import os +import numpy as np +import ffmpeg + +class VideoLoader(Dataset): + def __init__( + self, + fileList = [], + framerate=1, + size=112, + centercrop=False, + ): + #self.csv = pd.read_csv(csv) + self.fileList = fileList + + self.centercrop = centercrop + self.size = size + self.framerate = framerate + + def __len__(self): + return len(self.fileList) + + def _get_video_dim(self, video_path): + probe = ffmpeg.probe(video_path) + video_stream = next((stream for stream in probe['streams'] + if stream['codec_type'] == 'video'), None) + width = int(video_stream['width']) + height = int(video_stream['height']) + return height, width + + def _get_output_dim(self, h, w): + if isinstance(self.size, tuple) and len(self.size) == 2: + return self.size + elif h >= w: + return int(h * self.size / w), self.size + else: + return self.size, int(w * self.size / h) + + def __getitem__(self, idx): + + video_path = self.fileList[idx] + output_file = video_path[:video_path.rfind(".")] + ".npy" + + #video_path = self.csv['video_path'].values[idx] + #output_file = self.csv['feature_path'].values[idx] + + if not(os.path.isfile(output_file)) and os.path.isfile(video_path): + print('Decoding video: {}'.format(video_path)) + + + try: + h, w = self._get_video_dim(video_path) + except: + print('ffprobe failed at: {}'.format(video_path)) + return {'video': th.zeros(1), 'input': video_path, + 'output': output_file} + height, width = self._get_output_dim(h, w) + cmd = ( + ffmpeg + .input(video_path) + .filter('fps', fps=self.framerate) + .filter('scale', width, height) + ) + if self.centercrop: + x = int((width - self.size) / 2.0) + y = int((height - self.size) / 2.0) + cmd = cmd.crop(x, y, self.size, self.size) + out, _ = ( + cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') + .run(capture_stdout=True, quiet=True) + ) + if self.centercrop and isinstance(self.size, int): + height, width = self.size, self.size + video = np.frombuffer(out, np.uint8).reshape([-1, height, width, 3]) + video = th.from_numpy(video.astype('float32')) + video = video.permute(0, 3, 1, 2) + else: + video = th.zeros(1) + + return {'video': video, 'input': video_path, 'output': output_file}