File size: 2,226 Bytes
4e46a55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import torch
from third_party.midi_processor.processor import RANGE_NOTE_ON, RANGE_NOTE_OFF, RANGE_VEL, RANGE_TIME_SHIFT

# Version tag of the active configuration.
# Tags noted by the original author, kept for reference:
#   "v27_video_rpr_nosep_l0.4"  - proposed (AMT l0.4)
#   "v27_novideo_rpr_nosep"     - best baseline (MT)
VERSION = "AMT"

# True: chord quality and chord root are produced as separate outputs.
IS_SEPERATED = False
# NOTE(review): presumably toggles relative positional representation (RPR)
# attention -- confirm against the model code.
RPR = True
# NOTE(review): presumably enables the video-conditioned inputs -- confirm.
IS_VIDEO = True

# Generator architecture. Options listed by the original author:
#   "LSTM", "Transformer", "Music Transformer", "Video Music Transformer"
GEN_MODEL = "Video Music Transformer"

# Loss weighting: LOSS_LAMBDA * chord + (1 - LOSS_LAMBDA) * emotion
LOSS_LAMBDA = 0.4

EMOTION_THRESHOLD = 0.80

# Whitespace-separated list of visual model ids; every id must be a key of
# VIS_ABBR_DIC below.
VIS_MODELS = "2d/clip_l14p"
SPLIT_VER = "v1"

# Options listed by the original author:
#   "midi_prep", "lab", "lab_v2", "lab_v2_norm"
MUSIC_TYPE = "lab_v2_norm"
# ----------------------------------------- #

# Maps a visual model id to the short name used to build VIS_MODELS_PATH.
VIS_ABBR_DIC = {
    "2d/clip_l14p": "clip_l14p",  # NEW
}

# Split on any run of whitespace so a doubled or trailing space in VIS_MODELS
# does not produce an empty id (which would fail the VIS_ABBR_DIC lookup with
# KeyError('')). Sorting makes the derived names independent of the order in
# which the ids were listed. A blank VIS_MODELS yields an empty list/path.
vis_arr = sorted(VIS_MODELS.split())

# Abbreviations joined with "_". An id missing from VIS_ABBR_DIC raises
# KeyError here, i.e. when this module is imported. The generator keeps the
# loop variable out of the module namespace.
vis_abbr_path = "_".join(VIS_ABBR_DIC[model_id] for model_id in vis_arr)

VIS_MODELS_PATH = vis_abbr_path
VIS_MODELS_SORTED = " ".join(vis_arr)

# Chord ids: CHORD_END and CHORD_PAD are consecutive ids and CHORD_SIZE is one
# past CHORD_PAD.
# NOTE(review): presumably END is the end-of-sequence id and PAD the padding
# id, as in the MIDI token block -- confirm against the dataset code.
CHORD_END = 157
CHORD_PAD = CHORD_END + 1
CHORD_SIZE = CHORD_PAD + 1

# Chord-root ids, same END / PAD / SIZE layout.
CHORD_ROOT_END = 13
CHORD_ROOT_PAD = CHORD_ROOT_END + 1
CHORD_ROOT_SIZE = CHORD_ROOT_PAD + 1

# Chord-attribute ids, same END / PAD / SIZE layout.
CHORD_ATTR_END = 14
CHORD_ATTR_PAD = CHORD_ATTR_END + 1
CHORD_ATTR_SIZE = CHORD_ATTR_PAD + 1

# Float pad values, one per feature stream (all 0.0).
SEMANTIC_PAD = 0.0
SCENE_OFFSET_PAD = 0.0
MOTION_PAD = 0.0
EMOTION_PAD = 0.0
NOTE_DENSITY_PAD = 0.0
LOUDNESS_PAD = 0.0

# Miscellaneous constants.
SEPERATOR = "========================="

# Adam optimizer hyper-parameters.
ADAM_BETA_1 = 0.9
ADAM_BETA_2 = 0.98
# NOTE(review): the literal 10e-9 evaluates to 1e-8, not 1e-9 -- confirm which
# value is intended before touching it, since it changes training behaviour.
ADAM_EPSILON = 10e-9

# Learning-rate schedule defaults.
LR_DEFAULT_START = 1.0
SCHEDULER_WARMUP_STEPS = 4000

# Torch dtypes shared across the project.
TORCH_FLOAT = torch.float32
TORCH_INT = torch.int32
TORCH_LABEL_TYPE = torch.long

PREPEND_ZEROS_WIDTH = 4

# MIDI token ids: TOKEN_END is the sum of the four event-range sizes imported
# from the MIDI processor, TOKEN_PAD is the next id, and VOCAB_SIZE is one
# past TOKEN_PAD.
TOKEN_END = (
    RANGE_NOTE_ON
    + RANGE_NOTE_OFF
    + RANGE_VEL
    + RANGE_TIME_SHIFT
)
TOKEN_PAD = TOKEN_END + 1
VOCAB_SIZE = TOKEN_PAD + 1