| { | |
| "_name_": "PianoFlow", | |
| "_model_version_": "1.0.0", | |
| "_symupe_version_": "0.1.0", | |
| "dim": 512, | |
| "transformer": { | |
| "token_embeddings": { | |
| "_target_": "simple", | |
| "emb_dims": 64, | |
| "mode": "cat", | |
| "emb_norm": true, | |
| "discrete": false, | |
| "sinusoidal": true, | |
| "sinusoidal_learned": true, | |
| "project_emb_dim": null, | |
| "project_bias": false, | |
| "embedding_kwargs": { | |
| "depth": 1, | |
| "log_inv_freq": true, | |
| "with_positions": false | |
| }, | |
| "tie_keys": { | |
| "TimeDurationSustain": "TimeDuration" | |
| }, | |
| "special_tokens": { | |
| "PAD_None": 0, | |
| "MASK_None": 1, | |
| "BOS_None": 2, | |
| "EOS_None": 3, | |
| "IGNORE_None": 4, | |
| "EOD_None": 5, | |
| "Bar_Line": 6, | |
| "Pedal_On": 7, | |
| "Pedal_Off": 8 | |
| } | |
| }, | |
| "emb_norm": false, | |
| "emb_dropout": 0.0, | |
| "use_abs_pos_emb": false, | |
| "project_bias": false, | |
| "time_embedding": "adanorm", | |
| "time_embedding_dim": 32, | |
| "time_embedding_freq_dim": null, | |
| "transformer": { | |
| "_target_": "encoder", | |
| "depth": 8, | |
| "memory_tokens": 4, | |
| "final_norm_bias": false, | |
| "attention": { | |
| "_target_": "global", | |
| "heads": 8, | |
| "head_dim": 64, | |
| "one_kv_head": true, | |
| "dropout": 0.1, | |
| "rotary_pos_emb": true, | |
| "rotary_emb_base": 1024, | |
| "alibi_pos_bias": false, | |
| "alibi_learned": true, | |
| "alibi_heads": 8 | |
| }, | |
| "feed_forward": { | |
| "mult": 3, | |
| "glu": true, | |
| "swish": true, | |
| "dropout": 0.1 | |
| } | |
| }, | |
| "value_head": { | |
| "_target_": "value", | |
| "ranges": null | |
| }, | |
| "type_embedding": "sum", | |
| "context_embedding": "layer_sum", | |
| "context_embedding_dim": null, | |
| "context_layer_ids": [ | |
| 4 | |
| ], | |
| "score_token_embeddings": { | |
| "_target_": "simple", | |
| "emb_dims": 64, | |
| "mode": "cat", | |
| "emb_norm": true, | |
| "discrete": false, | |
| "sinusoidal": true, | |
| "sinusoidal_learned": true, | |
| "project_emb_dim": null, | |
| "project_bias": false, | |
| "embedding_kwargs": { | |
| "depth": 1, | |
| "log_inv_freq": true, | |
| "with_positions": false | |
| }, | |
| "tie_keys": { | |
| "TimeDurationSustain": "TimeDuration" | |
| } | |
| }, | |
| "score_tokens_dropout": 0.2 | |
| }, | |
| "value_mean": { | |
| "Pitch": 0.5, | |
| "Position": 0.37, | |
| "PositionShift": 0.05, | |
| "Duration": 0.14, | |
| "Velocity": 0.5, | |
| "TimeShift": 0.1, | |
| "TimeDuration": 0.22, | |
| "TimeDurationSustain": 0.5 | |
| }, | |
| "value_std": { | |
| "Pitch": 0.1, | |
| "Position": 0.28, | |
| "PositionShift": 0.06, | |
| "Duration": 0.13, | |
| "Velocity": 0.13, | |
| "TimeShift": 0.12, | |
| "TimeDuration": 0.3, | |
| "TimeDurationSustain": 0.62 | |
| }, | |
| "value_keys": [ | |
| "Velocity", | |
| "TimeShift", | |
| "TimeDuration", | |
| "TimeDurationSustain" | |
| ], | |
| "num_tokens": { | |
| "Pitch": 97, | |
| "Position": 202, | |
| "PositionShift": 142, | |
| "Duration": 142, | |
| "Velocity": 137, | |
| "TimeShift": 371, | |
| "TimeDuration": 319, | |
| "TimeDurationSustain": 319 | |
| }, | |
| "context_num_tokens": null, | |
| "score_num_tokens": { | |
| "Velocity": 137, | |
| "Tempo": 170 | |
| } | |
| } |