rrayy committed · Commit 28e8aa2 · 1 parent: 399b810

Changes to be committed: dropped the loss value to 26
modified: DIVA_Model_dict.pt
modified: DIVA_Model_full.pt
modified: Models/Vector2MIDI.py
modified: train.ipynb
modified: utility/lossf.py
- DIVA_Model_dict.pt +1 -1
- DIVA_Model_full.pt +1 -1
- Models/Vector2MIDI.py +59 -4
- train.ipynb +0 -0
- utility/lossf.py +8 -7
DIVA_Model_dict.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fcb8fcf23418eb641517ef8ff4a23adccca450f2923f24adc8798a5791750376
 size 51786305
DIVA_Model_full.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f9a974be02de8e6751ce63d61b7b4b8d6fb49960d7772bb3674fd55262916e3
 size 51788289
Models/Vector2MIDI.py CHANGED
@@ -19,16 +19,24 @@ class Vector2MIDI(nn.Module):
         self.fc_mid = nn.Linear(hidden_dim, 256)
         self.fc_out = nn.Linear(256, n_vocab)
 
+    def init_hidden_states(self, x):
+        """Create the initial hidden and cell states."""
+        h0 = torch.tanh(self.init_hidden(x))  # activation function added (hyperbolic tangent)
+        c0 = torch.tanh(self.init_cell(x))
+
+        h0 = h0.unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
+        c0 = c0.unsqueeze(0).repeat(2, 1, 1)
+
+        return h0, c0
+
     def forward(self, x, lengths, target_tokens):
         """
         x: (B, input_dim) - input vector
         lengths: [B] - sequence lengths
         target_tokens: (B, T, n_vocab) - one-hot or embedded token input
         """
-
-
-        h0 = self.init_hidden(x).unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
-        c0 = self.init_cell(x).unsqueeze(0).repeat(2, 1, 1)
+
+        h0, c0 = self.init_hidden_states(x)  # create the initial states
 
         packed_input = pack_padded_sequence(target_tokens, lengths.cpu(), batch_first=True, enforce_sorted=False)
         packed_out, _ = self.lstm(packed_input, (h0, c0))
@@ -38,3 +46,50 @@ class Vector2MIDI(nn.Module):
         out = self.fc_out(out)  # (B, T, vocab_size)
 
         return out
+
+    def generate(self, x, max_length, start_token=None, temperature=1.0, top_k=None):
+        self.eval()
+        batch_size = x.size(0)
+        device = x.device
+
+        # initial states
+        h, c = self.init_hidden_states(x)
+
+        # set the start token
+        if start_token is None:
+            start_token = 0
+
+        # current input (one-hot vector)
+        current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+        current_input[:, 0, start_token] = 1.0
+
+        generated_tokens = []
+
+        with torch.no_grad():
+            for _ in range(max_length):
+                lstm_out, (h, c) = self.lstm(current_input, (h, c))
+
+                # same as forward (ReLU removed)
+                out = self.fc_mid(lstm_out)
+                logits = self.fc_out(out)[:, -1, :]
+
+                if temperature != 1.0:
+                    logits = logits / temperature
+
+                if top_k is not None:
+                    top_k_logits, top_k_indices = torch.topk(logits, top_k)
+                    mask = torch.full_like(logits, float('-inf'))
+                    logits = mask.scatter(1, top_k_indices, top_k_logits)
+
+                probs = F.softmax(logits, dim=-1)
+                next_token = torch.multinomial(probs, 1)  # (B, 1)
+
+                generated_tokens.append(next_token)
+
+                # prepare the next input (one-hot)
+                current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+                current_input.scatter_(2, next_token.unsqueeze(-1), 1.0)
+
+        result = torch.cat(generated_tokens, dim=1).to(torch.int64)  # (B, max_length)
+
+        return result.tolist()
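The new generate method samples autoregressively: the initial LSTM state is conditioned on the input vector via init_hidden_states, and each sampled token is fed back in as a one-hot vector, with optional temperature scaling and top-k filtering of the logits. A minimal usage sketch follows; the constructor signature (input_dim, hidden_dim, n_vocab) and the sizes used here are assumptions, since neither appears in this diff.

import torch
from Models.Vector2MIDI import Vector2MIDI

# Hypothetical sizes; the real values come from the training setup, which this diff does not show.
INPUT_DIM, HIDDEN_DIM, N_VOCAB = 7, 512, 388

model = Vector2MIDI(INPUT_DIM, HIDDEN_DIM, N_VOCAB)  # assumed constructor signature
model.load_state_dict(torch.load("DIVA_Model_dict.pt", map_location="cpu"))

x = torch.randn(1, INPUT_DIM)  # conditioning vector, shape (B, input_dim)
tokens = model.generate(x, max_length=256, temperature=0.9, top_k=20)  # sample 256 tokens
print(len(tokens), len(tokens[0]))  # B lists, each with max_length token ids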
train.ipynb CHANGED
The diff for this file is too large to render. See raw diff.
utility/lossf.py CHANGED
@@ -4,7 +4,7 @@ from pysdtw import distance
 from pysdtw import SoftDTW
 
 class HuberDTW_CrossEntropyLoss(Module):
-    def __init__(self, device:device
+    def __init__(self, device:device): # type: ignore
         super(HuberDTW_CrossEntropyLoss, self).__init__()
 
         use_cuda = device.type == "cuda"
@@ -12,7 +12,6 @@ class HuberDTW_CrossEntropyLoss(Module):
 
         self.sdtw = SoftDTW(1.0, fun, use_cuda)  # Soft Dynamic Time Warping (compares timesteps to compute the loss -> lets gradients flow) https://judy-son.tistory.com/3
         self.huber = HuberLoss(reduction='none', delta=1.0).to(device)  # HuberLoss (reduction='none' to compute per-timestep losses)
-        self.cel = CrossEntropyLoss(ignore_index=ignore_index).to(device)  # CrossEntropyLoss (for classification)
         self.device = device
 
     def forward(self, input: Tensor, target: Tensor, lengths_batch: Tensor):
@@ -22,14 +21,16 @@ class HuberDTW_CrossEntropyLoss(Module):
         cut_input = input[:, :min_len, :]
         cut_target = target[:, :min_len, :]
 
+        # Huber loss (main loss term)
         loss_HL = self.huber(cut_input, cut_target).mean(dim=2)  # (B, T), mean over the 7 dimensions
         mask = arange(max_len, device=self.device).unsqueeze(0) < lengths_batch.unsqueeze(1)  # (B, T)
         loss_HL = (loss_HL * mask[:, :min_len]).sum() / mask[:, :min_len].sum()  # exclude padding for Huber only (its shape differs from sdtw)
 
-
-
-        loss_sm =
-
+        # one-hot cross entropy (a light classification term)
+        log_probs = F.log_softmax(cut_input, dim=-1)
+        loss_sm = -(cut_target * log_probs).sum(dim=-1).mean()  # (B, T)
+
+        # SoftDTW (length-normalized to stabilize the scale)
         loss_sdtw = self.sdtw(input, target).mean() / min_len
 
-        return 0.
+        return loss_HL*0.9 + loss_sm*0.08 + loss_sdtw*0.02
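The returned value now mixes the three terms as 0.9·Huber + 0.08·soft cross entropy + 0.02·length-normalized SoftDTW, replacing the removed nn.CrossEntropyLoss with a manual soft-target cross entropy. A self-contained sketch of just that term is below; the batch, time, and vocab sizes are illustrative, not taken from the repo. With exact one-hot targets it reduces to the standard index-based cross entropy.

import torch
import torch.nn.functional as F

B, T, V = 2, 5, 10  # illustrative batch, time, and vocab sizes
logits = torch.randn(B, T, V)
target_idx = torch.randint(0, V, (B, T))
target_onehot = F.one_hot(target_idx, V).float()

# Soft-target cross entropy as written in lossf.py: -(target * log_softmax(logits)).sum(-1).mean()
log_probs = F.log_softmax(logits, dim=-1)
loss_sm = -(target_onehot * log_probs).sum(dim=-1).mean()

# With hard one-hot targets this equals CrossEntropyLoss over class indices.
loss_ce = F.cross_entropy(logits.reshape(-1, V), target_idx.reshape(-1))
assert torch.allclose(loss_sm, loss_ce, atol=1e-6)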