rrayy committed · Commit 28e8aa2 · 1 parent: 399b810

Changes to be committed: dropped the loss value to 26
modified: DIVA_Model_dict.pt
modified: DIVA_Model_full.pt
modified: Models/Vector2MIDI.py
modified: train.ipynb
modified: utility/lossf.py
- DIVA_Model_dict.pt +1 -1
- DIVA_Model_full.pt +1 -1
- Models/Vector2MIDI.py +59 -4
- train.ipynb +0 -0
- utility/lossf.py +8 -7
DIVA_Model_dict.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fcb8fcf23418eb641517ef8ff4a23adccca450f2923f24adc8798a5791750376
 size 51786305
DIVA_Model_full.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f9a974be02de8e6751ce63d61b7b4b8d6fb49960d7772bb3674fd55262916e3
 size 51788289
Models/Vector2MIDI.py CHANGED
@@ -19,16 +19,24 @@ class Vector2MIDI(nn.Module):
         self.fc_mid = nn.Linear(hidden_dim, 256)
         self.fc_out = nn.Linear(256, n_vocab)
 
+    def init_hidden_states(self, x):
+        """Create the initial hidden and cell states."""
+        h0 = torch.tanh(self.init_hidden(x))  # activation function added (hyperbolic tangent)
+        c0 = torch.tanh(self.init_cell(x))
+
+        h0 = h0.unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
+        c0 = c0.unsqueeze(0).repeat(2, 1, 1)
+
+        return h0, c0
+
     def forward(self, x, lengths, target_tokens):
         """
         x: (B, input_dim) - input vector
         lengths: [B] - sequence lengths
         target_tokens: (B, T, n_vocab) - one-hot or embedded token input
         """
-
-
-        h0 = self.init_hidden(x).unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
-        c0 = self.init_cell(x).unsqueeze(0).repeat(2, 1, 1)
+
+        h0, c0 = self.init_hidden_states(x)  # create the initial states
 
         packed_input = pack_padded_sequence(target_tokens, lengths.cpu(), batch_first=True, enforce_sorted=False)
         packed_out, _ = self.lstm(packed_input, (h0, c0))
@@ -38,3 +46,50 @@ class Vector2MIDI(nn.Module):
         out = self.fc_out(out)  # (B, T, vocab_size)
 
         return out
+
+    def generate(self, x, max_length, start_token=None, temperature=1.0, top_k=None):
+        self.eval()
+        batch_size = x.size(0)
+        device = x.device
+
+        # initial states
+        h, c = self.init_hidden_states(x)
+
+        # set the start token
+        if start_token is None:
+            start_token = 0
+
+        # current input (one-hot vector)
+        current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+        current_input[:, 0, start_token] = 1.0
+
+        generated_tokens = []
+
+        with torch.no_grad():
+            for _ in range(max_length):
+                lstm_out, (h, c) = self.lstm(current_input, (h, c))
+
+                # same as forward (ReLU removed)
+                out = self.fc_mid(lstm_out)
+                logits = self.fc_out(out)[:, -1, :]
+
+                if temperature != 1.0:
+                    logits = logits / temperature
+
+                if top_k is not None:
+                    top_k_logits, top_k_indices = torch.topk(logits, top_k)
+                    mask = torch.full_like(logits, float('-inf'))
+                    logits = mask.scatter(1, top_k_indices, top_k_logits)
+
+                probs = F.softmax(logits, dim=-1)
+                next_token = torch.multinomial(probs, 1)  # (B, 1)
+
+                generated_tokens.append(next_token)
+
+                # prepare the next input (one-hot)
+                current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+                current_input.scatter_(2, next_token.unsqueeze(-1), 1.0)
+
+        result = torch.cat(generated_tokens, dim=1).to(torch.int64)  # (B, max_length)
+
+        return result.tolist()
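The new generate method samples autoregressively: the initial LSTM state is conditioned on the input vector via init_hidden_states, and each sampled token is fed back in as a one-hot vector, with optional temperature scaling and top-k filtering of the logits. A minimal usage sketch follows; the constructor signature (input_dim, hidden_dim, n_vocab) and the sizes used here are assumptions, since neither appears in this diff.

import torch
from Models.Vector2MIDI import Vector2MIDI

# Hypothetical sizes; the real values come from the training setup, which this diff does not show.
INPUT_DIM, HIDDEN_DIM, N_VOCAB = 7, 512, 388

model = Vector2MIDI(INPUT_DIM, HIDDEN_DIM, N_VOCAB)  # assumed constructor signature
model.load_state_dict(torch.load("DIVA_Model_dict.pt", map_location="cpu"))

x = torch.randn(1, INPUT_DIM)  # conditioning vector, shape (B, input_dim)
tokens = model.generate(x, max_length=256, temperature=0.9, top_k=20)  # sample 256 tokens
print(len(tokens), len(tokens[0]))  # B lists, each with max_length token ids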
train.ipynb CHANGED
The diff for this file is too large to render. See raw diff.
utility/lossf.py CHANGED
@@ -4,7 +4,7 @@ from pysdtw import distance
 from pysdtw import SoftDTW
 
 class HuberDTW_CrossEntropyLoss(Module):
-    def __init__(self, device:device
+    def __init__(self, device:device): # type: ignore
         super(HuberDTW_CrossEntropyLoss, self).__init__()
 
         use_cuda = device.type == "cuda"
@@ -12,7 +12,6 @@ class HuberDTW_CrossEntropyLoss(Module):
 
         self.sdtw = SoftDTW(1.0, fun, use_cuda)  # Soft Dynamic Time Warping (compares timesteps to compute the loss -> lets gradients flow) https://judy-son.tistory.com/3
         self.huber = HuberLoss(reduction='none', delta=1.0).to(device)  # HuberLoss (reduction='none' to compute per-timestep losses)
-        self.cel = CrossEntropyLoss(ignore_index=ignore_index).to(device)  # CrossEntropyLoss (for classification)
         self.device = device
 
     def forward(self, input: Tensor, target: Tensor, lengths_batch: Tensor):
@@ -22,14 +21,16 @@ class HuberDTW_CrossEntropyLoss(Module):
         cut_input = input[:, :min_len, :]
         cut_target = target[:, :min_len, :]
 
+        # Huber loss (main loss term)
         loss_HL = self.huber(cut_input, cut_target).mean(dim=2)  # (B, T), mean over the 7 dimensions
         mask = arange(max_len, device=self.device).unsqueeze(0) < lengths_batch.unsqueeze(1)  # (B, T)
         loss_HL = (loss_HL * mask[:, :min_len]).sum() / mask[:, :min_len].sum()  # exclude padding for Huber only (its shape differs from sdtw)
 
-
-
-        loss_sm =
-
+        # one-hot cross entropy (a light classification term)
+        log_probs = F.log_softmax(cut_input, dim=-1)
+        loss_sm = -(cut_target * log_probs).sum(dim=-1).mean()  # (B, T)
+
+        # SoftDTW (length-normalized to stabilize the scale)
         loss_sdtw = self.sdtw(input, target).mean() / min_len
 
-        return 0.
+        return loss_HL*0.9 + loss_sm*0.08 + loss_sdtw*0.02
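The returned value now mixes the three terms as 0.9·Huber + 0.08·soft cross entropy + 0.02·length-normalized SoftDTW, replacing the removed nn.CrossEntropyLoss with a manual soft-target cross entropy. A self-contained sketch of just that term is below; the batch, time, and vocab sizes are illustrative, not taken from the repo. With exact one-hot targets it reduces to the standard index-based cross entropy.

import torch
import torch.nn.functional as F

B, T, V = 2, 5, 10  # illustrative batch, time, and vocab sizes
logits = torch.randn(B, T, V)
target_idx = torch.randint(0, V, (B, T))
target_onehot = F.one_hot(target_idx, V).float()

# Soft-target cross entropy as written in lossf.py: -(target * log_softmax(logits)).sum(-1).mean()
log_probs = F.log_softmax(logits, dim=-1)
loss_sm = -(target_onehot * log_probs).sum(dim=-1).mean()

# With hard one-hot targets this equals CrossEntropyLoss over class indices.
loss_ce = F.cross_entropy(logits.reshape(-1, V), target_idx.reshape(-1))
assert torch.allclose(loss_sm, loss_ce, atol=1e-6)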