rrayy committed on
Commit 28e8aa2 · 1 Parent(s): 399b810

Changes to be committed: dropped the loss down to 26

modified: DIVA_Model_dict.pt
modified: DIVA_Model_full.pt
modified: Models/Vector2MIDI.py
modified: train.ipynb
modified: utility/lossf.py

DIVA_Model_dict.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6a510890b3fc4bc4ad0a0c151d76752a1b2cbcea52aef39f5ebe0e187c97e6a
+oid sha256:fcb8fcf23418eb641517ef8ff4a23adccca450f2923f24adc8798a5791750376
 size 51786305
DIVA_Model_full.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ea2e7979e6ba5ba6e0cb5591f8258c2a231c68a0f7e43656c81bbad045e03bd
+oid sha256:9f9a974be02de8e6751ce63d61b7b4b8d6fb49960d7772bb3674fd55262916e3
 size 51788289
Models/Vector2MIDI.py CHANGED
@@ -19,16 +19,24 @@ class Vector2MIDI(nn.Module):
         self.fc_mid = nn.Linear(hidden_dim, 256)
         self.fc_out = nn.Linear(256, n_vocab)
 
+    def init_hidden_states(self, x):
+        """Build the initial hidden and cell states"""
+        h0 = torch.tanh(self.init_hidden(x))  # activation added (hyperbolic tangent)
+        c0 = torch.tanh(self.init_cell(x))
+
+        h0 = h0.unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
+        c0 = c0.unsqueeze(0).repeat(2, 1, 1)
+
+        return h0, c0
+
     def forward(self, x, lengths, target_tokens):
         """
         x: (B, input_dim) - input vector
         lengths: [B] - sequence lengths
         target_tokens: (B, T, n_vocab) - one-hot or embedded token input
         """
-        B = x.size(0)
-
-        h0 = self.init_hidden(x).unsqueeze(0).repeat(2, 1, 1)  # (num_layers, B, H)
-        c0 = self.init_cell(x).unsqueeze(0).repeat(2, 1, 1)
+
+        h0, c0 = self.init_hidden_states(x)  # build initial states
 
         packed_input = pack_padded_sequence(target_tokens, lengths.cpu(), batch_first=True, enforce_sorted=False)
         packed_out, _ = self.lstm(packed_input, (h0, c0))
@@ -38,3 +46,50 @@ class Vector2MIDI(nn.Module):
         out = self.fc_out(out)  # (B, T, vocab_size)
 
         return out
+
+    def generate(self, x, max_length, start_token=None, temperature=1.0, top_k=None):
+        self.eval()
+        batch_size = x.size(0)
+        device = x.device
+
+        # initial states
+        h, c = self.init_hidden_states(x)
+
+        # start token
+        if start_token is None:
+            start_token = 0
+
+        # current input (one-hot vector)
+        current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+        current_input[:, 0, start_token] = 1.0
+
+        generated_tokens = []
+
+        with torch.no_grad():
+            for _ in range(max_length):
+                lstm_out, (h, c) = self.lstm(current_input, (h, c))
+
+                # same as forward (relu removed)
+                out = self.fc_mid(lstm_out)
+                logits = self.fc_out(out)[:, -1, :]
+
+                if temperature != 1.0:
+                    logits = logits / temperature
+
+                if top_k is not None:
+                    top_k_logits, top_k_indices = torch.topk(logits, top_k)
+                    mask = torch.full_like(logits, float('-inf'))
+                    logits = mask.scatter(1, top_k_indices, top_k_logits)
+
+                probs = F.softmax(logits, dim=-1)
+                next_token = torch.multinomial(probs, 1)  # (B, 1)
+
+                generated_tokens.append(next_token)
+
+                # prepare the next input (one-hot)
+                current_input = torch.zeros(batch_size, 1, self.n_vocab, device=device)
+                current_input.scatter_(2, next_token.unsqueeze(-1), 1.0)
+
+        result = torch.cat(generated_tokens, dim=1).to(torch.int64)  # (B, max_length)
+
+        return result.tolist()
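
For context on the new generate() method above: it decodes autoregressively from the conditioning vector, feeding each sampled token back in as a one-hot input. A minimal calling sketch (not part of this commit), assuming a trained Vector2MIDI instance named model and a conditioning batch x of shape (B, input_dim); the idx2event mapping is hypothetical:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)   # assumed: trained Vector2MIDI instance
x = x.to(device)           # assumed: (B, input_dim) conditioning vectors

# sample up to 512 tokens per sequence; temperature and top_k trade diversity vs. fidelity
token_ids = model.generate(x, max_length=512, start_token=0, temperature=0.9, top_k=20)

# generate() returns a nested Python list of shape (B, max_length); mapping indices back
# to MIDI events needs the training vocabulary (hypothetical idx2event dict):
# events = [[idx2event[i] for i in seq] for seq in token_ids]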
train.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
utility/lossf.py CHANGED
@@ -4,7 +4,7 @@ from pysdtw import distance
 from pysdtw import SoftDTW
 
 class HuberDTW_CrossEntropyLoss(Module):
-    def __init__(self, device:device, ignore_index:int = None): # type: ignore
+    def __init__(self, device:device): # type: ignore
         super(HuberDTW_CrossEntropyLoss, self).__init__()
 
         use_cuda = device.type == "cuda"
@@ -12,7 +12,6 @@ class HuberDTW_CrossEntropyLoss(Module):
 
         self.sdtw = SoftDTW(1.0, fun, use_cuda)  # Soft Dynamic Time Warping (compares timesteps to compute the loss -> lets gradients flow) https://judy-son.tistory.com/3
         self.huber = HuberLoss(reduction='none', delta=1.0).to(device)  # HuberLoss (reduction='none' to compute per-timestep losses)
-        self.cel = CrossEntropyLoss(ignore_index=ignore_index).to(device)  # CrossEntropyLoss (for classification)
         self.device = device
 
     def forward(self, input: Tensor, target: Tensor, lengths_batch: Tensor):
@@ -22,14 +21,16 @@ class HuberDTW_CrossEntropyLoss(Module):
         cut_input = input[:, :min_len, :]
         cut_target = target[:, :min_len, :]
 
+        # Huber loss (main loss term)
         loss_HL = self.huber(cut_input, cut_target).mean(dim=2)  # (B, T), mean over the 7 feature dims
         mask = arange(max_len, device=self.device).unsqueeze(0) < lengths_batch.unsqueeze(1)  # (B, T)
         loss_HL = (loss_HL * mask[:, :min_len]).sum() / mask[:, :min_len].sum()  # padding excluded only for huber (its shape differs from sdtw)
 
-        log_probs = F.log_softmax(cut_input, dim=-1)  # (B, T, C)
-        loss_sm = -(cut_target * log_probs).sum(dim=-1)  # (B, T)
-        loss_sm = loss_sm.mean()  # scalar
-
+        # One-hot cross-entropy (small classification term)
+        log_probs = F.log_softmax(cut_input, dim=-1)
+        loss_sm = -(cut_target * log_probs).sum(dim=-1).mean()  # (B, T) -> scalar
+
+        # SoftDTW (normalized by length to stabilize the scale)
         loss_sdtw = self.sdtw(input, target).mean() / min_len
 
-        return 0.7*loss_HL + 0.2*loss_sm + 0.1*loss_sdtw
+        return loss_HL*0.9 + loss_sm*0.08 + loss_sdtw*0.02
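
For context on the reweighting above: the criterion now leans almost entirely on the masked Huber term (0.9), keeping only small one-hot cross-entropy (0.08) and length-normalized SoftDTW (0.02) contributions, and it no longer takes an ignore_index argument. A minimal construction-and-call sketch (not part of this commit), assuming model outputs and padded targets of shape (B, T, 7) and a per-sequence lengths tensor, mirroring forward(input, target, lengths_batch); the random tensors are only placeholders for shape illustration:

import torch
from utility.lossf import HuberDTW_CrossEntropyLoss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = HuberDTW_CrossEntropyLoss(device)  # ignore_index parameter removed in this commit

# assumed shapes: (B, T, C) predictions and padded targets, (B,) true lengths before padding
pred = torch.randn(4, 120, 7, device=device, requires_grad=True)
target = torch.randn(4, 120, 7, device=device)
lengths_batch = torch.tensor([120, 96, 80, 64], device=device)

loss = criterion(pred, target, lengths_batch)  # loss_HL*0.9 + loss_sm*0.08 + loss_sdtw*0.02
loss.backward()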