suryadev1 committed
Commit 1922da0
1 Parent(s): 5c72fe4

removed head

Files changed (9)
  1. app.py +15 -22
  2. src/attention.py +0 -24
  3. src/bert.py +0 -20
  4. src/classifier_model.py +1 -25
  5. src/dataset.py +0 -229
  6. src/pretrainer.py +10 -427
  7. src/seq_model.py +1 -37
  8. src/transformer.py +0 -9
  9. src/vocab.py +0 -10
app.py CHANGED
@@ -101,24 +101,22 @@ import shutil
 import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, auc
 # Define the function to process the input file and model selection
- <<<<<<< HEAD
 def process_file(file,label,info, model_name):
- =======
- def process_file(file,label, model_name):
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 with open(file.name, 'r') as f:
 content = f.read()
 saved_test_dataset = "train.txt"
 saved_test_label = "train_label.txt"
- <<<<<<< HEAD
 saved_train_info="train_info.txt"
- =======
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 # Save the uploaded file content to a specified location
 shutil.copyfile(file.name, saved_test_dataset)
 shutil.copyfile(label.name, saved_test_label)
- <<<<<<< HEAD
 shutil.copyfile(info.name, saved_train_info)
 # For demonstration purposes, we'll just return the content with the selected model name
 # if(model_name=="highGRschool10"):
@@ -142,7 +140,7 @@ def process_file(file,label, model_name):
 "-e",str(1),
 "-b",str(5)
 ], shell=True)
- =======
 # For demonstration purposes, we'll just return the content with the selected model name
 if(model_name=="FS"):
 checkpoint="ratio_proportion_change3/output/FS/bert_fine_tuned.model.ep32"
@@ -159,7 +157,7 @@ def process_file(file,label, model_name):
 subprocess.run(["python", "src/test_saved_model.py",
 "--finetuned_bert_checkpoint",checkpoint
 ])
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 result = {}
 with open("result.txt", 'r') as file:
 for line in file:
@@ -194,11 +192,9 @@ def process_file(file,label, model_name):
 return text_output,plot_path

 # List of models for the dropdown menu
- <<<<<<< HEAD
 models = ["highGRschool10", "lowGRschoolAll", "fullTest"]
- =======
- models = ["FS", "IS", "CORRECTNESS","EFFECTIVENESS"]
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 # Create the Gradio interface
 with gr.Blocks(css="""
@@ -388,25 +384,22 @@ tbody.svelte-18wv37q>tr.svelte-18wv37q:nth-child(odd) {
 with gr.Row():
 file_input = gr.File(label="Upload a test file", file_types=['.txt'], elem_classes="file-box")
 label_input = gr.File(label="Upload test labels", file_types=['.txt'], elem_classes="file-box")
- <<<<<<< HEAD
 info_input = gr.File(label="Upload test info", file_types=['.txt'], elem_classes="file-box")

 model_dropdown = gr.Dropdown(choices=models, label="Select Finetune Task", elem_classes="dropdown-menu")
- =======

- model_dropdown = gr.Dropdown(choices=models, label="Select Model", elem_classes="dropdown-menu")
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 with gr.Row():
 output_text = gr.Textbox(label="Output Text")
 output_image = gr.Image(label="Output Plot")

 btn = gr.Button("Submit")
- <<<<<<< HEAD
 btn.click(fn=process_file, inputs=[file_input,label_input,info_input, model_dropdown], outputs=[output_text,output_image])
- =======
- btn.click(fn=process_file, inputs=[file_input,label_input, model_dropdown], outputs=[output_text,output_image])
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 # Launch the app
 demo.launch()
 
src/attention.py CHANGED
@@ -3,19 +3,11 @@ import torch.nn.functional as F
 import torch

 import math
- <<<<<<< HEAD
 import pickle

 class Attention(nn.Module):
 """
 Compute Scaled Dot Product Attention
- =======
-
-
- class Attention(nn.Module):
- """
- Compute 'Scaled Dot Product Attention
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 """

 def __init__(self):
@@ -53,10 +45,6 @@ class MultiHeadedAttention(nn.Module):
 self.linear_layers = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(3)])
 self.output_linear = nn.Linear(d_model, d_model)
 self.attention = Attention()
- <<<<<<< HEAD
- =======
-
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 self.dropout = nn.Dropout(p=dropout)

 def forward(self, query, key, value, mask=None):
@@ -70,21 +58,9 @@ class MultiHeadedAttention(nn.Module):
 query, key, value = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
 for l, x in zip(self.linear_layers, (query, key, value))]
 # 2) Apply attention on all the projected vectors in batch.
- <<<<<<< HEAD
 x, p_attn = self.attention(query, key, value, mask=mask, dropout=self.dropout)

 # 3) "Concat" using a view and apply a final linear.
 x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)

 return self.output_linear(x), p_attn
- =======
- x, attn = self.attention(query, key, value, mask=mask, dropout=self.dropout)
- # torch.Size([64, 8, 100, 100])
- # print("Attention", attn.shape)
-
- # 3) "Concat" using a view and apply a final linear.
- x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
-
- return self.output_linear(x)
-
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 
src/bert.py CHANGED
@@ -1,14 +1,8 @@
 import torch.nn as nn
- <<<<<<< HEAD
 import torch

 from .transformer import TransformerBlock
 from .embedding import BERTEmbedding
- =======
-
- from transformer import TransformerBlock
- from embedding import BERTEmbedding
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 class BERT(nn.Module):
 """
@@ -38,15 +32,11 @@ class BERT(nn.Module):
 # multi-layers transformer blocks, deep network
 self.transformer_blocks = nn.ModuleList(
 [TransformerBlock(hidden, attn_heads, hidden * 4, dropout) for _ in range(n_layers)])
- <<<<<<< HEAD
 # self.attention_values = []
- =======
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 def forward(self, x, segment_info):
 # attention masking for padded token
 # torch.ByteTensor([batch_size, 1, seq_len, seq_len)
- <<<<<<< HEAD

 device = x.device

@@ -68,15 +58,5 @@ class BERT(nn.Module):
 for transformer in self.transformer_blocks:
 x = transformer.forward(x, mask)
 # self.attention_values.append(transformer.p_attn)
- =======
- mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)
- # print("bert mask: ", mask)
- # embedding the indexed sequence to sequence of vectors
- x = self.embedding(x, segment_info)
-
- # running over multiple transformer blocks
- for transformer in self.transformer_blocks:
- x = transformer.forward(x, mask)
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 return x
 
src/classifier_model.py CHANGED
@@ -1,28 +1,17 @@
- <<<<<<< HEAD
 import torch
 import torch.nn as nn

 from .bert import BERT
- =======
- import torch.nn as nn
-
- from bert import BERT
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896


 class BERTForClassification(nn.Module):
 """
- <<<<<<< HEAD
 Fine-tune Task Classifier Model
- =======
- Progress Classifier Model
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 """

 def __init__(self, bert: BERT, vocab_size, n_labels):
 """
 :param bert: BERT model which should be trained
- <<<<<<< HEAD
 :param vocab_size: total vocab size
 :param n_labels: number of labels for the task
 """
@@ -59,17 +48,4 @@ class BERTForClassificationWithFeats(nn.Module):
 # x = self.linear1(x)
 # x = self.RELU(x)
 # return self.linear2(x)
- return self.linear(x)
- =======
- :param vocab_size: total vocab size for masked_lm
- """
-
- super().__init__()
- self.bert = bert
- self.linear = nn.Linear(self.bert.hidden, n_labels)
- # self.softmax = nn.LogSoftmax(dim=-1)
-
- def forward(self, x, segment_label):
- x = self.bert(x, segment_label)
- return x, self.linear(x[:, 0])
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+ return self.linear(x)
 
 
src/dataset.py CHANGED
@@ -4,28 +4,17 @@ import pandas as pd
4
  import numpy as np
5
  import tqdm
6
  import random
7
- <<<<<<< HEAD
8
  from .vocab import Vocab
9
  import pickle
10
  import copy
11
  # from sklearn.preprocessing import OneHotEncoder
12
- =======
13
- from vocab import Vocab
14
- import pickle
15
- import copy
16
- from sklearn.preprocessing import OneHotEncoder
17
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
18
 
19
  class PretrainerDataset(Dataset):
20
  """
21
  Class name: PretrainDataset
22
 
23
  """
24
- <<<<<<< HEAD
25
  def __init__(self, dataset_path, vocab, seq_len=30, max_mask=0.15):
26
- =======
27
- def __init__(self, dataset_path, vocab, seq_len=30, select_next_seq= False):
28
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
29
  self.dataset_path = dataset_path
30
  self.vocab = vocab # Vocab object
31
 
@@ -46,7 +35,6 @@ class PretrainerDataset(Dataset):
46
  self.index_documents[i] = []
47
  else:
48
  self.index_documents[i].append(index)
49
- <<<<<<< HEAD
50
  self.lines.append(line.split("\t"))
51
  len_line = len(line.split("\t"))
52
  seq_len_list.append(len_line)
@@ -61,22 +49,6 @@ class PretrainerDataset(Dataset):
61
  print("Sequence length set at: ", self.seq_len)
62
  self.max_mask = max_mask
63
  print("% of input tokens selected for masking : ",self.max_mask)
64
- =======
65
- self.lines.append(line.split())
66
- len_line = len(line.split())
67
- seq_len_list.append(len_line)
68
- index+=1
69
- reader.close()
70
- print("Sequence Stats: ", len(seq_len_list), min(seq_len_list), max(seq_len_list), sum(seq_len_list)/len(seq_len_list))
71
- print("Unique Sequences: ", len({tuple(ll) for ll in self.lines}))
72
- self.index_documents = {k:v for k,v in self.index_documents.items() if v}
73
- self.seq_len = seq_len
74
- self.max_mask_per_seq = 0.15
75
- self.select_next_seq = select_next_seq
76
- print("Sequence length set at ", self.seq_len)
77
- print("select_next_seq: ", self.select_next_seq)
78
- print(len(self.index_documents))
79
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
80
 
81
 
82
  def __len__(self):
@@ -84,7 +56,6 @@ class PretrainerDataset(Dataset):
84
 
85
  def __getitem__(self, item):
86
  token_a = self.lines[item]
87
- <<<<<<< HEAD
88
  # sa_masked = None
89
  # sa_masked_label = None
90
  # token_b = None
@@ -130,44 +101,6 @@ class PretrainerDataset(Dataset):
130
 
131
  # print(item, len(s1), len(s1_label), len(segment_label))
132
  # print(f"{item}.")
133
- =======
134
- token_b = None
135
- is_same_student = None
136
- sa_masked = None
137
- sa_masked_label = None
138
- sb_masked = None
139
- sb_masked_label = None
140
-
141
- if self.select_next_seq:
142
- is_same_student, token_b = self.get_token_b(item)
143
- is_same_student = 1 if is_same_student else 0
144
- token_a1, token_b1 = self.truncate_to_max_seq(token_a, token_b)
145
- sa_masked, sa_masked_label = self.random_mask_seq(token_a1)
146
- sb_masked, sb_masked_label = self.random_mask_seq(token_b1)
147
- else:
148
- token_a = token_a[:self.seq_len-2]
149
- sa_masked, sa_masked_label = self.random_mask_seq(token_a)
150
-
151
- s1 = ([self.vocab.vocab['[CLS]']] + sa_masked + [self.vocab.vocab['[SEP]']])
152
- s1_label = ([self.vocab.vocab['[PAD]']] + sa_masked_label + [self.vocab.vocab['[PAD]']])
153
- segment_label = [1 for _ in range(len(s1))]
154
-
155
- if self.select_next_seq:
156
- s1 = s1 + sb_masked + [self.vocab.vocab['[SEP]']]
157
- s1_label = s1_label + sb_masked_label + [self.vocab.vocab['[PAD]']]
158
- segment_label = segment_label + [2 for _ in range(len(sb_masked)+1)]
159
-
160
- padding = [self.vocab.vocab['[PAD]'] for _ in range(self.seq_len - len(s1))]
161
- s1.extend(padding), s1_label.extend(padding), segment_label.extend(padding)
162
-
163
- output = {'bert_input': s1,
164
- 'bert_label': s1_label,
165
- 'segment_label': segment_label}
166
-
167
- if self.select_next_seq:
168
- output['is_same_student'] = is_same_student
169
- # print(item, len(s1), len(s1_label), len(segment_label))
170
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
171
  return {key: torch.tensor(value) for key, value in output.items()}
172
 
173
  def random_mask_seq(self, tokens):
@@ -176,7 +109,6 @@ class PretrainerDataset(Dataset):
176
  Output: masked token seq, output label
177
  """
178
 
179
- <<<<<<< HEAD
180
  masked_pos = []
181
  output_labels = []
182
  output_tokens = copy.deepcopy(tokens)
@@ -197,22 +129,11 @@ class PretrainerDataset(Dataset):
197
  # else:
198
  prob = random.random()
199
  if prob < self.max_mask:
200
- =======
201
- # masked_pos_label = {}
202
- output_labels = []
203
- output_tokens = copy.deepcopy(tokens)
204
-
205
- # while(len(label_tokens) < self.max_mask_per_seq*len(tokens)):
206
- for i, token in enumerate(tokens):
207
- prob = random.random()
208
- if prob < 0.15:
209
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
210
  # chooses 15% of token positions at random
211
  # prob /= 0.15
212
  prob = random.random()
213
  if prob < 0.8: #[MASK] token 80% of the time
214
  output_tokens[i] = self.vocab.vocab['[MASK]']
215
- <<<<<<< HEAD
216
  masked_pos.append(1)
217
  elif prob < 0.9: # a random token 10% of the time
218
  # print(".......0.8-0.9......")
@@ -226,14 +147,6 @@ class PretrainerDataset(Dataset):
226
  # print(".......unchanged......")
227
  output_tokens[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
228
  masked_pos.append(0)
229
- =======
230
- elif prob < 0.9: # a random token 10% of the time
231
- # print(".......0.8-0.9......")
232
- output_tokens[i] = random.randint(1, len(self.vocab.vocab)-1)
233
- else: # the unchanged i-th token 10% of the time
234
- # print(".......unchanged......")
235
- output_tokens[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
236
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
237
  # True Label
238
  output_labels.append(self.vocab.vocab.get(token, self.vocab.vocab['[UNK]']))
239
  # masked_pos_label[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
@@ -242,16 +155,12 @@ class PretrainerDataset(Dataset):
242
  output_tokens[i] = self.vocab.vocab.get(token, self.vocab.vocab['[UNK]'])
243
  # Padded label
244
  output_labels.append(self.vocab.vocab['[PAD]'])
245
- <<<<<<< HEAD
246
  masked_pos.append(0)
247
- =======
248
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
249
  # label_position = []
250
  # label_tokens = []
251
  # for k, v in masked_pos_label.items():
252
  # label_position.append(k)
253
  # label_tokens.append(v)
254
- <<<<<<< HEAD
255
  return output_tokens, output_labels, masked_pos
256
 
257
  # def get_token_b(self, item):
@@ -288,43 +197,6 @@ class PretrainerDataset(Dataset):
288
  # sb.pop()
289
  # return sa, sb
290
 
291
- =======
292
- return output_tokens, output_labels
293
-
294
- def get_token_b(self, item):
295
- document_id = [k for k,v in self.index_documents.items() if item in v][0]
296
- random_document_id = document_id
297
-
298
- if random.random() < 0.5:
299
- document_ids = [k for k in self.index_documents.keys() if k != document_id]
300
- random_document_id = random.choice(document_ids)
301
-
302
- same_student = (random_document_id == document_id)
303
-
304
- nex_seq_list = self.index_documents.get(random_document_id)
305
-
306
- if same_student:
307
- if len(nex_seq_list) != 1:
308
- nex_seq_list = [v for v in nex_seq_list if v !=item]
309
-
310
- next_seq = random.choice(nex_seq_list)
311
- tokens = self.lines[next_seq]
312
- # print(f"item = {item}, tokens: {tokens}")
313
- # print(f"item={item}, next={next_seq}, same_student = {same_student}, {document_id} == {random_document_id}, b. {tokens}")
314
- return same_student, tokens
315
-
316
- def truncate_to_max_seq(self, s1, s2):
317
- sa = copy.deepcopy(s1)
318
- sb = copy.deepcopy(s1)
319
- total_allowed_seq = self.seq_len - 3
320
-
321
- while((len(sa)+len(sb)) > total_allowed_seq):
322
- if random.random() < 0.5:
323
- sa.pop()
324
- else:
325
- sb.pop()
326
- return sa, sb
327
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
328
 
329
  class TokenizerDataset(Dataset):
330
  """
@@ -332,24 +204,15 @@ class TokenizerDataset(Dataset):
332
  Tokenize the data in the dataset
333
 
334
  """
335
- <<<<<<< HEAD
336
  def __init__(self, dataset_path, label_path, vocab, seq_len=30):
337
  self.dataset_path = dataset_path
338
  self.label_path = label_path
339
  self.vocab = vocab # Vocab object
340
  # self.encoder = OneHotEncoder(sparse=False)
341
- =======
342
- def __init__(self, dataset_path, label_path, vocab, seq_len=30, train=True):
343
- self.dataset_path = dataset_path
344
- self.label_path = label_path
345
- self.vocab = vocab # Vocab object
346
- self.encoder = OneHotEncoder(sparse_output=False)
347
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
348
 
349
  # Related to input dataset file
350
  self.lines = []
351
  self.labels = []
352
- <<<<<<< HEAD
353
  self.feats = []
354
  if self.label_path:
355
  self.label_file = open(self.label_path, "r")
@@ -414,97 +277,21 @@ class TokenizerDataset(Dataset):
414
  # self.labels = self.encoder.transform(np.array(self.labels).reshape(-1,1))
415
 
416
  self.file = open(self.dataset_path, "r")
417
- =======
418
- self.labels = []
419
-
420
- self.label_file = open(self.label_path, "r")
421
- for line in self.label_file:
422
- if line:
423
- line = line.strip()
424
- if not line:
425
- continue
426
- self.labels.append(float(line))
427
- self.label_file.close()
428
- labeler = np.unique(self.labels)
429
- self.encoder.fit(labeler.reshape(-1,1))
430
- self.labels = self.encoder.transform(np.array(self.labels).reshape(-1,1))
431
- # print(f"labels: {self.labels}")
432
-
433
- # info_file_name = self.dataset_path.split('.')
434
- # info_file_name = info_file_name[0]+"_info."+info_file_name[1]
435
- # progress = []
436
- # with open(info_file_name, "r") as f:
437
- # for line in f:
438
- # if line:
439
- # line = line.strip()
440
- # if not line:
441
- # continue
442
- # line = line.split(",")[0]
443
- # pstat = 1 if line == "GRADUATED" else 0
444
- # progress.append(pstat)
445
- # f.close()
446
-
447
- # indices_of_grad = np.where(np.array(progress) == 1)[0]
448
- # indices_of_prom = np.where(np.array(progress) == 0)[0]
449
-
450
- # indices_of_zeros = np.where(np.array(labels) == 0)[0]
451
- # indices_of_ones = np.where(np.array(labels) == 1)[0]
452
-
453
- # number_of_items = min(len(indices_of_zeros), len(indices_of_ones))
454
- # # number_of_items = min(len(indices_of_grad), len(indices_of_prom))
455
- # print(number_of_items)
456
-
457
- # indices_of_zeros = indices_of_zeros[:number_of_items]
458
- # indices_of_ones = indices_of_ones[:number_of_items]
459
- # print(indices_of_zeros)
460
- # print(indices_of_ones)
461
-
462
- # indices_of_grad = indices_of_grad[:number_of_items]
463
- # indices_of_prom = indices_of_prom[:number_of_items]
464
- # print(indices_of_grad)
465
- # print(indices_of_prom)
466
-
467
- self.file = open(self.dataset_path, "r")
468
- # index = 0
469
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
470
  for line in self.file:
471
  if line:
472
  line = line.strip()
473
  if line:
474
  self.lines.append(line)
475
- <<<<<<< HEAD
476
- =======
477
- # if train:
478
- # if index in indices_of_zeros:
479
- # # if index in indices_of_prom:
480
- # self.lines.append(line)
481
- # self.labels.append(0)
482
- # if index in indices_of_ones:
483
- # # if index in indices_of_grad:
484
- # self.lines.append(line)
485
- # self.labels.append(1)
486
- # else:
487
- # self.lines.append(line)
488
- # self.labels.append(labels[index])
489
- # self.labels.append(progress[index])
490
- # index += 1
491
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
492
  self.file.close()
493
 
494
  self.len = len(self.lines)
495
  self.seq_len = seq_len
496
- <<<<<<< HEAD
497
  print("Sequence length set at ", self.seq_len, len(self.lines), len(self.labels) if self.label_path else 0)
498
- =======
499
-
500
- print("Sequence length set at ", self.seq_len, len(self.lines), len(self.labels))
501
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
502
 
503
  def __len__(self):
504
  return self.len
505
 
506
  def __getitem__(self, item):
507
- <<<<<<< HEAD
508
  org_line = self.lines[item].split("\t")
509
  dup_line = []
510
  opt = False
@@ -527,23 +314,10 @@ class TokenizerDataset(Dataset):
527
  output = {'input': s1,
528
  'label': s1_label,
529
  'feat': s1_feat,
530
- =======
531
-
532
- s1 = self.vocab.to_seq(self.lines[item], self.seq_len) # This is like tokenizer and adds [CLS] and [SEP].
533
- s1_label = self.labels[item]
534
- segment_label = [1 for _ in range(len(s1))]
535
-
536
- padding = [self.vocab.vocab['[PAD]'] for _ in range(self.seq_len - len(s1))]
537
- s1.extend(padding), segment_label.extend(padding)
538
-
539
- output = {'bert_input': s1,
540
- 'progress_status': s1_label,
541
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
542
  'segment_label': segment_label}
543
  return {key: torch.tensor(value) for key, value in output.items()}
544
 
545
 
546
- <<<<<<< HEAD
547
  class TokenizerDatasetForCalibration(Dataset):
548
  """
549
  Class name: TokenizerDataset
@@ -661,9 +435,6 @@ class TokenizerDatasetForCalibration(Dataset):
661
 
662
 
663
  # if __name__ == "__main__":
664
- =======
665
- # if __name__ == "__main__":
666
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
667
  # # import pickle
668
  # # k = pickle.load(open("dataset/CL4999_1920/unique_steps_list.pkl","rb"))
669
  # # print(k)
 
src/pretrainer.py CHANGED
@@ -1,6 +1,5 @@
1
  import torch
2
  import torch.nn as nn
3
- <<<<<<< HEAD
4
  # from torch.nn import functional as F
5
  from torch.optim import Adam
6
  from torch.utils.data import DataLoader
@@ -36,75 +35,6 @@ class BERTTrainer:
36
  train_dataloader: DataLoader, val_dataloader: DataLoader = None, test_dataloader: DataLoader = None,
37
  lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=5000,
38
  with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, log_folder_path: str = None):
39
- =======
40
- from torch.nn import functional as F
41
- from torch.optim import Adam, SGD
42
- from torch.utils.data import DataLoader
43
- import pickle
44
-
45
- from bert import BERT
46
- from seq_model import BERTSM
47
- from classifier_model import BERTForClassification
48
- from optim_schedule import ScheduledOptim
49
-
50
- import tqdm
51
- import sys
52
-
53
- import numpy as np
54
- import visualization
55
-
56
- from sklearn.metrics import precision_score, recall_score, f1_score
57
-
58
- class ECE(nn.Module):
59
-
60
- def __init__(self, n_bins=15):
61
- """
62
- n_bins (int): number of confidence interval bins
63
- """
64
- super(ECE, self).__init__()
65
- bin_boundaries = torch.linspace(0, 1, n_bins + 1)
66
- self.bin_lowers = bin_boundaries[:-1]
67
- self.bin_uppers = bin_boundaries[1:]
68
-
69
- def forward(self, logits, labels):
70
- softmaxes = F.softmax(logits, dim=1)
71
- confidences, predictions = torch.max(softmaxes, 1)
72
- labels = torch.argmax(labels,1)
73
- accuracies = predictions.eq(labels)
74
-
75
- ece = torch.zeros(1, device=logits.device)
76
- for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
77
- # Calculated |confidence - accuracy| in each bin
78
- in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item())
79
- prop_in_bin = in_bin.float().mean()
80
- if prop_in_bin.item() > 0:
81
- accuracy_in_bin = accuracies[in_bin].float().mean()
82
- avg_confidence_in_bin = confidences[in_bin].mean()
83
- ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
84
-
85
- return ece
86
-
87
- def accurate_nb(preds, labels):
88
- pred_flat = np.argmax(preds, axis=1).flatten()
89
- labels_flat = np.argmax(labels, axis=1).flatten()
90
- labels_flat = labels.flatten()
91
- return np.sum(pred_flat == labels_flat)
92
-
93
- class BERTTrainer:
94
- """
95
- # Sequence..
96
-
97
- BERTTrainer make the pretrained BERT model with two LM training method.
98
-
99
- 1. Masked Language Model : 3.3.1 Task #1: Masked LM
100
- """
101
-
102
- def __init__(self, bert: BERT, vocab_size: int,
103
- train_dataloader: DataLoader, test_dataloader: DataLoader = None,
104
- lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
105
- with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, same_student_prediction = False,
106
- workspace_name=None):
107
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
108
  """
109
  :param bert: BERT model which you want to train
110
  :param vocab_size: total word vocab size
@@ -117,7 +47,6 @@ class BERTTrainer:
117
  :param log_freq: logging frequency of the batch iteration
118
  """
119
 
120
- <<<<<<< HEAD
121
  cuda_condition = torch.cuda.is_available() and with_cuda
122
  self.device = torch.device("cuda:0" if cuda_condition else "cpu")
123
  print(cuda_condition, " Device used = ", self.device)
@@ -127,33 +56,16 @@ class BERTTrainer:
127
  # This BERT model will be saved
128
  self.bert = bert.to(self.device)
129
  # Initialize the BERT Sequence Model, with BERT model
130
- =======
131
- # Setup cuda device for BERT training, argument -c, --cuda should be true
132
- cuda_condition = torch.cuda.is_available() and with_cuda
133
- self.device = torch.device("cuda:0" if cuda_condition else "cpu")
134
- print("Device used = ", self.device)
135
-
136
- # This BERT model will be saved every epoch
137
- self.bert = bert
138
- # Initialize the BERT Language Model, with BERT model
139
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
140
  self.model = BERTSM(bert, vocab_size).to(self.device)
141
 
142
  # Distributed GPU training if CUDA can detect more than 1 GPU
143
  if with_cuda and torch.cuda.device_count() > 1:
144
  print("Using %d GPUS for BERT" % torch.cuda.device_count())
145
- <<<<<<< HEAD
146
  self.model = nn.DataParallel(self.model, device_ids=available_gpus)
147
 
148
  # Setting the train, validation and test data loader
149
  self.train_data = train_dataloader
150
  self.val_data = val_dataloader
151
- =======
152
- self.model = nn.DataParallel(self.model, device_ids=cuda_devices)
153
-
154
- # Setting the train and test data loader
155
- self.train_data = train_dataloader
156
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
157
  self.test_data = test_dataloader
158
 
159
  # Setting the Adam optimizer with hyper-param
@@ -164,7 +76,6 @@ class BERTTrainer:
164
  self.criterion = nn.NLLLoss(ignore_index=0)
165
 
166
  self.log_freq = log_freq
167
- <<<<<<< HEAD
168
  self.log_folder_path = log_folder_path
169
  # self.workspace_name = workspace_name
170
  self.save_model = False
@@ -175,18 +86,11 @@ class BERTTrainer:
175
  f.close()
176
  self.start_time = time.time()
177
 
178
- =======
179
- self.same_student_prediction = same_student_prediction
180
- self.workspace_name = workspace_name
181
- self.save_model = False
182
- self.avg_loss = 10000
183
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
184
  print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
185
 
186
  def train(self, epoch):
187
  self.iteration(epoch, self.train_data)
188
 
189
- <<<<<<< HEAD
190
  def val(self, epoch):
191
  if epoch == 0:
192
  self.avg_loss = 10000
@@ -196,12 +100,6 @@ class BERTTrainer:
196
  self.iteration(epoch, self.test_data, phase="test")
197
 
198
  def iteration(self, epoch, data_loader, phase="train"):
199
- =======
200
- def test(self, epoch):
201
- self.iteration(epoch, self.test_data, train=False)
202
-
203
- def iteration(self, epoch, data_loader, train=True):
204
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
205
  """
206
  loop over the data_loader for training or testing
207
  if on train status, backward operation is activated
@@ -212,7 +110,6 @@ class BERTTrainer:
212
  :param train: boolean value of is train or test
213
  :return: None
214
  """
215
- <<<<<<< HEAD
216
 
217
  # self.log_file = f"{self.workspace_name}/logs/{self.code}/log_{phase}_pretrained.txt"
218
  # bert_hidden_representations = [] can be used
@@ -235,39 +132,10 @@ class BERTTrainer:
235
  else:
236
  self.model.eval()
237
  with open(self.log_folder_path+f"/log_{phase}_pretrained.txt", 'a') as f:
238
- =======
239
- str_code = "train" if train else "test"
240
- code = "masked_prediction" if self.same_student_prediction else "masked"
241
-
242
- self.log_file = f"{self.workspace_name}/logs/{code}/log_{str_code}_pretrained.txt"
243
- bert_hidden_representations = []
244
- if epoch == 0:
245
- f = open(self.log_file, 'w')
246
- f.close()
247
- if not train:
248
- self.avg_loss = 10000
249
- # Setting the tqdm progress bar
250
- data_iter = tqdm.tqdm(enumerate(data_loader),
251
- desc="EP_%s:%d" % (str_code, epoch),
252
- total=len(data_loader),
253
- bar_format="{l_bar}{r_bar}")
254
-
255
- avg_loss_mask = 0.0
256
- total_correct_mask = 0
257
- total_element_mask = 0
258
-
259
- avg_loss_pred = 0.0
260
- total_correct_pred = 0
261
- total_element_pred = 0
262
-
263
- avg_loss = 0.0
264
- with open(self.log_file, 'a') as f:
265
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
266
  sys.stdout = f
267
  for i, data in data_iter:
268
  # 0. batch_data will be sent into the device(GPU or cpu)
269
  data = {key: value.to(self.device) for key, value in data.items()}
270
- <<<<<<< HEAD
271
 
272
  # 1. forward masked_sm model
273
  # mask_sm_output is log-probabilities output
@@ -280,38 +148,10 @@ class BERTTrainer:
280
 
281
  # 3. backward and optimization only in train
282
  if phase == "train":
283
- =======
284
-
285
- # 1. forward the next_sentence_prediction and masked_lm model
286
- # next_sent_output, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"])
287
- if self.same_student_prediction:
288
- bert_hidden_rep, mask_lm_output, same_student_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)
289
- else:
290
- bert_hidden_rep, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)
291
-
292
- embeddings = [h for h in bert_hidden_rep.cpu().detach().numpy()]
293
- bert_hidden_representations.extend(embeddings)
294
-
295
-
296
- # 2-2. NLLLoss of predicting masked token word
297
- mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])
298
-
299
- # 2-3. Adding next_loss and mask_loss : 3.4 Pre-training Procedure
300
- if self.same_student_prediction:
301
- # 2-1. NLL(negative log likelihood) loss of is_next classification result
302
- same_student_loss = self.criterion(same_student_output, data["is_same_student"])
303
- loss = same_student_loss + mask_loss
304
- else:
305
- loss = mask_loss
306
-
307
- # 3. backward and optimization only in train
308
- if train:
309
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
310
  self.optim_schedule.zero_grad()
311
  loss.backward()
312
  self.optim_schedule.step_and_update_lr()
313
 
314
- <<<<<<< HEAD
315
  # tokens with highest log-probabilities creates a predicted sequence
316
  pred_tokens = torch.argmax(mask_sm_output, dim=-1)
317
  mask_correct = (data["bert_label"] == pred_tokens) & data["masked_pos"]
@@ -348,69 +188,6 @@ class BERTTrainer:
348
  if self.avg_loss > (avg_loss / len(data_iter)):
349
  self.save_model = True
350
  self.avg_loss = (avg_loss / len(data_iter))
351
- =======
352
-
353
- non_zero_mask = (data["bert_label"] != 0).float()
354
- predictions = torch.argmax(mask_lm_output, dim=-1)
355
- predicted_masked = predictions*non_zero_mask
356
- mask_correct = ((data["bert_label"] == predicted_masked)*non_zero_mask).sum().item()
357
-
358
- avg_loss_mask += loss.item()
359
- total_correct_mask += mask_correct
360
- total_element_mask += non_zero_mask.sum().item()
361
-
362
- post_fix = {
363
- "epoch": epoch,
364
- "iter": i,
365
- "avg_loss": avg_loss_mask / (i + 1),
366
- "avg_acc_mask": total_correct_mask / total_element_mask * 100,
367
- "loss": loss.item()
368
- }
369
-
370
- # next sentence prediction accuracy
371
- if self.same_student_prediction:
372
- correct = same_student_output.argmax(dim=-1).eq(data["is_same_student"]).sum().item()
373
- avg_loss_pred += loss.item()
374
- total_correct_pred += correct
375
- total_element_pred += data["is_same_student"].nelement()
376
- # correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
377
- post_fix["avg_loss"] = avg_loss_pred / (i + 1)
378
- post_fix["avg_acc_pred"] = total_correct_pred / total_element_pred * 100
379
- post_fix["loss"] = loss.item()
380
-
381
- avg_loss +=loss.item()
382
-
383
- if i % self.log_freq == 0:
384
- data_iter.write(str(post_fix))
385
- # if not train and epoch > 20 :
386
- # pickle.dump(mask_lm_output.cpu().detach().numpy(), open(f"logs/mask/mask_out_e{epoch}_{i}.pkl","wb"))
387
- # pickle.dump(data["bert_label"].cpu().detach().numpy(), open(f"logs/mask/label_e{epoch}_{i}.pkl","wb"))
388
-
389
- final_msg = {
390
- "epoch": f"EP{epoch}_{str_code}",
391
- "avg_loss": avg_loss / len(data_iter),
392
- "total_masked_acc": total_correct_mask * 100.0 / total_element_mask
393
- }
394
- if self.same_student_prediction:
395
- final_msg["total_prediction_acc"] = total_correct_pred * 100.0 / total_element_pred
396
-
397
- print(final_msg)
398
- # print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_masked_acc=", total_correct_mask * 100.0 / total_element_mask, "total_prediction_acc=", total_correct_pred * 100.0 / total_element_pred)
399
- # else:
400
- # print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_masked_acc=", total_correct_mask * 100.0 / total_element_mask)
401
- # print("EP%d_%s, " % (epoch, str_code))
402
-
403
- f.close()
404
- sys.stdout = sys.__stdout__
405
- self.save_model = False
406
- if self.avg_loss > (avg_loss / len(data_iter)):
407
- self.save_model = True
408
- self.avg_loss = (avg_loss / len(data_iter))
409
-
410
- # pickle.dump(bert_hidden_representations, open(f"embeddings/{code}/{str_code}_embeddings_{epoch}.pkl","wb"))
411
-
412
-
413
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
414
 
415
  def save(self, epoch, file_path="output/bert_trained.model"):
416
  """
@@ -432,12 +209,8 @@ class BERTFineTuneTrainer:
432
  def __init__(self, bert: BERT, vocab_size: int,
433
  train_dataloader: DataLoader, test_dataloader: DataLoader = None,
434
  lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
435
- <<<<<<< HEAD
436
  with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
437
  num_labels=2, log_folder_path: str = None):
438
- =======
439
- with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None, num_labels=2):
440
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
441
  """
442
  :param bert: BERT model which you want to train
443
  :param vocab_size: total word vocab size
@@ -453,7 +226,6 @@ class BERTFineTuneTrainer:
453
  # Setup cuda device for BERT training, argument -c, --cuda should be true
454
  cuda_condition = torch.cuda.is_available() and with_cuda
455
  self.device = torch.device("cuda:0" if cuda_condition else "cpu")
456
- <<<<<<< HEAD
457
  print(cuda_condition, " Device used = ", self.device)
458
 
459
  available_gpus = list(range(torch.cuda.device_count()))
@@ -462,6 +234,16 @@ class BERTFineTuneTrainer:
462
  self.bert = bert
463
  for param in self.bert.parameters():
464
  param.requires_grad = False
 
 
 
 
 
 
 
 
 
 
465
  # Initialize the BERT Language Model, with BERT model
466
  # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
467
  # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
@@ -748,48 +530,11 @@ class BERTFineTuneTrainer1:
748
  for fi in ['train', 'test']: #'val',
749
  f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
750
  f.close()
751
- =======
752
- print("Device used = ", self.device)
753
-
754
- # This BERT model will be saved every epoch
755
- self.bert = bert
756
- # for param in self.bert.parameters():
757
- # param.requires_grad = False
758
- # Initialize the BERT Language Model, with BERT model
759
- self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
760
-
761
- # Distributed GPU training if CUDA can detect more than 1 GPU
762
- if with_cuda and torch.cuda.device_count() > 1:
763
- print("Using %d GPUS for BERT" % torch.cuda.device_count())
764
- self.model = nn.DataParallel(self.model, device_ids=cuda_devices)
765
-
766
- # Setting the train and test data loader
767
- self.train_data = train_dataloader
768
- self.test_data = test_dataloader
769
-
770
- self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay, eps=1e-9)
771
- # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
772
-
773
- if num_labels == 1:
774
- self.criterion = nn.MSELoss()
775
- elif num_labels == 2:
776
- self.criterion = nn.CrossEntropyLoss()
777
- elif num_labels > 2:
778
- self.criterion = nn.BCEWithLogitsLoss()
779
-
780
- self.ece_criterion = ECE().to(self.device)
781
-
782
- self.log_freq = log_freq
783
- self.workspace_name = workspace_name
784
- self.save_model = False
785
- self.avg_loss = 10000
786
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
787
  print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
788
 
789
  def train(self, epoch):
790
  self.iteration(epoch, self.train_data)
791
 
792
- <<<<<<< HEAD
793
  # def val(self, epoch):
794
  # self.iteration(epoch, self.val_data, phase="val")
795
 
@@ -799,12 +544,6 @@ class BERTFineTuneTrainer1:
799
  self.iteration(epoch, self.test_data, phase="test")
800
 
801
  def iteration(self, epoch, data_loader, phase="train"):
802
- =======
803
- def test(self, epoch):
804
- self.iteration(epoch, self.test_data, train=False)
805
-
806
- def iteration(self, epoch, data_loader, train=True):
807
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
808
  """
809
  loop over the data_loader for training or testing
810
  if on train status, backward operation is activated
@@ -815,26 +554,10 @@ class BERTFineTuneTrainer1:
815
  :param train: boolean value of is train or test
816
  :return: None
817
  """
818
- <<<<<<< HEAD
819
 
820
  # Setting the tqdm progress bar
821
  data_iter = tqdm.tqdm(enumerate(data_loader),
822
  desc="EP_%s:%d" % (phase, epoch),
823
- =======
824
- str_code = "train" if train else "test"
825
-
826
- self.log_file = f"{self.workspace_name}/logs/masked/log_{str_code}_FS_finetuned.txt"
827
-
828
- if epoch == 0:
829
- f = open(self.log_file, 'w')
830
- f.close()
831
- if not train:
832
- self.avg_loss = 10000
833
-
834
- # Setting the tqdm progress bar
835
- data_iter = tqdm.tqdm(enumerate(data_loader),
836
- desc="EP_%s:%d" % (str_code, epoch),
837
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
838
  total=len(data_loader),
839
  bar_format="{l_bar}{r_bar}")
840
 
@@ -843,7 +566,6 @@ class BERTFineTuneTrainer1:
843
  total_element = 0
844
  plabels = []
845
  tlabels = []
846
- <<<<<<< HEAD
847
  probabs = []
848
 
849
  if phase == "train":
@@ -864,43 +586,10 @@ class BERTFineTuneTrainer1:
864
  logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"])
865
 
866
  loss = self.criterion(logits, data["label"])
867
- =======
868
- eval_accurate_nb = 0
869
- nb_eval_examples = 0
870
- logits_list = []
871
- labels_list = []
872
-
873
- if train:
874
- self.model.train()
875
- else:
876
- self.model.eval()
877
-
878
- with open(self.log_file, 'a') as f:
879
- sys.stdout = f
880
-
881
- for i, data in data_iter:
882
- # 0. batch_data will be sent into the device(GPU or cpu)
883
- data = {key: value.to(self.device) for key, value in data.items()}
884
- if train:
885
- h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"])
886
- else:
887
- with torch.no_grad():
888
- h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"])
889
- # print(logits, logits.shape)
890
- logits_list.append(logits.cpu())
891
- labels_list.append(data["progress_status"].cpu())
892
- # print(">>>>>>>>>>>>", progress_output)
893
- # print(f"{epoch}---nelement--- {data['progress_status'].nelement()}")
894
- # print(data["progress_status"].shape, logits.shape)
895
- progress_loss = self.criterion(logits, data["progress_status"])
896
- loss = progress_loss
897
-
898
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
899
  if torch.cuda.device_count() > 1:
900
  loss = loss.mean()
901
 
902
  # 3. backward and optimization only in train
903
- <<<<<<< HEAD
904
  if phase == "train":
905
  self.optim_schedule.zero_grad()
906
  loss.backward()
@@ -969,108 +658,10 @@ class BERTFineTuneTrainer1:
969
  sys.stdout = sys.__stdout__
970
 
971
  if phase == "test":
972
- =======
973
- if train:
974
- self.optim.zero_grad()
975
- loss.backward()
976
- torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
977
- self.optim.step()
978
-
979
- # progress prediction accuracy
980
- # correct = progress_output.argmax(dim=-1).eq(data["progress_status"]).sum().item()
981
- probs = nn.LogSoftmax(dim=-1)(logits)
982
- predicted_labels = torch.argmax(probs, dim=-1)
983
- true_labels = torch.argmax(data["progress_status"], dim=-1)
984
- plabels.extend(predicted_labels.cpu().numpy())
985
- tlabels.extend(true_labels.cpu().numpy())
986
-
987
- # print(">>>>>>>>>>>>>>", predicted_labels, true_labels)
988
- # Compare predicted labels to true labels and calculate accuracy
989
- correct = (predicted_labels == true_labels).sum().item()
990
- avg_loss += loss.item()
991
- total_correct += correct
992
- total_element += true_labels.nelement()
993
-
994
- if train:
995
- post_fix = {
996
- "epoch": epoch,
997
- "iter": i,
998
- "avg_loss": avg_loss / (i + 1),
999
- "avg_acc": total_correct / total_element * 100,
1000
- "loss": loss.item()
1001
- }
1002
- else:
1003
- logits = logits.detach().cpu().numpy()
1004
- label_ids = data["progress_status"].to('cpu').numpy()
1005
- tmp_eval_nb = accurate_nb(logits, label_ids)
1006
-
1007
- eval_accurate_nb += tmp_eval_nb
1008
- nb_eval_examples += label_ids.shape[0]
1009
-
1010
- total_element += data["progress_status"].nelement()
1011
- # avg_loss += loss.item()
1012
-
1013
- post_fix = {
1014
- "epoch": epoch,
1015
- "iter": i,
1016
- "avg_loss": avg_loss / (i + 1),
1017
- "avg_acc": tmp_eval_nb / total_element * 100,
1018
- "loss": loss.item()
1019
- }
1020
-
1021
-
1022
- if i % self.log_freq == 0:
1023
- data_iter.write(str(post_fix))
1024
-
1025
- # precisions = precision_score(plabels, tlabels, average="weighted")
1026
- # recalls = recall_score(plabels, tlabels, average="weighted")
1027
- f1_scores = f1_score(plabels, tlabels, average="weighted")
1028
- if train:
1029
- final_msg = {
1030
- "epoch": f"EP{epoch}_{str_code}",
1031
- "avg_loss": avg_loss / len(data_iter),
1032
- "total_acc": total_correct * 100.0 / total_element,
1033
- # "precisions": precisions,
1034
- # "recalls": recalls,
1035
- "f1_scores": f1_scores
1036
- }
1037
- else:
1038
- eval_accuracy = eval_accurate_nb/nb_eval_examples
1039
-
1040
- logits_ece = torch.cat(logits_list)
1041
- labels_ece = torch.cat(labels_list)
1042
- ece = self.ece_criterion(logits_ece, labels_ece).item()
1043
- final_msg = {
1044
- "epoch": f"EP{epoch}_{str_code}",
1045
- "eval_accuracy": eval_accuracy,
1046
- "ece": ece,
1047
- "avg_loss": avg_loss / len(data_iter),
1048
- # "precisions": precisions,
1049
- # "recalls": recalls,
1050
- "f1_scores": f1_scores
1051
- }
1052
- if self.save_model:
1053
- conf_hist = visualization.ConfidenceHistogram()
1054
- plt_test = conf_hist.plot(np.array(logits_ece), np.array(labels_ece), title= f"Confidence Histogram {epoch}")
1055
- plt_test.savefig(f"{self.workspace_name}/plots/confidence_histogram/FS/conf_histogram_test_{epoch}.png",bbox_inches='tight')
1056
- plt_test.close()
1057
-
1058
- rel_diagram = visualization.ReliabilityDiagram()
1059
- plt_test_2 = rel_diagram.plot(np.array(logits_ece), np.array(labels_ece),title=f"Reliability Diagram {epoch}")
1060
- plt_test_2.savefig(f"{self.workspace_name}/plots/confidence_histogram/FS/rel_diagram_test_{epoch}.png",bbox_inches='tight')
1061
- plt_test_2.close()
1062
- print(final_msg)
1063
-
1064
- # print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_acc=", total_correct * 100.0 / total_element)
1065
- f.close()
1066
- sys.stdout = sys.__stdout__
1067
- if train:
1068
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
1069
  self.save_model = False
1070
  if self.avg_loss > (avg_loss / len(data_iter)):
1071
  self.save_model = True
1072
  self.avg_loss = (avg_loss / len(data_iter))
1073
- <<<<<<< HEAD
1074
 
1075
  def iteration_1(self, epoch_idx, data):
1076
  try:
@@ -1094,11 +685,6 @@ class BERTFineTuneTrainer1:
1094
  print(f"Error during iteration: {e}")
1095
  raise
1096
 
1097
- =======
1098
-
1099
- # plt_test.show()
1100
- # print("EP%d_%s, " % (epoch, str_code))
1101
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
1102
 
1103
  def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"):
1104
  """
@@ -1113,7 +699,6 @@ class BERTFineTuneTrainer1:
1113
  self.model.to(self.device)
1114
  print("EP:%d Model Saved on:" % epoch, output_path)
1115
  return output_path
1116
- <<<<<<< HEAD
1117
 
1118
 
1119
  class BERTAttention:
@@ -1221,5 +806,3 @@ class BERTAttention:
1221
 
1222
 
1223
 
1224
- =======
1225
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
 
234
  self.bert = bert
235
  for param in self.bert.parameters():
236
  param.requires_grad = False
237
+
238
+ # for name, param in self.bert.named_parameters():
239
+ # if '.attention.linear_layers.0' in name or \
240
+ # '.attention.linear_layers.1' in name or \
241
+ # '.attention.linear_layers.2' in name:
242
+ # # if 'transformer_blocks.' in name:# or \
243
+ # # 'transformer_blocks.3.' in name:
244
+ # # if '2.attention.linear_layers.' in name or \
245
+ # # '3.attention.linear_layers.' in name:
246
+ # param.requires_grad = True
247
  # Initialize the BERT Language Model, with BERT model
248
  # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
249
  # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
 
530
  for fi in ['train', 'test']: #'val',
531
  f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
532
  f.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
  print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
534
 
535
  def train(self, epoch):
536
  self.iteration(epoch, self.train_data)
537
 
 
538
  # def val(self, epoch):
539
  # self.iteration(epoch, self.val_data, phase="val")
540
 
 
544
  self.iteration(epoch, self.test_data, phase="test")
545
 
546
  def iteration(self, epoch, data_loader, phase="train"):
 
 
 
 
 
 
547
  """
548
  loop over the data_loader for training or testing
549
  if on train status, backward operation is activated
 
554
  :param train: boolean value of is train or test
555
  :return: None
556
  """
 
557
 
558
  # Setting the tqdm progress bar
559
  data_iter = tqdm.tqdm(enumerate(data_loader),
560
  desc="EP_%s:%d" % (phase, epoch),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  total=len(data_loader),
562
  bar_format="{l_bar}{r_bar}")
563
 
 
566
  total_element = 0
567
  plabels = []
568
  tlabels = []
 
569
  probabs = []
570
 
571
  if phase == "train":
 
586
  logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"])
587
 
588
  loss = self.criterion(logits, data["label"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
  if torch.cuda.device_count() > 1:
590
  loss = loss.mean()
591
 
592
  # 3. backward and optimization only in train
 
593
  if phase == "train":
594
  self.optim_schedule.zero_grad()
595
  loss.backward()
 
658
  sys.stdout = sys.__stdout__
659
 
660
  if phase == "test":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
661
  self.save_model = False
662
  if self.avg_loss > (avg_loss / len(data_iter)):
663
  self.save_model = True
664
  self.avg_loss = (avg_loss / len(data_iter))
 
665
 
666
  def iteration_1(self, epoch_idx, data):
667
  try:
 
685
  print(f"Error during iteration: {e}")
686
  raise
687
 
 
 
 
 
 
688
 
689
  def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"):
690
  """
 
699
  self.model.to(self.device)
700
  print("EP:%d Model Saved on:" % epoch, output_path)
701
  return output_path
 
702
 
703
 
704
  class BERTAttention:
 
806
 
807
 
808
 
 
 
src/seq_model.py CHANGED
@@ -1,10 +1,6 @@
1
  import torch.nn as nn
2
 
3
- <<<<<<< HEAD
4
  from .bert import BERT
5
- =======
6
- from bert import BERT
7
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
8
 
9
 
10
  class BERTSM(nn.Module):
@@ -22,23 +18,10 @@ class BERTSM(nn.Module):
22
  super().__init__()
23
  self.bert = bert
24
  self.mask_lm = MaskedSequenceModel(self.bert.hidden, vocab_size)
25
- <<<<<<< HEAD
26
 
27
  def forward(self, x, segment_label):
28
  x = self.bert(x, segment_label)
29
  return self.mask_lm(x), x[:, 0]
30
- =======
31
- self.same_student = SameStudentPrediction(self.bert.hidden)
32
-
33
- def forward(self, x, segment_label, pred=False):
34
- x = self.bert(x, segment_label)
35
- # torch.Size([32, 200, 512])
36
- # print("???????????? ",x.shape)
37
- if pred:
38
- return x[:, 0], self.mask_lm(x), self.same_student(x)
39
- else:
40
- return x[:, 0], self.mask_lm(x)
41
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
42
 
43
 
44
  class MaskedSequenceModel(nn.Module):
@@ -57,23 +40,4 @@ class MaskedSequenceModel(nn.Module):
57
  self.softmax = nn.LogSoftmax(dim=-1)
58
 
59
  def forward(self, x):
60
- <<<<<<< HEAD
61
- return self.softmax(self.linear(x))
62
- =======
63
- return self.softmax(self.linear(x))
64
-
65
-
66
- class SameStudentPrediction(nn.Module):
67
-
68
- def __init__(self, hidden):
69
- """
70
- :param hidden: BERT model output size
71
- """
72
- super().__init__()
73
- self.linear = nn.Linear(hidden, 2)
74
- self.softmax = nn.LogSoftmax(dim=-1)
75
-
76
- def forward(self, x):
77
- return self.softmax(self.linear(x[:, 0]))
78
-
79
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
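The resolution keeps the two-output forward (masked-LM log-probabilities plus the embedding at the [CLS] position) and drops the SameStudentPrediction head. A minimal runnable sketch of that contract follows, using a stub encoder in place of src/bert.py's BERT; the stub, hidden size, and vocab size are illustrative assumptions.

import torch
import torch.nn as nn

class StubBERT(nn.Module):                 # hypothetical stand-in for src.bert.BERT
    def __init__(self, hidden=64, vocab_size=30):
        super().__init__()
        self.hidden = hidden
        self.embed = nn.Embedding(vocab_size, hidden)

    def forward(self, x, segment_label):
        return self.embed(x)               # [batch, seq_len, hidden]

class MaskedSequenceModel(nn.Module):      # mirrors the head defined in this file
    def __init__(self, hidden, vocab_size):
        super().__init__()
        self.linear = nn.Linear(hidden, vocab_size)
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        return self.softmax(self.linear(x))

class BERTSM(nn.Module):                   # resolved forward: (mask_lm output, CLS vector)
    def __init__(self, bert, vocab_size):
        super().__init__()
        self.bert = bert
        self.mask_lm = MaskedSequenceModel(bert.hidden, vocab_size)

    def forward(self, x, segment_label):
        x = self.bert(x, segment_label)
        return self.mask_lm(x), x[:, 0]

tokens = torch.randint(0, 30, (2, 10))
segments = torch.ones_like(tokens)
log_probs, cls_vec = BERTSM(StubBERT(), vocab_size=30)(tokens, segments)
pred_tokens = torch.argmax(log_probs, dim=-1)     # as consumed in src/pretrainer.py
print(log_probs.shape, cls_vec.shape, pred_tokens.shape)   # (2,10,30) (2,64) (2,10)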
 
src/transformer.py CHANGED
@@ -1,12 +1,7 @@
1
  import torch.nn as nn
2
 
3
- <<<<<<< HEAD
4
  from .attention import MultiHeadedAttention
5
  from .transformer_component import SublayerConnection, PositionwiseFeedForward
6
- =======
7
- from attention import MultiHeadedAttention
8
- from transformer_component import SublayerConnection, PositionwiseFeedForward
9
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
10
 
11
  class TransformerBlock(nn.Module):
12
  """
@@ -30,12 +25,8 @@ class TransformerBlock(nn.Module):
30
  self.dropout = nn.Dropout(p=dropout)
31
 
32
  def forward(self, x, mask):
33
- <<<<<<< HEAD
34
  attn_output, p_attn = self.attention.forward(x, x, x, mask=mask)
35
  self.p_attn = p_attn.cpu().detach().numpy()
36
  x = self.input_sublayer(x, lambda _x: attn_output)
37
- =======
38
- x = self.input_sublayer(x, lambda _x: self.attention.forward(_x, _x, _x, mask=mask))
39
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
40
  x = self.output_sublayer(x, self.feed_forward)
41
  return self.dropout(x)
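The variant kept here calls attention once, caches the attention weights on self.p_attn (used later for the attention plots driven from src/pretrainer.py), and feeds the precomputed output into the residual sublayer. A minimal sketch of that pattern follows; the stub attention module and the simplified residual are illustrative assumptions, not the real MultiHeadedAttention or SublayerConnection.

import torch
import torch.nn as nn

class StubAttention(nn.Module):            # stand-in for MultiHeadedAttention
    def forward(self, q, k, v, mask=None):
        scores = torch.softmax(q @ k.transpose(-2, -1) / q.size(-1) ** 0.5, dim=-1)
        return scores @ v, scores          # (attended values, attention weights)

class MiniBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.attention = StubAttention()

    def forward(self, x, mask=None):
        attn_output, p_attn = self.attention(x, x, x, mask=mask)
        self.p_attn = p_attn.cpu().detach().numpy()   # cached for later inspection/plots
        return x + attn_output                        # simplified residual connection

block = MiniBlock()
_ = block(torch.randn(2, 5, 8))
print(block.p_attn.shape)                  # (2, 5, 5): one weight matrix per batch item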
 
src/vocab.py CHANGED
@@ -1,22 +1,16 @@
1
  import collections
2
  import tqdm
3
- <<<<<<< HEAD
4
  import os
5
  from pathlib import Path
6
 
7
  head_directory = Path(__file__).resolve().parent.parent
8
  # print(head_directory)
9
  os.chdir(head_directory)
10
- =======
11
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
12
 
13
  class Vocab(object):
14
  """
15
  Special tokens predefined in the vocab file are:
16
- <<<<<<< HEAD
17
  -[PAD]
18
- =======
19
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
20
  -[UNK]
21
  -[MASK]
22
  -[CLS]
@@ -48,11 +42,7 @@ class Vocab(object):
48
  words = [self.invocab[index] if index < len(self.invocab)
49
  else "[%d]" % index for index in seq ]
50
 
51
- <<<<<<< HEAD
52
  return words #" ".join(words)
53
- =======
54
- return " ".join(words)
55
- >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
56
 
57
 
58
  # if __init__ == "__main__":
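The resolution keeps the variant that returns the decoded tokens as a list rather than the pre-merge space-joined string. A small sketch of the difference follows, using an illustrative in-memory vocab in place of the real vocab file.

invocab = ["[PAD]", "[UNK]", "[MASK]", "[CLS]", "opt-step", "check-step"]   # illustrative
seq = [3, 4, 5, 9]

# same decoding rule as the method above: out-of-range indices render as "[<index>]"
words = [invocab[index] if index < len(invocab) else "[%d]" % index for index in seq]

print(words)             # ['[CLS]', 'opt-step', 'check-step', '[9]']  <- behavior kept here
print(" ".join(words))   # '[CLS] opt-step check-step [9]'             <- pre-merge alternative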
 