removed head
Files changed:
- app.py +15 -22
- src/attention.py +0 -24
- src/bert.py +0 -20
- src/classifier_model.py +1 -25
- src/dataset.py +0 -229
- src/pretrainer.py +10 -427
- src/seq_model.py +1 -37
- src/transformer.py +0 -9
- src/vocab.py +0 -10
app.py
CHANGED
@@ -101,24 +101,22 @@ import shutil
 import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, auc
 # Define the function to process the input file and model selection
-
+
 def process_file(file,label,info, model_name):
-
-def process_file(file,label, model_name):
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+
     with open(file.name, 'r') as f:
         content = f.read()
     saved_test_dataset = "train.txt"
     saved_test_label = "train_label.txt"
-
+
     saved_train_info="train_info.txt"
-
-
+
+

     # Save the uploaded file content to a specified location
     shutil.copyfile(file.name, saved_test_dataset)
     shutil.copyfile(label.name, saved_test_label)
-
+
     shutil.copyfile(info.name, saved_train_info)
     # For demonstration purposes, we'll just return the content with the selected model name
     # if(model_name=="highGRschool10"):
@@ -142,7 +140,7 @@ def process_file(file,label, model_name):
         "-e",str(1),
         "-b",str(5)
         ], shell=True)
-
+
     # For demonstration purposes, we'll just return the content with the selected model name
     if(model_name=="FS"):
         checkpoint="ratio_proportion_change3/output/FS/bert_fine_tuned.model.ep32"
@@ -159,7 +157,7 @@ def process_file(file,label, model_name):
     subprocess.run(["python", "src/test_saved_model.py",
                     "--finetuned_bert_checkpoint",checkpoint
                     ])
-
+
     result = {}
     with open("result.txt", 'r') as file:
         for line in file:
@@ -194,11 +192,9 @@ def process_file(file,label, model_name):
     return text_output,plot_path

 # List of models for the dropdown menu
-
 models = ["highGRschool10", "lowGRschoolAll", "fullTest"]
-
-models = ["FS", "IS", "CORRECTNESS","EFFECTIVENESS"]
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+
+

 # Create the Gradio interface
 with gr.Blocks(css="""
@@ -388,25 +384,22 @@ tbody.svelte-18wv37q>tr.svelte-18wv37q:nth-child(odd) {
     with gr.Row():
         file_input = gr.File(label="Upload a test file", file_types=['.txt'], elem_classes="file-box")
         label_input = gr.File(label="Upload test labels", file_types=['.txt'], elem_classes="file-box")
-
+
         info_input = gr.File(label="Upload test info", file_types=['.txt'], elem_classes="file-box")

     model_dropdown = gr.Dropdown(choices=models, label="Select Finetune Task", elem_classes="dropdown-menu")
-
-
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+
+

     with gr.Row():
         output_text = gr.Textbox(label="Output Text")
         output_image = gr.Image(label="Output Plot")

     btn = gr.Button("Submit")
-
+
     btn.click(fn=process_file, inputs=[file_input,label_input,info_input, model_dropdown], outputs=[output_text,output_image])
-
-btn.click(fn=process_file, inputs=[file_input,label_input, model_dropdown], outputs=[output_text,output_image])
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+

 # Launch the app
 demo.launch()
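The kept side of this diff wires three file uploads plus the task dropdown into process_file. Stripped of the custom CSS and the subprocess calls, the resolved wiring reduces to roughly the sketch below; the process_file body here is a placeholder, not the app's real logic, and the component labels are copied from the diff:

import gradio as gr

def process_file(file, label, info, model_name):
    # Placeholder body: the real app copies the uploads, runs src/test_saved_model.py,
    # and reads result.txt; here we only echo what was received.
    return f"model={model_name}; files: {file.name}, {label.name}, {info.name}", None

models = ["highGRschool10", "lowGRschoolAll", "fullTest"]

with gr.Blocks() as demo:
    with gr.Row():
        file_input = gr.File(label="Upload a test file", file_types=[".txt"])
        label_input = gr.File(label="Upload test labels", file_types=[".txt"])
        info_input = gr.File(label="Upload test info", file_types=[".txt"])
    model_dropdown = gr.Dropdown(choices=models, label="Select Finetune Task")
    with gr.Row():
        output_text = gr.Textbox(label="Output Text")
        output_image = gr.Image(label="Output Plot")
    btn = gr.Button("Submit")
    btn.click(fn=process_file,
              inputs=[file_input, label_input, info_input, model_dropdown],
              outputs=[output_text, output_image])

demo.launch()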
src/attention.py
CHANGED
@@ -3,19 +3,11 @@ import torch.nn.functional as F
 import torch

 import math
-<<<<<<< HEAD
 import pickle

 class Attention(nn.Module):
     """
     Compute Scaled Dot Product Attention
-=======
-
-
-class Attention(nn.Module):
-    """
-    Compute 'Scaled Dot Product Attention
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
     """

     def __init__(self):
@@ -53,10 +45,6 @@ class MultiHeadedAttention(nn.Module):
         self.linear_layers = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(3)])
         self.output_linear = nn.Linear(d_model, d_model)
         self.attention = Attention()
-<<<<<<< HEAD
-=======
-
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
         self.dropout = nn.Dropout(p=dropout)

     def forward(self, query, key, value, mask=None):
@@ -70,21 +58,9 @@ class MultiHeadedAttention(nn.Module):
         query, key, value = [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
                              for l, x in zip(self.linear_layers, (query, key, value))]
         # 2) Apply attention on all the projected vectors in batch.
-<<<<<<< HEAD
         x, p_attn = self.attention(query, key, value, mask=mask, dropout=self.dropout)

         # 3) "Concat" using a view and apply a final linear.
         x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)

         return self.output_linear(x), p_attn
-=======
-        x, attn = self.attention(query, key, value, mask=mask, dropout=self.dropout)
-        # torch.Size([64, 8, 100, 100])
-        # print("Attention", attn.shape)
-
-        # 3) "Concat" using a view and apply a final linear.
-        x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
-
-        return self.output_linear(x)
-
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
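The kept forward of Attention itself is not shown in these hunks. For orientation, the standard scaled dot-product formulation implied by the class docstring, returning both the attended values and the weights that MultiHeadedAttention unpacks as (x, p_attn), looks roughly like this; treat it as a sketch, not necessarily the file's exact code:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    """Scaled dot-product attention (sketch of the usual formulation)."""

    def forward(self, query, key, value, mask=None, dropout=None):
        # scores: (batch, heads, seq_len, seq_len)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        p_attn = F.softmax(scores, dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)
        # Return both the attended values and the attention weights, matching
        # MultiHeadedAttention.forward, which expects (x, p_attn).
        return torch.matmul(p_attn, value), p_attn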
src/bert.py
CHANGED
@@ -1,14 +1,8 @@
 import torch.nn as nn
-<<<<<<< HEAD
 import torch

 from .transformer import TransformerBlock
 from .embedding import BERTEmbedding
-=======
-
-from transformer import TransformerBlock
-from embedding import BERTEmbedding
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

 class BERT(nn.Module):
     """
@@ -38,15 +32,11 @@ class BERT(nn.Module):
         # multi-layers transformer blocks, deep network
         self.transformer_blocks = nn.ModuleList(
             [TransformerBlock(hidden, attn_heads, hidden * 4, dropout) for _ in range(n_layers)])
-<<<<<<< HEAD
         # self.attention_values = []
-=======
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

     def forward(self, x, segment_info):
         # attention masking for padded token
         # torch.ByteTensor([batch_size, 1, seq_len, seq_len)
-<<<<<<< HEAD

         device = x.device

@@ -68,15 +58,5 @@ class BERT(nn.Module):
         for transformer in self.transformer_blocks:
             x = transformer.forward(x, mask)
             # self.attention_values.append(transformer.p_attn)
-=======
-        mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)
-        # print("bert mask: ", mask)
-        # embedding the indexed sequence to sequence of vectors
-        x = self.embedding(x, segment_info)
-
-        # running over multiple transformer blocks
-        for transformer in self.transformer_blocks:
-            x = transformer.forward(x, mask)
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

         return x
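The superseded branch built the attention mask inline as (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1), i.e. a (batch, 1, seq_len, seq_len) tensor marking which key positions hold real (non-[PAD], id 0) tokens. A small self-contained illustration of what that expression produces:

import torch

# Batch of 2 token-id sequences, seq_len = 5, with 0 used as the [PAD] id.
x = torch.tensor([[5, 3, 9, 0, 0],
                  [7, 2, 0, 0, 0]])

# For every query position (second-to-last dim) the same row-vector of
# "is this key position a real token?" is repeated; the extra dim of size 1
# broadcasts over attention heads.
mask = (x > 0).unsqueeze(1).repeat(1, x.size(1), 1).unsqueeze(1)
print(mask.shape)   # torch.Size([2, 1, 5, 5])
print(mask[0, 0])   # rows are identical copies of [True, True, True, False, False]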
src/classifier_model.py
CHANGED
@@ -1,28 +1,17 @@
-<<<<<<< HEAD
 import torch
 import torch.nn as nn

 from .bert import BERT
-=======
-import torch.nn as nn
-
-from bert import BERT
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896


 class BERTForClassification(nn.Module):
     """
-<<<<<<< HEAD
     Fine-tune Task Classifier Model
-=======
-    Progress Classifier Model
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
     """

     def __init__(self, bert: BERT, vocab_size, n_labels):
         """
         :param bert: BERT model which should be trained
-<<<<<<< HEAD
         :param vocab_size: total vocab size
         :param n_labels: number of labels for the task
         """
@@ -59,17 +48,4 @@ class BERTForClassificationWithFeats(nn.Module):
         # x = self.linear1(x)
         # x = self.RELU(x)
         # return self.linear2(x)
-        return self.linear(x)
-=======
-        :param vocab_size: total vocab size for masked_lm
-        """
-
-        super().__init__()
-        self.bert = bert
-        self.linear = nn.Linear(self.bert.hidden, n_labels)
-        # self.softmax = nn.LogSoftmax(dim=-1)
-
-    def forward(self, x, segment_label):
-        x = self.bert(x, segment_label)
-        return x, self.linear(x[:, 0])
->>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
+        return self.linear(x)
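Both branches end the classifier with a single linear layer; the superseded one applied it to the hidden state of the first ([CLS]) position, x[:, 0]. A minimal stand-alone sketch of that head, with hypothetical names and shapes chosen for illustration only:

import torch
import torch.nn as nn

class ClassificationHead(nn.Module):
    """Sketch: one linear layer over the first-position hidden state."""

    def __init__(self, hidden: int, n_labels: int):
        super().__init__()
        self.linear = nn.Linear(hidden, n_labels)

    def forward(self, bert_output: torch.Tensor) -> torch.Tensor:
        # bert_output: (batch, seq_len, hidden) -> logits: (batch, n_labels)
        return self.linear(bert_output[:, 0])

logits = ClassificationHead(hidden=256, n_labels=2)(torch.randn(4, 30, 256))
print(logits.shape)  # torch.Size([4, 2])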
src/dataset.py
CHANGED
Every hunk in this file deletes leftover Git conflict markers (<<<<<<< HEAD, =======, >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896) together with the code of the superseded branch, keeping the HEAD version throughout (229 lines removed, none added).

@@ -4,28 +4,17 @@ import pandas as pd
Keeps the package-relative imports (from .vocab import Vocab, import pickle, import copy, with the sklearn OneHotEncoder import commented out) and the HEAD constructor PretrainerDataset.__init__(self, dataset_path, vocab, seq_len=30, max_mask=0.15). Removes the superseded absolute imports (from vocab import Vocab, from sklearn.preprocessing import OneHotEncoder) and the alternate constructor signature __init__(self, dataset_path, vocab, seq_len=30, select_next_seq=False).

@@ -46,7 +35,6 @@ and @@ -61,22 +49,6 @@
Keeps the tab-separated line parsing and the max_mask reporting ("% of input tokens selected for masking"). Removes the superseded whitespace-split loading together with its max_mask_per_seq = 0.15 and select_next_seq bookkeeping.

@@ -84,7 +56,6 @@ and @@ -130,44 +101,6 @@
Keeps the HEAD __getitem__ that masks a single sequence. Removes the superseded __getitem__ body that optionally sampled a second segment for next-sequence prediction (token_b, is_same_student, sb_masked, sb_masked_label) and emitted an is_same_student field.

@@ -176,7 +109,6 @@ through @@ -242,16 +155,12 @@
Keeps the HEAD random_mask_seq: positions are selected with probability self.max_mask and tracked in masked_pos, with the usual 80% [MASK] / 10% random token / 10% unchanged replacement. Removes the superseded variant that hard-coded the 0.15 threshold and kept no masked_pos list.

@@ -288,43 +197,6 @@
Removes the superseded return output_tokens, output_labels and the get_token_b / truncate_to_max_seq helpers used for same-student next-sequence sampling; the HEAD branch retains them only as comments.

@@ -332,24 +204,15 @@ class TokenizerDataset and @@ -414,97 +277,21 @@
Keeps the HEAD TokenizerDataset.__init__(self, dataset_path, label_path, vocab, seq_len=30) with its feats handling and optional label file. Removes the alternate constructor (train=True, OneHotEncoder(sparse_output=False)), the superseded label loading and one-hot encoding, and a large commented-out class-balancing block.

@@ -527,23 +314,10 @@ and @@ -661,9 +435,6 @@
Keeps the HEAD __getitem__ that returns {'input', 'label', 'feat', 'segment_label'} and the TokenizerDatasetForCalibration class. Removes the superseded __getitem__ that built {'bert_input', 'progress_status', 'segment_label'} via self.vocab.to_seq(...), and a duplicated commented-out "# if __name__ == "__main__":" block.
src/pretrainer.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import torch
|
2 |
import torch.nn as nn
|
3 |
-
<<<<<<< HEAD
|
4 |
# from torch.nn import functional as F
|
5 |
from torch.optim import Adam
|
6 |
from torch.utils.data import DataLoader
|
@@ -36,75 +35,6 @@ class BERTTrainer:
|
|
36 |
train_dataloader: DataLoader, val_dataloader: DataLoader = None, test_dataloader: DataLoader = None,
|
37 |
lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=5000,
|
38 |
with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, log_folder_path: str = None):
|
39 |
-
=======
|
40 |
-
from torch.nn import functional as F
|
41 |
-
from torch.optim import Adam, SGD
|
42 |
-
from torch.utils.data import DataLoader
|
43 |
-
import pickle
|
44 |
-
|
45 |
-
from bert import BERT
|
46 |
-
from seq_model import BERTSM
|
47 |
-
from classifier_model import BERTForClassification
|
48 |
-
from optim_schedule import ScheduledOptim
|
49 |
-
|
50 |
-
import tqdm
|
51 |
-
import sys
|
52 |
-
|
53 |
-
import numpy as np
|
54 |
-
import visualization
|
55 |
-
|
56 |
-
from sklearn.metrics import precision_score, recall_score, f1_score
|
57 |
-
|
58 |
-
class ECE(nn.Module):
|
59 |
-
|
60 |
-
def __init__(self, n_bins=15):
|
61 |
-
"""
|
62 |
-
n_bins (int): number of confidence interval bins
|
63 |
-
"""
|
64 |
-
super(ECE, self).__init__()
|
65 |
-
bin_boundaries = torch.linspace(0, 1, n_bins + 1)
|
66 |
-
self.bin_lowers = bin_boundaries[:-1]
|
67 |
-
self.bin_uppers = bin_boundaries[1:]
|
68 |
-
|
69 |
-
def forward(self, logits, labels):
|
70 |
-
softmaxes = F.softmax(logits, dim=1)
|
71 |
-
confidences, predictions = torch.max(softmaxes, 1)
|
72 |
-
labels = torch.argmax(labels,1)
|
73 |
-
accuracies = predictions.eq(labels)
|
74 |
-
|
75 |
-
ece = torch.zeros(1, device=logits.device)
|
76 |
-
for bin_lower, bin_upper in zip(self.bin_lowers, self.bin_uppers):
|
77 |
-
# Calculated |confidence - accuracy| in each bin
|
78 |
-
in_bin = confidences.gt(bin_lower.item()) * confidences.le(bin_upper.item())
|
79 |
-
prop_in_bin = in_bin.float().mean()
|
80 |
-
if prop_in_bin.item() > 0:
|
81 |
-
accuracy_in_bin = accuracies[in_bin].float().mean()
|
82 |
-
avg_confidence_in_bin = confidences[in_bin].mean()
|
83 |
-
ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
|
84 |
-
|
85 |
-
return ece
|
86 |
-
|
87 |
-
def accurate_nb(preds, labels):
|
88 |
-
pred_flat = np.argmax(preds, axis=1).flatten()
|
89 |
-
labels_flat = np.argmax(labels, axis=1).flatten()
|
90 |
-
labels_flat = labels.flatten()
|
91 |
-
return np.sum(pred_flat == labels_flat)
|
92 |
-
|
93 |
-
class BERTTrainer:
|
94 |
-
"""
|
95 |
-
# Sequence..
|
96 |
-
|
97 |
-
BERTTrainer make the pretrained BERT model with two LM training method.
|
98 |
-
|
99 |
-
1. Masked Language Model : 3.3.1 Task #1: Masked LM
|
100 |
-
"""
|
101 |
-
|
102 |
-
def __init__(self, bert: BERT, vocab_size: int,
|
103 |
-
train_dataloader: DataLoader, test_dataloader: DataLoader = None,
|
104 |
-
lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
|
105 |
-
with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, same_student_prediction = False,
|
106 |
-
workspace_name=None):
|
107 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
108 |
"""
|
109 |
:param bert: BERT model which you want to train
|
110 |
:param vocab_size: total word vocab size
|
@@ -117,7 +47,6 @@ class BERTTrainer:
|
|
117 |
:param log_freq: logging frequency of the batch iteration
|
118 |
"""
|
119 |
|
120 |
-
<<<<<<< HEAD
|
121 |
cuda_condition = torch.cuda.is_available() and with_cuda
|
122 |
self.device = torch.device("cuda:0" if cuda_condition else "cpu")
|
123 |
print(cuda_condition, " Device used = ", self.device)
|
@@ -127,33 +56,16 @@ class BERTTrainer:
|
|
127 |
# This BERT model will be saved
|
128 |
self.bert = bert.to(self.device)
|
129 |
# Initialize the BERT Sequence Model, with BERT model
|
130 |
-
=======
|
131 |
-
# Setup cuda device for BERT training, argument -c, --cuda should be true
|
132 |
-
cuda_condition = torch.cuda.is_available() and with_cuda
|
133 |
-
self.device = torch.device("cuda:0" if cuda_condition else "cpu")
|
134 |
-
print("Device used = ", self.device)
|
135 |
-
|
136 |
-
# This BERT model will be saved every epoch
|
137 |
-
self.bert = bert
|
138 |
-
# Initialize the BERT Language Model, with BERT model
|
139 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
140 |
self.model = BERTSM(bert, vocab_size).to(self.device)
|
141 |
|
142 |
# Distributed GPU training if CUDA can detect more than 1 GPU
|
143 |
if with_cuda and torch.cuda.device_count() > 1:
|
144 |
print("Using %d GPUS for BERT" % torch.cuda.device_count())
|
145 |
-
<<<<<<< HEAD
|
146 |
self.model = nn.DataParallel(self.model, device_ids=available_gpus)
|
147 |
|
148 |
# Setting the train, validation and test data loader
|
149 |
self.train_data = train_dataloader
|
150 |
self.val_data = val_dataloader
|
151 |
-
=======
|
152 |
-
self.model = nn.DataParallel(self.model, device_ids=cuda_devices)
|
153 |
-
|
154 |
-
# Setting the train and test data loader
|
155 |
-
self.train_data = train_dataloader
|
156 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
157 |
self.test_data = test_dataloader
|
158 |
|
159 |
# Setting the Adam optimizer with hyper-param
|
@@ -164,7 +76,6 @@ class BERTTrainer:
|
|
164 |
self.criterion = nn.NLLLoss(ignore_index=0)
|
165 |
|
166 |
self.log_freq = log_freq
|
167 |
-
<<<<<<< HEAD
|
168 |
self.log_folder_path = log_folder_path
|
169 |
# self.workspace_name = workspace_name
|
170 |
self.save_model = False
|
@@ -175,18 +86,11 @@ class BERTTrainer:
|
|
175 |
f.close()
|
176 |
self.start_time = time.time()
|
177 |
|
178 |
-
=======
|
179 |
-
self.same_student_prediction = same_student_prediction
|
180 |
-
self.workspace_name = workspace_name
|
181 |
-
self.save_model = False
|
182 |
-
self.avg_loss = 10000
|
183 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
184 |
print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
|
185 |
|
186 |
def train(self, epoch):
|
187 |
self.iteration(epoch, self.train_data)
|
188 |
|
189 |
-
<<<<<<< HEAD
|
190 |
def val(self, epoch):
|
191 |
if epoch == 0:
|
192 |
self.avg_loss = 10000
|
@@ -196,12 +100,6 @@ class BERTTrainer:
|
|
196 |
self.iteration(epoch, self.test_data, phase="test")
|
197 |
|
198 |
def iteration(self, epoch, data_loader, phase="train"):
|
199 |
-
=======
|
200 |
-
def test(self, epoch):
|
201 |
-
self.iteration(epoch, self.test_data, train=False)
|
202 |
-
|
203 |
-
def iteration(self, epoch, data_loader, train=True):
|
204 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
205 |
"""
|
206 |
loop over the data_loader for training or testing
|
207 |
if on train status, backward operation is activated
|
@@ -212,7 +110,6 @@ class BERTTrainer:
|
|
212 |
:param train: boolean value of is train or test
|
213 |
:return: None
|
214 |
"""
|
215 |
-
<<<<<<< HEAD
|
216 |
|
217 |
# self.log_file = f"{self.workspace_name}/logs/{self.code}/log_{phase}_pretrained.txt"
|
218 |
# bert_hidden_representations = [] can be used
|
@@ -235,39 +132,10 @@ class BERTTrainer:
|
|
235 |
else:
|
236 |
self.model.eval()
|
237 |
with open(self.log_folder_path+f"/log_{phase}_pretrained.txt", 'a') as f:
|
238 |
-
=======
|
239 |
-
str_code = "train" if train else "test"
|
240 |
-
code = "masked_prediction" if self.same_student_prediction else "masked"
|
241 |
-
|
242 |
-
self.log_file = f"{self.workspace_name}/logs/{code}/log_{str_code}_pretrained.txt"
|
243 |
-
bert_hidden_representations = []
|
244 |
-
if epoch == 0:
|
245 |
-
f = open(self.log_file, 'w')
|
246 |
-
f.close()
|
247 |
-
if not train:
|
248 |
-
self.avg_loss = 10000
|
249 |
-
# Setting the tqdm progress bar
|
250 |
-
data_iter = tqdm.tqdm(enumerate(data_loader),
|
251 |
-
desc="EP_%s:%d" % (str_code, epoch),
|
252 |
-
total=len(data_loader),
|
253 |
-
bar_format="{l_bar}{r_bar}")
|
254 |
-
|
255 |
-
avg_loss_mask = 0.0
|
256 |
-
total_correct_mask = 0
|
257 |
-
total_element_mask = 0
|
258 |
-
|
259 |
-
avg_loss_pred = 0.0
|
260 |
-
total_correct_pred = 0
|
261 |
-
total_element_pred = 0
|
262 |
-
|
263 |
-
avg_loss = 0.0
|
264 |
-
with open(self.log_file, 'a') as f:
|
265 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
266 |
sys.stdout = f
|
267 |
for i, data in data_iter:
|
268 |
# 0. batch_data will be sent into the device(GPU or cpu)
|
269 |
data = {key: value.to(self.device) for key, value in data.items()}
|
270 |
-
<<<<<<< HEAD
|
271 |
|
272 |
# 1. forward masked_sm model
|
273 |
# mask_sm_output is log-probabilities output
|
@@ -280,38 +148,10 @@ class BERTTrainer:
|
|
280 |
|
281 |
# 3. backward and optimization only in train
|
282 |
if phase == "train":
|
283 |
-
=======
|
284 |
-
|
285 |
-
# 1. forward the next_sentence_prediction and masked_lm model
|
286 |
-
# next_sent_output, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"])
|
287 |
-
if self.same_student_prediction:
|
288 |
-
bert_hidden_rep, mask_lm_output, same_student_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)
|
289 |
-
else:
|
290 |
-
bert_hidden_rep, mask_lm_output = self.model.forward(data["bert_input"], data["segment_label"], self.same_student_prediction)
|
291 |
-
|
292 |
-
embeddings = [h for h in bert_hidden_rep.cpu().detach().numpy()]
|
293 |
-
bert_hidden_representations.extend(embeddings)
|
294 |
-
|
295 |
-
|
296 |
-
# 2-2. NLLLoss of predicting masked token word
|
297 |
-
mask_loss = self.criterion(mask_lm_output.transpose(1, 2), data["bert_label"])
|
298 |
-
|
299 |
-
# 2-3. Adding next_loss and mask_loss : 3.4 Pre-training Procedure
|
300 |
-
if self.same_student_prediction:
|
301 |
-
# 2-1. NLL(negative log likelihood) loss of is_next classification result
|
302 |
-
same_student_loss = self.criterion(same_student_output, data["is_same_student"])
|
303 |
-
loss = same_student_loss + mask_loss
|
304 |
-
else:
|
305 |
-
loss = mask_loss
|
306 |
-
|
307 |
-
# 3. backward and optimization only in train
|
308 |
-
if train:
|
309 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
310 |
self.optim_schedule.zero_grad()
|
311 |
loss.backward()
|
312 |
self.optim_schedule.step_and_update_lr()
|
313 |
|
314 |
-
<<<<<<< HEAD
|
315 |
# tokens with highest log-probabilities creates a predicted sequence
|
316 |
pred_tokens = torch.argmax(mask_sm_output, dim=-1)
|
317 |
mask_correct = (data["bert_label"] == pred_tokens) & data["masked_pos"]
|
@@ -348,69 +188,6 @@ class BERTTrainer:
|
|
348 |
if self.avg_loss > (avg_loss / len(data_iter)):
|
349 |
self.save_model = True
|
350 |
self.avg_loss = (avg_loss / len(data_iter))
|
351 |
-
=======
|
352 |
-
|
353 |
-
non_zero_mask = (data["bert_label"] != 0).float()
|
354 |
-
predictions = torch.argmax(mask_lm_output, dim=-1)
|
355 |
-
predicted_masked = predictions*non_zero_mask
|
356 |
-
mask_correct = ((data["bert_label"] == predicted_masked)*non_zero_mask).sum().item()
|
357 |
-
|
358 |
-
avg_loss_mask += loss.item()
|
359 |
-
total_correct_mask += mask_correct
|
360 |
-
total_element_mask += non_zero_mask.sum().item()
|
361 |
-
|
362 |
-
post_fix = {
|
363 |
-
"epoch": epoch,
|
364 |
-
"iter": i,
|
365 |
-
"avg_loss": avg_loss_mask / (i + 1),
|
366 |
-
"avg_acc_mask": total_correct_mask / total_element_mask * 100,
|
367 |
-
"loss": loss.item()
|
368 |
-
}
|
369 |
-
|
370 |
-
# next sentence prediction accuracy
|
371 |
-
if self.same_student_prediction:
|
372 |
-
correct = same_student_output.argmax(dim=-1).eq(data["is_same_student"]).sum().item()
|
373 |
-
avg_loss_pred += loss.item()
|
374 |
-
total_correct_pred += correct
|
375 |
-
total_element_pred += data["is_same_student"].nelement()
|
376 |
-
# correct = next_sent_output.argmax(dim=-1).eq(data["is_next"]).sum().item()
|
377 |
-
post_fix["avg_loss"] = avg_loss_pred / (i + 1)
|
378 |
-
post_fix["avg_acc_pred"] = total_correct_pred / total_element_pred * 100
|
379 |
-
post_fix["loss"] = loss.item()
|
380 |
-
|
381 |
-
avg_loss +=loss.item()
|
382 |
-
|
383 |
-
if i % self.log_freq == 0:
|
384 |
-
data_iter.write(str(post_fix))
|
385 |
-
# if not train and epoch > 20 :
|
386 |
-
# pickle.dump(mask_lm_output.cpu().detach().numpy(), open(f"logs/mask/mask_out_e{epoch}_{i}.pkl","wb"))
|
387 |
-
# pickle.dump(data["bert_label"].cpu().detach().numpy(), open(f"logs/mask/label_e{epoch}_{i}.pkl","wb"))
|
388 |
-
|
389 |
-
final_msg = {
|
390 |
-
"epoch": f"EP{epoch}_{str_code}",
|
391 |
-
"avg_loss": avg_loss / len(data_iter),
|
392 |
-
"total_masked_acc": total_correct_mask * 100.0 / total_element_mask
|
393 |
-
}
|
394 |
-
if self.same_student_prediction:
|
395 |
-
final_msg["total_prediction_acc"] = total_correct_pred * 100.0 / total_element_pred
|
396 |
-
|
397 |
-
print(final_msg)
|
398 |
-
# print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_masked_acc=", total_correct_mask * 100.0 / total_element_mask, "total_prediction_acc=", total_correct_pred * 100.0 / total_element_pred)
|
399 |
-
# else:
|
400 |
-
# print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_masked_acc=", total_correct_mask * 100.0 / total_element_mask)
|
401 |
-
# print("EP%d_%s, " % (epoch, str_code))
|
402 |
-
|
403 |
-
f.close()
|
404 |
-
sys.stdout = sys.__stdout__
|
405 |
-
self.save_model = False
|
406 |
-
if self.avg_loss > (avg_loss / len(data_iter)):
|
407 |
-
self.save_model = True
|
408 |
-
self.avg_loss = (avg_loss / len(data_iter))
|
409 |
-
|
410 |
-
# pickle.dump(bert_hidden_representations, open(f"embeddings/{code}/{str_code}_embeddings_{epoch}.pkl","wb"))
|
411 |
-
|
412 |
-
|
413 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
414 |
|
415 |
def save(self, epoch, file_path="output/bert_trained.model"):
|
416 |
"""
|
@@ -432,12 +209,8 @@ class BERTFineTuneTrainer:
|
|
432 |
def __init__(self, bert: BERT, vocab_size: int,
|
433 |
train_dataloader: DataLoader, test_dataloader: DataLoader = None,
|
434 |
lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
|
435 |
-
<<<<<<< HEAD
|
436 |
with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
|
437 |
num_labels=2, log_folder_path: str = None):
|
438 |
-
=======
|
439 |
-
with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None, num_labels=2):
|
440 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
441 |
"""
|
442 |
:param bert: BERT model which you want to train
|
443 |
:param vocab_size: total word vocab size
|
@@ -453,7 +226,6 @@ class BERTFineTuneTrainer:
|
|
453 |
# Setup cuda device for BERT training, argument -c, --cuda should be true
|
454 |
cuda_condition = torch.cuda.is_available() and with_cuda
|
455 |
self.device = torch.device("cuda:0" if cuda_condition else "cpu")
|
456 |
-
<<<<<<< HEAD
|
457 |
print(cuda_condition, " Device used = ", self.device)
|
458 |
|
459 |
available_gpus = list(range(torch.cuda.device_count()))
|
@@ -462,6 +234,16 @@ class BERTFineTuneTrainer:
|
|
462 |
self.bert = bert
|
463 |
for param in self.bert.parameters():
|
464 |
param.requires_grad = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
# Initialize the BERT Language Model, with BERT model
|
466 |
# self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
|
467 |
# self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
|
@@ -748,48 +530,11 @@ class BERTFineTuneTrainer1:
|
|
748 |
for fi in ['train', 'test']: #'val',
|
749 |
f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
|
750 |
f.close()
|
751 |
-
=======
|
752 |
-
print("Device used = ", self.device)
|
753 |
-
|
754 |
-
# This BERT model will be saved every epoch
|
755 |
-
self.bert = bert
|
756 |
-
# for param in self.bert.parameters():
|
757 |
-
# param.requires_grad = False
|
758 |
-
# Initialize the BERT Language Model, with BERT model
|
759 |
-
self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
|
760 |
-
|
761 |
-
# Distributed GPU training if CUDA can detect more than 1 GPU
|
762 |
-
if with_cuda and torch.cuda.device_count() > 1:
|
763 |
-
print("Using %d GPUS for BERT" % torch.cuda.device_count())
|
764 |
-
self.model = nn.DataParallel(self.model, device_ids=cuda_devices)
|
765 |
-
|
766 |
-
# Setting the train and test data loader
|
767 |
-
self.train_data = train_dataloader
|
768 |
-
self.test_data = test_dataloader
|
769 |
-
|
770 |
-
self.optim = Adam(self.model.parameters(), lr=lr, weight_decay=weight_decay, eps=1e-9)
|
771 |
-
# self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)
|
772 |
-
|
773 |
-
if num_labels == 1:
|
774 |
-
self.criterion = nn.MSELoss()
|
775 |
-
elif num_labels == 2:
|
776 |
-
self.criterion = nn.CrossEntropyLoss()
|
777 |
-
elif num_labels > 2:
|
778 |
-
self.criterion = nn.BCEWithLogitsLoss()
|
779 |
-
|
780 |
-
self.ece_criterion = ECE().to(self.device)
|
781 |
-
|
782 |
-
self.log_freq = log_freq
|
783 |
-
self.workspace_name = workspace_name
|
784 |
-
self.save_model = False
|
785 |
-
self.avg_loss = 10000
|
786 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
787 |
print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
|
788 |
|
789 |
def train(self, epoch):
|
790 |
self.iteration(epoch, self.train_data)
|
791 |
|
792 |
-
<<<<<<< HEAD
|
793 |
# def val(self, epoch):
|
794 |
# self.iteration(epoch, self.val_data, phase="val")
|
795 |
|
@@ -799,12 +544,6 @@ class BERTFineTuneTrainer1:
|
|
799 |
self.iteration(epoch, self.test_data, phase="test")
|
800 |
|
801 |
def iteration(self, epoch, data_loader, phase="train"):
|
802 |
-
=======
|
803 |
-
def test(self, epoch):
|
804 |
-
self.iteration(epoch, self.test_data, train=False)
|
805 |
-
|
806 |
-
def iteration(self, epoch, data_loader, train=True):
|
807 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
808 |
"""
|
809 |
loop over the data_loader for training or testing
|
810 |
if on train status, backward operation is activated
|
@@ -815,26 +554,10 @@ class BERTFineTuneTrainer1:
|
|
815 |
:param train: boolean value of is train or test
|
816 |
:return: None
|
817 |
"""
|
818 |
-
<<<<<<< HEAD
|
819 |
|
820 |
# Setting the tqdm progress bar
|
821 |
data_iter = tqdm.tqdm(enumerate(data_loader),
|
822 |
desc="EP_%s:%d" % (phase, epoch),
|
823 |
-
=======
|
824 |
-
str_code = "train" if train else "test"
|
825 |
-
|
826 |
-
self.log_file = f"{self.workspace_name}/logs/masked/log_{str_code}_FS_finetuned.txt"
|
827 |
-
|
828 |
-
if epoch == 0:
|
829 |
-
f = open(self.log_file, 'w')
|
830 |
-
f.close()
|
831 |
-
if not train:
|
832 |
-
self.avg_loss = 10000
|
833 |
-
|
834 |
-
# Setting the tqdm progress bar
|
835 |
-
data_iter = tqdm.tqdm(enumerate(data_loader),
|
836 |
-
desc="EP_%s:%d" % (str_code, epoch),
|
837 |
-
>>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
|
838 |
total=len(data_loader),
|
839 |
bar_format="{l_bar}{r_bar}")
|
840 |
|
@@ -843,7 +566,6 @@ class BERTFineTuneTrainer1:
|
|
843 |
total_element = 0
|
844 |
plabels = []
|
845 |
tlabels = []
|
846 |
-
<<<<<<< HEAD
|
847 |
probabs = []
|
848 |
|
849 |
if phase == "train":
|
@@ -864,43 +586,10 @@ class BERTFineTuneTrainer1:
|
|
864 |
864 |       logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"])
865 |
866 |       loss = self.criterion(logits, data["label"])
867 | -     =======
868 | -     eval_accurate_nb = 0
869 | -     nb_eval_examples = 0
870 | -     logits_list = []
871 | -     labels_list = []
872 | -
873 | -     if train:
874 | -         self.model.train()
875 | -     else:
876 | -         self.model.eval()
877 | -
878 | -     with open(self.log_file, 'a') as f:
879 | -         sys.stdout = f
880 | -
881 | -         for i, data in data_iter:
882 | -             # 0. batch_data will be sent into the device(GPU or cpu)
883 | -             data = {key: value.to(self.device) for key, value in data.items()}
884 | -             if train:
885 | -                 h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"])
886 | -             else:
887 | -                 with torch.no_grad():
888 | -                     h_rep, logits = self.model.forward(data["bert_input"], data["segment_label"])
889 | -             # print(logits, logits.shape)
890 | -             logits_list.append(logits.cpu())
891 | -             labels_list.append(data["progress_status"].cpu())
892 | -             # print(">>>>>>>>>>>>", progress_output)
893 | -             # print(f"{epoch}---nelement--- {data['progress_status'].nelement()}")
894 | -             # print(data["progress_status"].shape, logits.shape)
895 | -             progress_loss = self.criterion(logits, data["progress_status"])
896 | -             loss = progress_loss
897 | -
898 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
899 |       if torch.cuda.device_count() > 1:
900 |           loss = loss.mean()
901 |
902 |       # 3. backward and optimization only in train
903 | -     <<<<<<< HEAD
904 |       if phase == "train":
905 |           self.optim_schedule.zero_grad()
906 |           loss.backward()
@@ -969,108 +658,10 @@ class BERTFineTuneTrainer1:
969 |       sys.stdout = sys.__stdout__
970 |
971 |       if phase == "test":
972 | -     =======
973 | -         if train:
974 | -             self.optim.zero_grad()
975 | -             loss.backward()
976 | -             torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
977 | -             self.optim.step()
978 | -
979 | -         # progress prediction accuracy
980 | -         # correct = progress_output.argmax(dim=-1).eq(data["progress_status"]).sum().item()
981 | -         probs = nn.LogSoftmax(dim=-1)(logits)
982 | -         predicted_labels = torch.argmax(probs, dim=-1)
983 | -         true_labels = torch.argmax(data["progress_status"], dim=-1)
984 | -         plabels.extend(predicted_labels.cpu().numpy())
985 | -         tlabels.extend(true_labels.cpu().numpy())
986 | -
987 | -         # print(">>>>>>>>>>>>>>", predicted_labels, true_labels)
988 | -         # Compare predicted labels to true labels and calculate accuracy
989 | -         correct = (predicted_labels == true_labels).sum().item()
990 | -         avg_loss += loss.item()
991 | -         total_correct += correct
992 | -         total_element += true_labels.nelement()
993 | -
994 | -         if train:
995 | -             post_fix = {
996 | -                 "epoch": epoch,
997 | -                 "iter": i,
998 | -                 "avg_loss": avg_loss / (i + 1),
999 | -                 "avg_acc": total_correct / total_element * 100,
1000 | -                "loss": loss.item()
1001 | -            }
1002 | -        else:
1003 | -            logits = logits.detach().cpu().numpy()
1004 | -            label_ids = data["progress_status"].to('cpu').numpy()
1005 | -            tmp_eval_nb = accurate_nb(logits, label_ids)
1006 | -
1007 | -            eval_accurate_nb += tmp_eval_nb
1008 | -            nb_eval_examples += label_ids.shape[0]
1009 | -
1010 | -            total_element += data["progress_status"].nelement()
1011 | -            # avg_loss += loss.item()
1012 | -
1013 | -            post_fix = {
1014 | -                "epoch": epoch,
1015 | -                "iter": i,
1016 | -                "avg_loss": avg_loss / (i + 1),
1017 | -                "avg_acc": tmp_eval_nb / total_element * 100,
1018 | -                "loss": loss.item()
1019 | -            }
1020 | -
1021 | -
1022 | -        if i % self.log_freq == 0:
1023 | -            data_iter.write(str(post_fix))
1024 | -
1025 | -    # precisions = precision_score(plabels, tlabels, average="weighted")
1026 | -    # recalls = recall_score(plabels, tlabels, average="weighted")
1027 | -    f1_scores = f1_score(plabels, tlabels, average="weighted")
1028 | -    if train:
1029 | -        final_msg = {
1030 | -            "epoch": f"EP{epoch}_{str_code}",
1031 | -            "avg_loss": avg_loss / len(data_iter),
1032 | -            "total_acc": total_correct * 100.0 / total_element,
1033 | -            # "precisions": precisions,
1034 | -            # "recalls": recalls,
1035 | -            "f1_scores": f1_scores
1036 | -        }
1037 | -    else:
1038 | -        eval_accuracy = eval_accurate_nb/nb_eval_examples
1039 | -
1040 | -        logits_ece = torch.cat(logits_list)
1041 | -        labels_ece = torch.cat(labels_list)
1042 | -        ece = self.ece_criterion(logits_ece, labels_ece).item()
1043 | -        final_msg = {
1044 | -            "epoch": f"EP{epoch}_{str_code}",
1045 | -            "eval_accuracy": eval_accuracy,
1046 | -            "ece": ece,
1047 | -            "avg_loss": avg_loss / len(data_iter),
1048 | -            # "precisions": precisions,
1049 | -            # "recalls": recalls,
1050 | -            "f1_scores": f1_scores
1051 | -        }
1052 | -        if self.save_model:
1053 | -            conf_hist = visualization.ConfidenceHistogram()
1054 | -            plt_test = conf_hist.plot(np.array(logits_ece), np.array(labels_ece), title= f"Confidence Histogram {epoch}")
1055 | -            plt_test.savefig(f"{self.workspace_name}/plots/confidence_histogram/FS/conf_histogram_test_{epoch}.png",bbox_inches='tight')
1056 | -            plt_test.close()
1057 | -
1058 | -            rel_diagram = visualization.ReliabilityDiagram()
1059 | -            plt_test_2 = rel_diagram.plot(np.array(logits_ece), np.array(labels_ece),title=f"Reliability Diagram {epoch}")
1060 | -            plt_test_2.savefig(f"{self.workspace_name}/plots/confidence_histogram/FS/rel_diagram_test_{epoch}.png",bbox_inches='tight')
1061 | -            plt_test_2.close()
1062 | -        print(final_msg)
1063 | -
1064 | -    # print("EP%d_%s, avg_loss=" % (epoch, str_code), avg_loss / len(data_iter), "total_acc=", total_correct * 100.0 / total_element)
1065 | -    f.close()
1066 | -    sys.stdout = sys.__stdout__
1067 | -    if train:
1068 | -    >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
1069 |       self.save_model = False
1070 |       if self.avg_loss > (avg_loss / len(data_iter)):
1071 |           self.save_model = True
1072 |           self.avg_loss = (avg_loss / len(data_iter))
1073 | -     <<<<<<< HEAD
1074 |
1075 |       def iteration_1(self, epoch_idx, data):
1076 |           try:
@@ -1094,11 +685,6 @@ class BERTFineTuneTrainer1:
1094 |           print(f"Error during iteration: {e}")
1095 |           raise
1096 |
1097 | -     =======
1098 | -
1099 | -     # plt_test.show()
1100 | -     # print("EP%d_%s, " % (epoch, str_code))
1101 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
1102 |
1103 |       def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"):
1104 |           """
@@ -1113,7 +699,6 @@ class BERTFineTuneTrainer1:
1113 |           self.model.to(self.device)
1114 |           print("EP:%d Model Saved on:" % epoch, output_path)
1115 |           return output_path
1116 | -     <<<<<<< HEAD
1117 |
1118 |
1119 | class BERTAttention:
@@ -1221,5 +806,3 @@ class BERTAttention:
1221 |
1222 |
1223 |
1224 | -     =======
1225 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
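The evaluation branch removed above handed calibration measurement off to self.ece_criterion and the visualization helpers (ConfidenceHistogram, ReliabilityDiagram). For orientation, a minimal, self-contained sketch of the expected-calibration-error computation such a criterion typically performs; the 15-bin setup, the softmax over raw logits, and integer class labels are assumptions here, not code taken from this repo:

import torch
import torch.nn.functional as F

def expected_calibration_error(logits, labels, n_bins=15):
    # Bin predictions by top-1 confidence, then average |accuracy - confidence| weighted by bin size.
    probs = F.softmax(logits, dim=-1)
    confidences, predictions = probs.max(dim=-1)
    accuracies = predictions.eq(labels).float()
    bin_edges = torch.linspace(0.0, 1.0, n_bins + 1)
    ece = torch.zeros(1)
    for lower, upper in zip(bin_edges[:-1], bin_edges[1:]):
        in_bin = (confidences > lower) & (confidences <= upper)
        prop_in_bin = in_bin.float().mean()
        if prop_in_bin.item() > 0:
            ece += (accuracies[in_bin].mean() - confidences[in_bin].mean()).abs() * prop_in_bin
    return ece.item()

print(expected_calibration_error(torch.randn(64, 2), torch.randint(0, 2, (64,))))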
1 |   import torch
2 |   import torch.nn as nn
3 |   # from torch.nn import functional as F
4 |   from torch.optim import Adam
5 |   from torch.utils.data import DataLoader
35 |          train_dataloader: DataLoader, val_dataloader: DataLoader = None, test_dataloader: DataLoader = None,
36 |          lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=5000,
37 |          with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, log_folder_path: str = None):
38 |          """
39 |          :param bert: BERT model which you want to train
40 |          :param vocab_size: total word vocab size
47 |          :param log_freq: logging frequency of the batch iteration
48 |          """
49 |
50 |          cuda_condition = torch.cuda.is_available() and with_cuda
51 |          self.device = torch.device("cuda:0" if cuda_condition else "cpu")
52 |          print(cuda_condition, " Device used = ", self.device)
56 |          # This BERT model will be saved
57 |          self.bert = bert.to(self.device)
58 |          # Initialize the BERT Sequence Model, with BERT model
59 |          self.model = BERTSM(bert, vocab_size).to(self.device)
60 |
61 |          # Distributed GPU training if CUDA can detect more than 1 GPU
62 |          if with_cuda and torch.cuda.device_count() > 1:
63 |              print("Using %d GPUS for BERT" % torch.cuda.device_count())
64 |              self.model = nn.DataParallel(self.model, device_ids=available_gpus)
65 |
66 |          # Setting the train, validation and test data loader
67 |          self.train_data = train_dataloader
68 |          self.val_data = val_dataloader
69 |          self.test_data = test_dataloader
70 |
71 |          # Setting the Adam optimizer with hyper-param
76 |          self.criterion = nn.NLLLoss(ignore_index=0)
77 |
78 |          self.log_freq = log_freq
79 |          self.log_folder_path = log_folder_path
80 |          # self.workspace_name = workspace_name
81 |          self.save_model = False
86 |              f.close()
87 |          self.start_time = time.time()
88 |
89 |          print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
90 |
91 |      def train(self, epoch):
92 |          self.iteration(epoch, self.train_data)
93 |
94 |      def val(self, epoch):
95 |          if epoch == 0:
96 |              self.avg_loss = 10000
100 |         self.iteration(epoch, self.test_data, phase="test")
101 |
102 |     def iteration(self, epoch, data_loader, phase="train"):
103 |         """
104 |         loop over the data_loader for training or testing
105 |         if on train status, backward operation is activated
110 |         :param train: boolean value of is train or test
111 |         :return: None
112 |         """
113 |
114 |         # self.log_file = f"{self.workspace_name}/logs/{self.code}/log_{phase}_pretrained.txt"
115 |         # bert_hidden_representations = [] can be used
132 |         else:
133 |             self.model.eval()
134 |         with open(self.log_folder_path+f"/log_{phase}_pretrained.txt", 'a') as f:
135 |             sys.stdout = f
136 |             for i, data in data_iter:
137 |                 # 0. batch_data will be sent into the device(GPU or cpu)
138 |                 data = {key: value.to(self.device) for key, value in data.items()}
139 |
140 |                 # 1. forward masked_sm model
141 |                 # mask_sm_output is log-probabilities output
148 |
149 |                 # 3. backward and optimization only in train
150 |                 if phase == "train":
151 |                     self.optim_schedule.zero_grad()
152 |                     loss.backward()
153 |                     self.optim_schedule.step_and_update_lr()
154 |
155 |                 # tokens with highest log-probabilities creates a predicted sequence
156 |                 pred_tokens = torch.argmax(mask_sm_output, dim=-1)
157 |                 mask_correct = (data["bert_label"] == pred_tokens) & data["masked_pos"]
188 |             if self.avg_loss > (avg_loss / len(data_iter)):
189 |                 self.save_model = True
190 |                 self.avg_loss = (avg_loss / len(data_iter))
191 |
192 |     def save(self, epoch, file_path="output/bert_trained.model"):
193 |         """
209 |     def __init__(self, bert: BERT, vocab_size: int,
210 |                  train_dataloader: DataLoader, test_dataloader: DataLoader = None,
211 |                  lr: float = 1e-4, betas=(0.9, 0.999), weight_decay: float = 0.01, warmup_steps=10000,
212 |                  with_cuda: bool = True, cuda_devices=None, log_freq: int = 10, workspace_name=None,
213 |                  num_labels=2, log_folder_path: str = None):
214 |         """
215 |         :param bert: BERT model which you want to train
216 |         :param vocab_size: total word vocab size
226 |         # Setup cuda device for BERT training, argument -c, --cuda should be true
227 |         cuda_condition = torch.cuda.is_available() and with_cuda
228 |         self.device = torch.device("cuda:0" if cuda_condition else "cpu")
229 |         print(cuda_condition, " Device used = ", self.device)
230 |
231 |         available_gpus = list(range(torch.cuda.device_count()))
234 |         self.bert = bert
235 |         for param in self.bert.parameters():
236 |             param.requires_grad = False
237 | +
238 | +       # for name, param in self.bert.named_parameters():
239 | +       #     if '.attention.linear_layers.0' in name or \
240 | +       #     '.attention.linear_layers.1' in name or \
241 | +       #     '.attention.linear_layers.2' in name:
242 | +       #         # if 'transformer_blocks.' in name:# or \
243 | +       #         # 'transformer_blocks.3.' in name:
244 | +       #         # if '2.attention.linear_layers.' in name or \
245 | +       #         # '3.attention.linear_layers.' in name:
246 | +       #         param.requires_grad = True
247 |         # Initialize the BERT Language Model, with BERT model
248 |         # self.model = BERTForClassification(self.bert, vocab_size, num_labels).to(self.device)
249 |         # self.model = BERTForClassificationWithFeats(self.bert, num_labels, 8).to(self.device)
530 |         for fi in ['train', 'test']: #'val',
531 |             f = open(self.log_folder_path+f"/log_{fi}_finetuned.txt", 'w')
532 |             f.close()
533 |         print("Total Parameters:", sum([p.nelement() for p in self.model.parameters()]))
534 |
535 |     def train(self, epoch):
536 |         self.iteration(epoch, self.train_data)
537 |
538 |     # def val(self, epoch):
539 |     #     self.iteration(epoch, self.val_data, phase="val")
540 |
544 |         self.iteration(epoch, self.test_data, phase="test")
545 |
546 |     def iteration(self, epoch, data_loader, phase="train"):
547 |         """
548 |         loop over the data_loader for training or testing
549 |         if on train status, backward operation is activated
554 |         :param train: boolean value of is train or test
555 |         :return: None
556 |         """
557 |
558 |         # Setting the tqdm progress bar
559 |         data_iter = tqdm.tqdm(enumerate(data_loader),
560 |                               desc="EP_%s:%d" % (phase, epoch),
561 |                               total=len(data_loader),
562 |                               bar_format="{l_bar}{r_bar}")
563 |
566 |         total_element = 0
567 |         plabels = []
568 |         tlabels = []
569 |         probabs = []
570 |
571 |         if phase == "train":
586 |             logits = self.model.forward(data["input"], data["segment_label"])#, data["feat"])
587 |
588 |             loss = self.criterion(logits, data["label"])
589 |             if torch.cuda.device_count() > 1:
590 |                 loss = loss.mean()
591 |
592 |             # 3. backward and optimization only in train
593 |             if phase == "train":
594 |                 self.optim_schedule.zero_grad()
595 |                 loss.backward()
658 |         sys.stdout = sys.__stdout__
659 |
660 |         if phase == "test":
661 |             self.save_model = False
662 |             if self.avg_loss > (avg_loss / len(data_iter)):
663 |                 self.save_model = True
664 |                 self.avg_loss = (avg_loss / len(data_iter))
665 |
666 |     def iteration_1(self, epoch_idx, data):
667 |         try:
685 |             print(f"Error during iteration: {e}")
686 |             raise
687 |
688 |
689 |     def save(self, epoch, file_path="output/bert_fine_tuned_trained.model"):
690 |         """
699 |         self.model.to(self.device)
700 |         print("EP:%d Model Saved on:" % epoch, output_path)
701 |         return output_path
702 |
703 |
704 | class BERTAttention:
806 |
807 |
808 |
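In the resulting file above, the fine-tuning trainer freezes the whole BERT encoder (lines 235-236) and keeps a commented-out block for selectively re-enabling only the attention projection layers (lines 238-246). A self-contained sketch of that freeze-then-thaw pattern; it uses torch's built-in TransformerEncoder as a stand-in, so the "self_attn" name filter is an assumption and differs from the ".attention.linear_layers" names used in this repo:

import torch.nn as nn

encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True),
    num_layers=2,
)

for param in encoder.parameters():              # freeze the whole encoder first
    param.requires_grad = False

for name, param in encoder.named_parameters():
    if "self_attn" in name:                     # then re-enable just the attention projections
        param.requires_grad = True

trainable = [name for name, p in encoder.named_parameters() if p.requires_grad]
print(len(trainable), "attention parameter tensors left trainable")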
src/seq_model.py
CHANGED
@@ -1,10 +1,6 @@
1 |   import torch.nn as nn
2 |
3 | - <<<<<<< HEAD
4 |   from .bert import BERT
5 | - =======
6 | - from bert import BERT
7 | - >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
8 |
9 |
10 |  class BERTSM(nn.Module):
@@ -22,23 +18,10 @@ class BERTSM(nn.Module):
22 |          super().__init__()
23 |          self.bert = bert
24 |          self.mask_lm = MaskedSequenceModel(self.bert.hidden, vocab_size)
25 | -     <<<<<<< HEAD
26 |
27 |      def forward(self, x, segment_label):
28 |          x = self.bert(x, segment_label)
29 |          return self.mask_lm(x), x[:, 0]
30 | -     =======
31 | -         self.same_student = SameStudentPrediction(self.bert.hidden)
32 | -
33 | -     def forward(self, x, segment_label, pred=False):
34 | -         x = self.bert(x, segment_label)
35 | -         # torch.Size([32, 200, 512])
36 | -         # print("???????????? ",x.shape)
37 | -         if pred:
38 | -             return x[:, 0], self.mask_lm(x), self.same_student(x)
39 | -         else:
40 | -             return x[:, 0], self.mask_lm(x)
41 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
42 |
43 |
44 |  class MaskedSequenceModel(nn.Module):
@@ -57,23 +40,4 @@ class MaskedSequenceModel(nn.Module):
57 |          self.softmax = nn.LogSoftmax(dim=-1)
58 |
59 |      def forward(self, x):
60 | -     <<<<<<< HEAD
61 | -         return self.softmax(self.linear(x))
62 | -     =======
63 | -         return self.softmax(self.linear(x))
64 | -
65 | -
66 | - class SameStudentPrediction(nn.Module):
67 | -
68 | -     def __init__(self, hidden):
69 | -         """
70 | -         :param hidden: BERT model output size
71 | -         """
72 | -         super().__init__()
73 | -         self.linear = nn.Linear(hidden, 2)
74 | -         self.softmax = nn.LogSoftmax(dim=-1)
75 | -
76 | -     def forward(self, x):
77 | -         return self.softmax(self.linear(x[:, 0]))
78 | -
79 | - >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896

1 |   import torch.nn as nn
2 |
3 |   from .bert import BERT
4 |
5 |
6 |   class BERTSM(nn.Module):
18 |          super().__init__()
19 |          self.bert = bert
20 |          self.mask_lm = MaskedSequenceModel(self.bert.hidden, vocab_size)
21 |
22 |      def forward(self, x, segment_label):
23 |          x = self.bert(x, segment_label)
24 |          return self.mask_lm(x), x[:, 0]
25 |
26 |
27 |  class MaskedSequenceModel(nn.Module):
40 |          self.softmax = nn.LogSoftmax(dim=-1)
41 |
42 |      def forward(self, x):
43 | +        return self.softmax(self.linear(x))
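The resolved BERTSM.forward above returns the masked-sequence log-probabilities first and the position-0 hidden state second. A self-contained sketch of how a pre-training step consumes that first output, mirroring the nn.NLLLoss(ignore_index=0) criterion and the pred_tokens / mask_correct accounting shown in the pretrainer excerpt; the random tensors stand in for a real model output and batch, and the transpose is only because nn.NLLLoss expects the class dimension second:

import torch
import torch.nn as nn

batch, seq_len, vocab_size = 2, 8, 20
mask_sm_output = torch.randn(batch, seq_len, vocab_size).log_softmax(dim=-1)  # stand-in for BERTSM's first output
bert_label = torch.randint(0, vocab_size, (batch, seq_len))                   # index 0 is treated as padding and ignored
masked_pos = torch.zeros(batch, seq_len, dtype=torch.bool)
masked_pos[:, 2] = True                                                       # pretend position 2 was masked

criterion = nn.NLLLoss(ignore_index=0)
loss = criterion(mask_sm_output.transpose(1, 2), bert_label)                  # (N, C, L) vs (N, L)

pred_tokens = torch.argmax(mask_sm_output, dim=-1)
mask_correct = (bert_label == pred_tokens) & masked_pos
print(loss.item(), mask_correct.sum().item() / masked_pos.sum().item())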
src/transformer.py
CHANGED
@@ -1,12 +1,7 @@
1 |   import torch.nn as nn
2 |
3 | - <<<<<<< HEAD
4 |   from .attention import MultiHeadedAttention
5 |   from .transformer_component import SublayerConnection, PositionwiseFeedForward
6 | - =======
7 | - from attention import MultiHeadedAttention
8 | - from transformer_component import SublayerConnection, PositionwiseFeedForward
9 | - >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
10 |
11 |  class TransformerBlock(nn.Module):
12 |      """
@@ -30,12 +25,8 @@ class TransformerBlock(nn.Module):
30 |          self.dropout = nn.Dropout(p=dropout)
31 |
32 |      def forward(self, x, mask):
33 | -     <<<<<<< HEAD
34 |          attn_output, p_attn = self.attention.forward(x, x, x, mask=mask)
35 |          self.p_attn = p_attn.cpu().detach().numpy()
36 |          x = self.input_sublayer(x, lambda _x: attn_output)
37 | -     =======
38 | -         x = self.input_sublayer(x, lambda _x: self.attention.forward(_x, _x, _x, mask=mask))
39 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
40 |          x = self.output_sublayer(x, self.feed_forward)
41 |          return self.dropout(x)

1 |   import torch.nn as nn
2 |
3 |   from .attention import MultiHeadedAttention
4 |   from .transformer_component import SublayerConnection, PositionwiseFeedForward
5 |
6 |   class TransformerBlock(nn.Module):
7 |       """
25 |          self.dropout = nn.Dropout(p=dropout)
26 |
27 |      def forward(self, x, mask):
28 |          attn_output, p_attn = self.attention.forward(x, x, x, mask=mask)
29 |          self.p_attn = p_attn.cpu().detach().numpy()
30 |          x = self.input_sublayer(x, lambda _x: attn_output)
31 |          x = self.output_sublayer(x, self.feed_forward)
32 |          return self.dropout(x)
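The retained TransformerBlock.forward above caches the attention weights on the block (self.p_attn) after every pass, so an analysis helper such as BERTAttention can read them back without re-running attention. A self-contained sketch of the same caching pattern built on torch.nn.MultiheadAttention; the class, sizes and shapes here are illustrative stand-ins, not this repo's MultiHeadedAttention:

import torch
import torch.nn as nn

class CachingAttentionBlock(nn.Module):
    def __init__(self, dim=32, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.p_attn = None                                # refreshed on every forward pass

    def forward(self, x):
        out, weights = self.attn(x, x, x, need_weights=True)
        self.p_attn = weights.detach().cpu().numpy()      # keep the attention map for later inspection
        return out

block = CachingAttentionBlock()
_ = block(torch.randn(2, 10, 32))
print(block.p_attn.shape)                                 # (2, 10, 10), averaged over heads by default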
src/vocab.py
CHANGED
@@ -1,22 +1,16 @@
1 |   import collections
2 |   import tqdm
3 | - <<<<<<< HEAD
4 |   import os
5 |   from pathlib import Path
6 |
7 |   head_directory = Path(__file__).resolve().parent.parent
8 |   # print(head_directory)
9 |   os.chdir(head_directory)
10 | - =======
11 | - >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
12 |
13 |  class Vocab(object):
14 |      """
15 |      Special tokens predefined in the vocab file are:
16 | -     <<<<<<< HEAD
17 |      -[PAD]
18 | -     =======
19 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
20 |      -[UNK]
21 |      -[MASK]
22 |      -[CLS]
@@ -48,11 +42,7 @@ class Vocab(object):
48 |          words = [self.invocab[index] if index < len(self.invocab)
49 |                  else "[%d]" % index for index in seq ]
50 |
51 | -     <<<<<<< HEAD
52 |          return words #" ".join(words)
53 | -     =======
54 | -         return " ".join(words)
55 | -     >>>>>>> bffd3381ccb717f802fe651d4111ec0a268e3896
56 |
57 |
58 |  # if __init__ == "__main__":

1 |   import collections
2 |   import tqdm
3 |   import os
4 |   from pathlib import Path
5 |
6 |   head_directory = Path(__file__).resolve().parent.parent
7 |   # print(head_directory)
8 |   os.chdir(head_directory)
9 |
10 |  class Vocab(object):
11 |      """
12 |      Special tokens predefined in the vocab file are:
13 |      -[PAD]
14 |      -[UNK]
15 |      -[MASK]
16 |      -[CLS]
42 |          words = [self.invocab[index] if index < len(self.invocab)
43 |                  else "[%d]" % index for index in seq ]
44 |
45 |          return words #" ".join(words)
46 |
47 |
48 |  # if __init__ == "__main__":
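The resolution above keeps the variant of Vocab.to_sentence that returns the token list itself, rendering out-of-vocabulary indices as "[index]", instead of a space-joined string. A self-contained sketch of that mapping and of how a caller can still recover the joined form; the small vocabulary here is hypothetical:

invocab = ["[PAD]", "[UNK]", "[MASK]", "[CLS]", "step_a", "step_b"]

def to_sentence(seq):
    # Mirrors the kept return: a list of tokens, with "[%d]" for indices outside the vocab.
    return [invocab[index] if index < len(invocab) else "[%d]" % index for index in seq]

tokens = to_sentence([3, 4, 5, 99])
print(tokens)            # ['[CLS]', 'step_a', 'step_b', '[99]']
print(" ".join(tokens))  # the old behaviour, when a single string is still needed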