step 50
Browse files
- config.json +1 -1
- gptesla_training.py +294 -0
- log/debug_0.log +251 -0
- log/debug_1.log +0 -0
- log/debug_2.log +0 -0
- log/debug_3.log +0 -0
- model.safetensors +1 -1
- my_checkpoint/model.safetensors +3 -0
- my_checkpoint/optimizer.bin +3 -0
- my_checkpoint/random_states_0.pkl +3 -0
- my_checkpoint/scaler.pt +3 -0
- runs/Jul25_08-52-29_lab/1721897549.8326483/events.out.tfevents.1721897549.lab.173640.1 +3 -0
- runs/Jul25_08-52-29_lab/events.out.tfevents.1721897549.lab.173640.0 +3 -0
- torch_checkpoint/latest_checkpoint.pth +3 -0
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "./",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
gptesla_training.py
ADDED
@@ -0,0 +1,294 @@
+import os
+
+import datasets, transformers
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, set_seed
+from transformers.optimization import get_scheduler
+from datasets import load_dataset, DownloadConfig
+
+import torch
+from torch.utils.data import IterableDataset
+from torch.utils.data.dataloader import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+from torch.optim import AdamW
+
+import logging
+import wandb
+from huggingface_hub import Repository, create_branch
+from accelerate import Accelerator
+from argparse import Namespace
+
+
+# Set the API token as an environment variable
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+
+def save_checkpoint_state():
+
+    dir_name = "./torch_checkpoint"
+    os.makedirs(dir_name, exist_ok=True)
+
+    checkpoint = {
+        "lr_scheduler": lr_scheduler.state_dict(),
+        "completed_steps": completed_steps,
+        "run_name": run_name,
+        "optimizer": optimizer
+    }
+    torch.save(checkpoint, f"torch_checkpoint/latest_checkpoint.pth")
+
+
+class ConstantLengthDataset(IterableDataset):
+
+    def __init__(
+        self,
+        tokenizer,
+        dataset,
+        seq_length=1024,
+        num_of_sequences=1024,
+        chars_per_token=3.6,
+    ):
+        self.tokenizer = tokenizer
+        self.concat_token_id = tokenizer.eos_token_id
+        self.dataset = dataset
+        self.seq_length = seq_length
+        self.input_characters = seq_length * chars_per_token * num_of_sequences
+
+    def __iter__(self):
+        iterator = iter(self.dataset)
+        more_examples = True
+        while more_examples:
+            buffer, buffer_len = [], 0
+            while True:
+                if buffer_len >= self.input_characters:
+                    m = f"Buffer full: {buffer_len}>={self.input_characters:.0f}"
+                    # print(m)
+                    break
+                try:
+                    m = f"Fill buffer: {buffer_len}<{self.input_characters:.0f}"
+                    # print(m)
+                    buffer.append(next(iterator)["content"])
+                    buffer_len += len(buffer[-1])
+                except StopIteration:
+                    # iterator = iter(self.dataset)
+                    more_examples = False
+                    break
+
+            all_token_ids = []
+            tokenized_inputs = self.tokenizer(buffer, truncation=False)
+            for tokenized_input in tokenized_inputs["input_ids"]:
+                all_token_ids.extend(tokenized_input + [self.concat_token_id])
+
+            for i in range(0, len(all_token_ids), self.seq_length):
+                input_ids = all_token_ids[i : i + self.seq_length]
+                if len(input_ids) == self.seq_length:
+                    yield torch.tensor(input_ids)
+
+
+def setup_logging(project_name):
+    logger = logging.getLogger(__name__)
+
+    dir_name = "./log"
+    if not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+        print(f"Directory '{dir_name}' was created.")
+    else:
+        print(f"Directory '{dir_name}' already exists.")
+
+    # setting up log directory
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        level=logging.INFO,
+        handlers=[
+            logging.FileHandler(f"log/debug_{accelerator.process_index}.log"),
+            logging.StreamHandler(),
+        ],
+    )
+    if accelerator.is_main_process:  # We only want to set up logging once
+        wandb.init(project=project_name, config=args, dir="./../")
+        run_name = wandb.run.name
+        tb_writer = SummaryWriter()
+        tb_writer.add_hparams(vars(args), {"0": 0})
+        logger.setLevel(logging.INFO)
+        datasets.utils.logging.set_verbosity_debug()
+        transformers.utils.logging.set_verbosity_info()
+    else:
+        tb_writer = None
+        run_name = ""
+        logger.setLevel(logging.ERROR)
+        datasets.utils.logging.set_verbosity_error()
+        transformers.utils.logging.set_verbosity_error()
+    return logger, tb_writer, run_name
+
+
+def create_dataloaders(dataset_name):
+    train_data = load_dataset(dataset_name + "-train", split="train", streaming=True)
+    train_data = train_data.shuffle(buffer_size=args.shuffle_buffer, seed=args.seed)
+    valid_data = load_dataset(
+        dataset_name + "-valid", split="validation", streaming=True
+    )
+
+    train_dataset = ConstantLengthDataset(
+        tokenizer, train_data, seq_length=args.seq_length
+    )
+    valid_dataset = ConstantLengthDataset(
+        tokenizer, valid_data, seq_length=args.seq_length
+    )
+
+    train_dataloader = DataLoader(
+        train_dataset, batch_size=args.train_batch_size, num_workers=96
+    )
+    eval_dataloader = DataLoader(
+        valid_dataset, batch_size=args.valid_batch_size, num_workers=1
+    )
+    return train_dataloader, eval_dataloader
+
+
+def log_metrics(step, metrics):
+    logger.info(f"Step {step}: {metrics}")
+    if accelerator.is_main_process:
+        wandb.log(metrics)
+        [tb_writer.add_scalar(k, v, step) for k, v in metrics.items()]
+
+
+def get_grouped_params(model, no_decay=["bias", "LayerNorm.weight"]):
+    params_with_wd, params_without_wd = [], []
+    for n, p in model.named_parameters():
+        if any(nd in n for nd in no_decay):
+            params_without_wd.append(p)
+        else:
+            params_with_wd.append(p)
+    return [
+        {"params": params_with_wd, "weight_decay": args.weight_decay},
+        {"params": params_without_wd, "weight_decay": 0.0},
+    ]
+
+
+def evaluate():
+    model.eval()
+    losses = []
+    for step, batch in enumerate(eval_dataloader):
+        with torch.no_grad():
+            outputs = model(batch, labels=batch)
+        loss = outputs.loss.repeat(args.valid_batch_size)
+        losses.append(accelerator.gather(loss))
+        if args.max_eval_steps > 0 and step >= args.max_eval_steps:
+            break
+    loss = torch.mean(torch.cat(losses))
+
+    try:
+        perplexity = torch.exp(loss)
+    except OverflowError:
+        perplexity = torch.tensor(float("inf"))
+
+    return loss.item(), perplexity.item()
+
+
+# Accelerator
+accelerator = Accelerator(dispatch_batches=True)
+acc_state = {str(k): str(v) for k, v in accelerator.state.__dict__.items()}
+
+project_name = "shng2025/gptesla-small"
+dataset_name = "shng2025/gptesla"
+
+# GPTesla - 111M param setup in comment. Modification to make lighter training requirement needed
+config = {
+    "train_batch_size": 12,  # 12
+    "valid_batch_size": 12,  # 12
+    "weight_decay": 0.1,
+    "shuffle_buffer": 1000,
+    "learning_rate": 5e-4,  # 5e-4
+    "lr_scheduler_type": "cosine",
+    "num_warmup_steps": 700,  # 2000
+    "gradient_accumulation_steps": 1,  # 1
+    "max_train_steps": 50000,  # 150000
+    "max_eval_steps": 10,
+    "seq_length": 1024,
+    "seed": 1,
+    "save_checkpoint_steps": 50,
+}  # 15000
+
+args = Namespace(**config, **acc_state)
+samples_per_step = accelerator.state.num_processes * args.train_batch_size
+set_seed(args.seed)
+
+# Logging
+logger, tb_writer, run_name = setup_logging(project_name.split("/")[1])
+logger.info(accelerator.state)
+
+# Load model and tokenizer
+if accelerator.is_main_process:
+    new_branch_name = run_name
+    create_branch(project_name, repo_type="model", branch=new_branch_name)
+    hf_repo = Repository("./", clone_from=project_name, revision=run_name)
+
+model = AutoModelForCausalLM.from_pretrained("./")  # , gradient_checkpointing=True)
+tokenizer = AutoTokenizer.from_pretrained("./")
+
+# Load dataset and dataloader
+train_dataloader, eval_dataloader = create_dataloaders(dataset_name)
+
+# Prepare the optimizer and learning rate scheduler
+optimizer = AdamW(get_grouped_params(model), lr=args.learning_rate)
+lr_scheduler = get_scheduler(
+    name=args.lr_scheduler_type,
+    optimizer=optimizer,
+    num_warmup_steps=args.num_warmup_steps,
+    num_training_steps=args.max_train_steps,
+)
+
+
+def get_lr():
+    return optimizer.param_groups[0]["lr"]
+
+
+# Prepare everything with our `accelerator` (order of args is not important)
+model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+    model, optimizer, train_dataloader, eval_dataloader
+)
+
+# Train model
+model.train()
+completed_steps = 0
+for step, batch in enumerate(train_dataloader, start=1):
+    loss = model(batch, labels=batch).loss
+    log_metrics(
+        step,
+        {
+            "lr": get_lr(),
+            "samples": step * samples_per_step,
+            "steps": completed_steps,
+            "loss/train": loss.item(),
+        },
+    )
+    loss = loss / args.gradient_accumulation_steps
+    accelerator.backward(loss)
+    if step % args.gradient_accumulation_steps == 0:
+        optimizer.step()
+        lr_scheduler.step()
+        optimizer.zero_grad()
+        completed_steps += 1
+    if step % args.save_checkpoint_steps == 0:
+        logger.info("Evaluating and saving model checkpoint")
+        eval_loss, perplexity = evaluate()
+        log_metrics(step, {"loss/eval": eval_loss, "perplexity": perplexity})
+        accelerator.wait_for_everyone()
+        unwrapped_model = accelerator.unwrap_model(model)
+        if accelerator.is_main_process:
+            save_checkpoint_state()
+            unwrapped_model.save_pretrained("./")
+            accelerator.save_state(output_dir="my_checkpoint")
+            hf_repo.push_to_hub(commit_message=f"step {step}")
+        model.train()
+    if completed_steps >= args.max_train_steps:
+        break
+
+
+# Evaluate and save the last checkpoint
+logger.info("Evaluating and saving model after training")
+eval_loss, perplexity = evaluate()
+log_metrics(step, {"loss/eval": eval_loss, "perplexity": perplexity})
+accelerator.wait_for_everyone()
+unwrapped_model = accelerator.unwrap_model(model)
+if accelerator.is_main_process:
+    unwrapped_model.save_pretrained("./")
+    hf_repo.push_to_hub(commit_message="final model")
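Note (not part of the committed file): the script above produces the artifacts seen elsewhere in this commit — model and tokenizer files pushed to the Hub branch via save_pretrained and push_to_hub, the Accelerate state under my_checkpoint/, and a small PyTorch dict under torch_checkpoint/latest_checkpoint.pth holding the scheduler state, completed step count, wandb run name, and optimizer. A minimal sketch of how a later run could restore that state follows; the helper name resume_from_checkpoint is hypothetical, and it assumes the same model, optimizer, and lr_scheduler objects have already been built and passed through accelerator.prepare exactly as in the script.

# Hypothetical resume sketch -- not part of gptesla_training.py in this commit.
# Assumes `accelerator` and `lr_scheduler` exist exactly as set up above.
import torch

def resume_from_checkpoint(accelerator, lr_scheduler):
    # Restore model, optimizer, gradient scaler and RNG states written by accelerator.save_state()
    accelerator.load_state("my_checkpoint")
    # Restore the extra bookkeeping written by save_checkpoint_state()
    extra = torch.load("torch_checkpoint/latest_checkpoint.pth", map_location="cpu")
    lr_scheduler.load_state_dict(extra["lr_scheduler"])
    return extra["completed_steps"], extra["run_name"]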
log/debug_0.log
CHANGED
@@ -0,0 +1,251 @@
+07/25/2024 08:52:29 - INFO - __main__ - Distributed environment: MULTI_GPU Backend: nccl
+Num processes: 4
+Process index: 0
+Local process index: 0
+Device: cuda:0
+
+Mixed precision type: fp16
+
+07/25/2024 08:52:30 - WARNING - huggingface_hub.repository - /dli/gptesla-small/./ is already a clone of https://huggingface.co/shng2025/gptesla-small. Make sure you pull the latest changes with `repo.git_pull()`.
+07/25/2024 08:52:30 - WARNING - huggingface_hub.repository - Revision `spring-music-133` does not exist. Created and checked out branch `spring-music-133`.
+07/25/2024 08:52:30 - WARNING - huggingface_hub.repository -
+07/25/2024 08:52:31 - DEBUG - datasets.utils._dataset_viewer - Dataset info for shng2025/gptesla-train is not completely ready yet.
+07/25/2024 08:52:32 - INFO - datasets.builder - No config specified, defaulting to the single config: gptesla-train/default
+07/25/2024 08:52:32 - INFO - datasets.info - Loading Dataset Infos from /usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#4, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#3, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#5, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#1, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#2, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#6, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#7, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#8, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#10, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#9, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#12, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#11, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#13, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#14, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#15, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#16, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#19, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#18, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#17, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#21, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#23, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#20, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#25, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#22, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#24, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#26, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#27, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#28, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#29, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#30, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#31, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#32, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#35, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#36, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#33, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#39, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#38, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#37, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#34, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#40, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#41, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#42, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#43, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#44, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#45, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#46, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#47, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#48, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#49, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#50, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#51, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#52, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#54, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#53, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#55, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#56, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#57, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#61, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#62, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#60, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#59, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#63, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#58, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#65, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#64, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#66, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#67, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#69, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#68, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#70, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#73, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#72, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#74, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#75, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#71, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#76, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#77, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#78, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#79, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#80, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#86, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#81, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#85, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#82, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#84, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#87, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#83, ': Starting to iterate over 2/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#88, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#91, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#90, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#93, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#92, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#89, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#94, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:37 - DEBUG - datasets.iterable_dataset - dataloader worker#95, ': Starting to iterate over 1/183 shards.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10500930 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492277 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489635 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10522596 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486023 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486023 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486397 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497062 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10536479 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10512203 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491327 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489599 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10525688 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10863935 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495973 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10668116 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511604 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488385 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488651 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487482 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497218 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486172 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488651 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10621496 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491272 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511500 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10501535 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10553677 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485918 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10499607 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486616 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10525926 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10552417 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10553677 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488098 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10552417 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10493913 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486616 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10686322 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492861 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488608 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492861 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10500290 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491547 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488150 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486276 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10640425 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10530453 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492554 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487097 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487790 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509262 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10498167 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10610581 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487725 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489575 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10751338 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495520 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486801 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495520 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10949076 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497335 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509286 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 11286262 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509286 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497111 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497111 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 11286262 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10676628 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491889 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10562022 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485842 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 11115863 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485847 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485912 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10515063 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485912 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10499106 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10598254 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:39 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511515 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360.
+07/25/2024 08:52:56 - INFO - __main__ - Step 1: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 10.554669380187988}
+07/25/2024 08:52:57 - INFO - __main__ - Step 2: {'lr': 7.142857142857143e-07, 'samples': 96, 'steps': 1, 'loss/train': 10.494059562683105}
+07/25/2024 08:52:58 - INFO - __main__ - Step 3: {'lr': 1.4285714285714286e-06, 'samples': 144, 'steps': 2, 'loss/train': 10.507988929748535}
+07/25/2024 08:52:58 - INFO - __main__ - Step 4: {'lr': 2.142857142857143e-06, 'samples': 192, 'steps': 3, 'loss/train': 10.415447235107422}
+07/25/2024 08:52:58 - INFO - __main__ - Step 5: {'lr': 2.8571428571428573e-06, 'samples': 240, 'steps': 4, 'loss/train': 10.345850944519043}
+07/25/2024 08:52:59 - INFO - __main__ - Step 6: {'lr': 3.5714285714285714e-06, 'samples': 288, 'steps': 5, 'loss/train': 10.195524215698242}
+07/25/2024 08:52:59 - INFO - __main__ - Step 7: {'lr': 4.285714285714286e-06, 'samples': 336, 'steps': 6, 'loss/train': 10.09341812133789}
+07/25/2024 08:52:59 - INFO - __main__ - Step 8: {'lr': 5e-06, 'samples': 384, 'steps': 7, 'loss/train': 9.965239524841309}
+07/25/2024 08:52:59 - INFO - __main__ - Step 9: {'lr': 5.7142857142857145e-06, 'samples': 432, 'steps': 8, 'loss/train': 9.698853492736816}
+07/25/2024 08:53:00 - INFO - __main__ - Step 10: {'lr': 6.428571428571429e-06, 'samples': 480, 'steps': 9, 'loss/train': 9.80683708190918}
+07/25/2024 08:53:00 - INFO - __main__ - Step 11: {'lr': 7.142857142857143e-06, 'samples': 528, 'steps': 10, 'loss/train': 9.633079528808594}
+07/25/2024 08:53:00 - INFO - __main__ - Step 12: {'lr': 7.857142857142858e-06, 'samples': 576, 'steps': 11, 'loss/train': 9.700591087341309}
+07/25/2024 08:53:00 - INFO - __main__ - Step 13: {'lr': 8.571428571428573e-06, 'samples': 624, 'steps': 12, 'loss/train': 9.603139877319336}
+07/25/2024 08:53:01 - INFO - __main__ - Step 14: {'lr': 9.285714285714286e-06, 'samples': 672, 'steps': 13, 'loss/train': 9.30308723449707}
+07/25/2024 08:53:01 - INFO - __main__ - Step 15: {'lr': 1e-05, 'samples': 720, 'steps': 14, 'loss/train': 9.333526611328125}
+07/25/2024 08:53:01 - INFO - __main__ - Step 16: {'lr': 1.0714285714285714e-05, 'samples': 768, 'steps': 15, 'loss/train': 8.336181640625}
+07/25/2024 08:53:02 - INFO - __main__ - Step 17: {'lr': 1.1428571428571429e-05, 'samples': 816, 'steps': 16, 'loss/train': 9.075631141662598}
+07/25/2024 08:53:02 - INFO - __main__ - Step 18: {'lr': 1.2142857142857142e-05, 'samples': 864, 'steps': 17, 'loss/train': 9.18478012084961}
+07/25/2024 08:53:02 - INFO - __main__ - Step 19: {'lr': 1.2857142857142857e-05, 'samples': 912, 'steps': 18, 'loss/train': 8.96328353881836}
+07/25/2024 08:53:02 - INFO - __main__ - Step 20: {'lr': 1.3571428571428572e-05, 'samples': 960, 'steps': 19, 'loss/train': 9.45018196105957}
+07/25/2024 08:53:03 - INFO - __main__ - Step 21: {'lr': 1.4285714285714285e-05, 'samples': 1008, 'steps': 20, 'loss/train': 8.517333984375}
+07/25/2024 08:53:03 - INFO - __main__ - Step 22: {'lr': 1.5e-05, 'samples': 1056, 'steps': 21, 'loss/train': 9.207684516906738}
+07/25/2024 08:53:03 - INFO - __main__ - Step 23: {'lr': 1.5714285714285715e-05, 'samples': 1104, 'steps': 22, 'loss/train': 8.681092262268066}
+07/25/2024 08:53:04 - INFO - __main__ - Step 24: {'lr': 1.642857142857143e-05, 'samples': 1152, 'steps': 23, 'loss/train': 8.316036224365234}
+07/25/2024 08:53:04 - INFO - __main__ - Step 25: {'lr': 1.7142857142857145e-05, 'samples': 1200, 'steps': 24, 'loss/train': 8.944169044494629}
+07/25/2024 08:53:04 - INFO - __main__ - Step 26: {'lr': 1.7857142857142855e-05, 'samples': 1248, 'steps': 25, 'loss/train': 8.878201484680176}
+07/25/2024 08:53:04 - INFO - __main__ - Step 27: {'lr': 1.8571428571428572e-05, 'samples': 1296, 'steps': 26, 'loss/train': 9.158102989196777}
+07/25/2024 08:53:05 - INFO - __main__ - Step 28: {'lr': 1.9285714285714285e-05, 'samples': 1344, 'steps': 27, 'loss/train': 9.14354419708252}
+07/25/2024 08:53:05 - INFO - __main__ - Step 29: {'lr': 2e-05, 'samples': 1392, 'steps': 28, 'loss/train': 8.860624313354492}
+07/25/2024 08:53:05 - INFO - __main__ - Step 30: {'lr': 2.0714285714285715e-05, 'samples': 1440, 'steps': 29, 'loss/train': 8.876450538635254}
+07/25/2024 08:53:05 - INFO - __main__ - Step 31: {'lr': 2.1428571428571428e-05, 'samples': 1488, 'steps': 30, 'loss/train': 8.425738334655762}
+07/25/2024 08:53:06 - INFO - __main__ - Step 32: {'lr': 2.214285714285714e-05, 'samples': 1536, 'steps': 31, 'loss/train': 8.942279815673828}
+07/25/2024 08:53:06 - INFO - __main__ - Step 33: {'lr': 2.2857142857142858e-05, 'samples': 1584, 'steps': 32, 'loss/train': 8.757084846496582}
+07/25/2024 08:53:06 - INFO - __main__ - Step 34: {'lr': 2.3571428571428575e-05, 'samples': 1632, 'steps': 33, 'loss/train': 8.699286460876465}
+07/25/2024 08:53:07 - INFO - __main__ - Step 35: {'lr': 2.4285714285714285e-05, 'samples': 1680, 'steps': 34, 'loss/train': 8.857367515563965}
+07/25/2024 08:53:07 - INFO - __main__ - Step 36: {'lr': 2.5e-05, 'samples': 1728, 'steps': 35, 'loss/train': 8.830195426940918}
+07/25/2024 08:53:07 - INFO - __main__ - Step 37: {'lr': 2.5714285714285714e-05, 'samples': 1776, 'steps': 36, 'loss/train': 8.944982528686523}
+07/25/2024 08:53:07 - INFO - __main__ - Step 38: {'lr': 2.642857142857143e-05, 'samples': 1824, 'steps': 37, 'loss/train': 8.670278549194336}
+07/25/2024 08:53:08 - INFO - __main__ - Step 39: {'lr': 2.7142857142857144e-05, 'samples': 1872, 'steps': 38, 'loss/train': 8.710525512695312}
+07/25/2024 08:53:08 - INFO - __main__ - Step 40: {'lr': 2.7857142857142858e-05, 'samples': 1920, 'steps': 39, 'loss/train': 7.902089595794678}
+07/25/2024 08:53:08 - INFO - __main__ - Step 41: {'lr': 2.857142857142857e-05, 'samples': 1968, 'steps': 40, 'loss/train': 8.400484085083008}
+07/25/2024 08:53:09 - INFO - __main__ - Step 42: {'lr': 2.9285714285714288e-05, 'samples': 2016, 'steps': 41, 'loss/train': 8.789310455322266}
+07/25/2024 08:53:09 - INFO - __main__ - Step 43: {'lr': 3e-05, 'samples': 2064, 'steps': 42, 'loss/train': 8.754344940185547}
+07/25/2024 08:53:09 - INFO - __main__ - Step 44: {'lr': 3.071428571428572e-05, 'samples': 2112, 'steps': 43, 'loss/train': 8.84192943572998}
+07/25/2024 08:53:09 - INFO - __main__ - Step 45: {'lr': 3.142857142857143e-05, 'samples': 2160, 'steps': 44, 'loss/train': 8.784793853759766}
+07/25/2024 08:53:10 - INFO - __main__ - Step 46: {'lr': 3.214285714285714e-05, 'samples': 2208, 'steps': 45, 'loss/train': 8.67403793334961}
+07/25/2024 08:53:10 - INFO - __main__ - Step 47: {'lr': 3.285714285714286e-05, 'samples': 2256, 'steps': 46, 'loss/train': 8.51427173614502}
+07/25/2024 08:53:10 - INFO - __main__ - Step 48: {'lr': 3.357142857142857e-05, 'samples': 2304, 'steps': 47, 'loss/train': 8.48193073272705}
+07/25/2024 08:53:11 - INFO - __main__ - Step 49: {'lr': 3.428571428571429e-05, 'samples': 2352, 'steps': 48, 'loss/train': 8.518038749694824}
+07/25/2024 08:53:11 - INFO - __main__ - Step 50: {'lr': 3.5000000000000004e-05, 'samples': 2400, 'steps': 49, 'loss/train': 8.63569450378418}
+07/25/2024 08:53:11 - INFO - __main__ - Evaluating and saving model checkpoint
+07/25/2024 08:53:11 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards.
+07/25/2024 08:53:15 - INFO - __main__ - Step 50: {'loss/eval': 8.551246643066406, 'perplexity': 5173.19970703125}
+07/25/2024 08:53:17 - INFO - accelerate.accelerator - Saving current state to my_checkpoint
+07/25/2024 08:53:17 - WARNING - accelerate.utils.other - Removed shared tensor {'lm_head.weight'} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading
+07/25/2024 08:53:17 - INFO - accelerate.checkpointing - Model weights saved in my_checkpoint/model.safetensors
+07/25/2024 08:53:18 - INFO - accelerate.checkpointing - Optimizer state saved in my_checkpoint/optimizer.bin
+07/25/2024 08:53:18 - INFO - accelerate.checkpointing - Sampler state for dataloader 0 saved in my_checkpoint/sampler.bin
+07/25/2024 08:53:18 - INFO - accelerate.checkpointing - Sampler state for dataloader 1 saved in my_checkpoint/sampler_1.bin
+07/25/2024 08:53:18 - INFO - accelerate.checkpointing - Gradient scaler state saved in my_checkpoint/scaler.pt
+07/25/2024 08:53:18 - INFO - accelerate.checkpointing - Random states saved in my_checkpoint/random_states_0.pkl
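Sanity check (not part of the committed log): evaluate() reports perplexity as the exponential of the mean eval loss, and exp(8.551246643066406) is about 5173.2, which matches the 'perplexity': 5173.19970703125 entry logged at step 50. A one-line verification in Python:

import math

# exp of the logged eval loss reproduces the logged perplexity (~5173.2)
print(math.exp(8.551246643066406))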
log/debug_1.log
ADDED
File without changes
log/debug_2.log
ADDED
File without changes
log/debug_3.log
ADDED
File without changes
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7eb13d61f8d3f9cb945838abd62b274c030a665710e11a81e40523fd167676e8
 size 444048000
my_checkpoint/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb13d61f8d3f9cb945838abd62b274c030a665710e11a81e40523fd167676e8
+size 444048000
my_checkpoint/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17945a190a7f13238a46e415a9d197e06e630393a465b4eb7395e34ac34cec8
+size 888189882
my_checkpoint/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:836e8625e01d845543882c69cc524c4b56d536fd88b56370ad8514f35d1bb70a
+size 15124
my_checkpoint/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a25e9003157e2a9d5bc24e837058bba42dd027da3fe38540cbd3087114c4852
+size 988
runs/Jul25_08-52-29_lab/1721897549.8326483/events.out.tfevents.1721897549.lab.173640.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82dd56089ec6da1327772bb7d8f5b4a5a3c9944a6931c711fc00b9c2041f22e8
+size 1702
runs/Jul25_08-52-29_lab/events.out.tfevents.1721897549.lab.173640.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efe4990776a9c6cce5b8ec11cc26772e0cd6ba8171b3da17b37ce5ec0ba30f26
+size 8983
torch_checkpoint/latest_checkpoint.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6acfce111e2cba64d47b90b5254719f18d4d83442132547a370710c348c28faa
+size 1333274074