Update train_model.py
Browse files
- train_model.py +1 -1
train_model.py
CHANGED
|
@@ -77,7 +77,7 @@ def collate_fn(batch):
|
|
| 77 |
return {"input_ids": input_ids, "labels": labels}
|
| 78 |
|
| 79 |
|
| 80 |
-
print("[*] Gathering
|
| 81 |
dataset = ChunkedDataset(fast_tokenizer, target_tokens=400_000_000, seq_len=256)
|
| 82 |
|
| 83 |
print("[*] Setting up model...")
|
|
|
|
| 77 |
return {"input_ids": input_ids, "labels": labels}
|
| 78 |
|
| 79 |
|
| 80 |
+
print("[*] Gathering 400 million tokens by streaming dataset...")
|
| 81 |
dataset = ChunkedDataset(fast_tokenizer, target_tokens=400_000_000, seq_len=256)
|
| 82 |
|
| 83 |
print("[*] Setting up model...")
|