Commit ·
a9f2764
1
Parent(s): 483d812
feat: update model
Browse files- classification_report.txt +11 -11
- config.json +1 -1
- confusion_matrix.png +2 -2
- model.safetensors +1 -1
- special_tokens_map.json +3 -15
- test_results.json +2 -2
- tokenizer.json +2 -2
- tokenizer_config.json +3 -55
- training_curves.png +2 -2
- training_scripts/run_training_manual.sh +3 -2
- training_scripts/train_nfqa_model.py +24 -53
classification_report.txt
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
precision recall f1-score support
|
| 2 |
|
| 3 |
-
NOT-A-QUESTION 0.
|
| 4 |
-
FACTOID 0.
|
| 5 |
-
DEBATE 0.
|
| 6 |
-
EVIDENCE-BASED 0.
|
| 7 |
-
INSTRUCTION 0.
|
| 8 |
-
REASON 0.
|
| 9 |
-
EXPERIENCE 0.
|
| 10 |
-
COMPARISON 0.
|
| 11 |
|
| 12 |
-
accuracy 0.
|
| 13 |
-
macro avg 0.
|
| 14 |
-
weighted avg 0.
|
|
|
|
| 1 |
precision recall f1-score support
|
| 2 |
|
| 3 |
+
NOT-A-QUESTION 0.99 0.99 0.99 557
|
| 4 |
+
FACTOID 0.92 0.87 0.90 896
|
| 5 |
+
DEBATE 0.92 0.96 0.94 472
|
| 6 |
+
EVIDENCE-BASED 0.88 0.95 0.91 568
|
| 7 |
+
INSTRUCTION 0.95 0.94 0.94 662
|
| 8 |
+
REASON 0.94 0.94 0.94 493
|
| 9 |
+
EXPERIENCE 0.86 0.85 0.85 686
|
| 10 |
+
COMPARISON 0.96 0.96 0.96 679
|
| 11 |
|
| 12 |
+
accuracy 0.92 5013
|
| 13 |
+
macro avg 0.93 0.93 0.93 5013
|
| 14 |
+
weighted avg 0.93 0.92 0.92 5013
|
config.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1118
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6f552aee9c2bd981c72a9e0daa6cf9e9a6d343e718338f8462d958b1b9cd73b
|
| 3 |
size 1118
|
confusion_matrix.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1112223464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:061cc36ce4649a1ca2c988c042eece9a041a7bae6589619c177ef053fdbadeb5
|
| 3 |
size 1112223464
|
special_tokens_map.json
CHANGED
|
@@ -1,15 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"eos_token": "</s>",
|
| 5 |
-
"mask_token": {
|
| 6 |
-
"content": "<mask>",
|
| 7 |
-
"lstrip": true,
|
| 8 |
-
"normalized": false,
|
| 9 |
-
"rstrip": false,
|
| 10 |
-
"single_word": false
|
| 11 |
-
},
|
| 12 |
-
"pad_token": "<pad>",
|
| 13 |
-
"sep_token": "</s>",
|
| 14 |
-
"unk_token": "<unk>"
|
| 15 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06e405a36dfe4b9604f484f6a1e619af1a7f7d09e34a8555eb0b77b66318067f
|
| 3 |
+
size 280
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
test_results.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f85874cae37b57c474bb4450d166414c18f4e141ad6eefc3233030664397ecc
|
| 3 |
+
size 805
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
|
| 3 |
+
size 17082734
|
tokenizer_config.json
CHANGED
|
@@ -1,55 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
"content": "<s>",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"1": {
|
| 12 |
-
"content": "<pad>",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"2": {
|
| 20 |
-
"content": "</s>",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"3": {
|
| 28 |
-
"content": "<unk>",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"250001": {
|
| 36 |
-
"content": "<mask>",
|
| 37 |
-
"lstrip": true,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"bos_token": "<s>",
|
| 45 |
-
"clean_up_tokenization_spaces": false,
|
| 46 |
-
"cls_token": "<s>",
|
| 47 |
-
"eos_token": "</s>",
|
| 48 |
-
"extra_special_tokens": {},
|
| 49 |
-
"mask_token": "<mask>",
|
| 50 |
-
"model_max_length": 512,
|
| 51 |
-
"pad_token": "<pad>",
|
| 52 |
-
"sep_token": "</s>",
|
| 53 |
-
"tokenizer_class": "XLMRobertaTokenizer",
|
| 54 |
-
"unk_token": "<unk>"
|
| 55 |
-
}
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccf223ba3d5b3cc7fa6c3bf451f3bb40557a5c92b0aa33f63d17802ff1a96fd9
|
| 3 |
+
size 1178
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_curves.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
training_scripts/run_training_manual.sh
CHANGED
|
@@ -26,7 +26,7 @@ EPOCHS=6
|
|
| 26 |
BATCH_SIZE=16
|
| 27 |
LEARNING_RATE=2e-5
|
| 28 |
MAX_LENGTH=128
|
| 29 |
-
|
| 30 |
WEIGHT_DECAY=0.1
|
| 31 |
DROPOUT=0.2
|
| 32 |
|
|
@@ -44,6 +44,7 @@ echo " Epochs: $EPOCHS"
|
|
| 44 |
echo " Batch size: $BATCH_SIZE"
|
| 45 |
echo " Learning rate: $LEARNING_RATE"
|
| 46 |
echo " Max length: $MAX_LENGTH"
|
|
|
|
| 47 |
echo " Weight decay: $WEIGHT_DECAY"
|
| 48 |
echo " Dropout: $DROPOUT"
|
| 49 |
echo ""
|
|
@@ -84,7 +85,7 @@ python train_nfqa_model.py \
|
|
| 84 |
--batch-size "$BATCH_SIZE" \
|
| 85 |
--learning-rate "$LEARNING_RATE" \
|
| 86 |
--max-length "$MAX_LENGTH" \
|
| 87 |
-
--warmup-
|
| 88 |
--weight-decay "$WEIGHT_DECAY" \
|
| 89 |
--dropout "$DROPOUT" \
|
| 90 |
"$@" # Pass any additional arguments from command line
|
|
|
|
| 26 |
BATCH_SIZE=16
|
| 27 |
LEARNING_RATE=2e-5
|
| 28 |
MAX_LENGTH=128
|
| 29 |
+
WARMUP_RATIO=0.1
|
| 30 |
WEIGHT_DECAY=0.1
|
| 31 |
DROPOUT=0.2
|
| 32 |
|
|
|
|
| 44 |
echo " Batch size: $BATCH_SIZE"
|
| 45 |
echo " Learning rate: $LEARNING_RATE"
|
| 46 |
echo " Max length: $MAX_LENGTH"
|
| 47 |
+
echo " Warmup ratio: $WARMUP_RATIO"
|
| 48 |
echo " Weight decay: $WEIGHT_DECAY"
|
| 49 |
echo " Dropout: $DROPOUT"
|
| 50 |
echo ""
|
|
|
|
| 85 |
--batch-size "$BATCH_SIZE" \
|
| 86 |
--learning-rate "$LEARNING_RATE" \
|
| 87 |
--max-length "$MAX_LENGTH" \
|
| 88 |
+
--warmup-ratio "$WARMUP_RATIO" \
|
| 89 |
--weight-decay "$WEIGHT_DECAY" \
|
| 90 |
--dropout "$DROPOUT" \
|
| 91 |
"$@" # Pass any additional arguments from command line
|
training_scripts/train_nfqa_model.py
CHANGED
|
@@ -26,6 +26,7 @@ from torch.utils.data import Dataset, DataLoader
|
|
| 26 |
from torch.optim import AdamW
|
| 27 |
from transformers import (
|
| 28 |
AutoTokenizer,
|
|
|
|
| 29 |
AutoModelForSequenceClassification,
|
| 30 |
get_linear_schedule_with_warmup
|
| 31 |
)
|
|
@@ -113,6 +114,7 @@ def train_epoch(model, train_loader, optimizer, scheduler, device):
|
|
| 113 |
labels = batch['labels'].to(device)
|
| 114 |
|
| 115 |
# Forward pass
|
|
|
|
| 116 |
outputs = model(
|
| 117 |
input_ids=input_ids,
|
| 118 |
attention_mask=attention_mask,
|
|
@@ -123,7 +125,6 @@ def train_epoch(model, train_loader, optimizer, scheduler, device):
|
|
| 123 |
total_loss += loss.item()
|
| 124 |
|
| 125 |
# Backward pass
|
| 126 |
-
optimizer.zero_grad()
|
| 127 |
loss.backward()
|
| 128 |
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
| 129 |
optimizer.step()
|
|
@@ -245,45 +246,6 @@ def load_data(file_path):
|
|
| 245 |
raise
|
| 246 |
|
| 247 |
|
| 248 |
-
def create_data_splits(questions, labels, test_size=0.2, val_size=0.1):
|
| 249 |
-
"""Create train/val/test splits"""
|
| 250 |
-
print("\nCreating data splits...")
|
| 251 |
-
|
| 252 |
-
# First split: separate test set
|
| 253 |
-
train_val_questions, test_questions, train_val_labels, test_labels = train_test_split(
|
| 254 |
-
questions,
|
| 255 |
-
labels,
|
| 256 |
-
test_size=test_size,
|
| 257 |
-
random_state=RANDOM_SEED,
|
| 258 |
-
stratify=labels
|
| 259 |
-
)
|
| 260 |
-
|
| 261 |
-
# Second split: separate validation from training
|
| 262 |
-
train_questions, val_questions, train_labels, val_labels = train_test_split(
|
| 263 |
-
train_val_questions,
|
| 264 |
-
train_val_labels,
|
| 265 |
-
test_size=val_size / (1 - test_size),
|
| 266 |
-
random_state=RANDOM_SEED,
|
| 267 |
-
stratify=train_val_labels
|
| 268 |
-
)
|
| 269 |
-
|
| 270 |
-
print(f"\nData splits:")
|
| 271 |
-
print(f" Training: {len(train_questions):4d} examples ({len(train_questions)/len(questions)*100:5.1f}%)")
|
| 272 |
-
print(f" Validation: {len(val_questions):4d} examples ({len(val_questions)/len(questions)*100:5.1f}%)")
|
| 273 |
-
print(f" Test: {len(test_questions):4d} examples ({len(test_questions)/len(questions)*100:5.1f}%)")
|
| 274 |
-
print(f" Total: {len(questions):4d} examples")
|
| 275 |
-
|
| 276 |
-
# Verify class distribution
|
| 277 |
-
print("\nClass distribution per split:")
|
| 278 |
-
for split_name, split_labels in [('Train', train_labels), ('Val', val_labels), ('Test', test_labels)]:
|
| 279 |
-
counts = Counter(split_labels)
|
| 280 |
-
print(f"\n{split_name}:")
|
| 281 |
-
for label_id in sorted(counts.keys()):
|
| 282 |
-
cat_name = ID2LABEL[label_id]
|
| 283 |
-
print(f" {cat_name:20s}: {counts[label_id]:3d}")
|
| 284 |
-
|
| 285 |
-
return train_questions, val_questions, test_questions, train_labels, val_labels, test_labels
|
| 286 |
-
|
| 287 |
|
| 288 |
def plot_training_curves(history, best_val_f1, output_dir):
|
| 289 |
"""Plot and save training curves"""
|
|
@@ -495,8 +457,8 @@ def main():
|
|
| 495 |
help='Number of epochs (default: 10)')
|
| 496 |
parser.add_argument('--learning-rate', type=float, default=2e-5,
|
| 497 |
help='Learning rate (default: 2e-5)')
|
| 498 |
-
parser.add_argument('--warmup-
|
| 499 |
-
help='
|
| 500 |
parser.add_argument('--weight-decay', type=float, default=0.01,
|
| 501 |
help='Weight decay (default: 0.01)')
|
| 502 |
parser.add_argument('--dropout', type=float, default=0.1,
|
|
@@ -543,6 +505,7 @@ def main():
|
|
| 543 |
print(f"Learning rate: {args.learning_rate}")
|
| 544 |
print(f"Max length: {args.max_length}")
|
| 545 |
print(f"Weight decay: {args.weight_decay}")
|
|
|
|
| 546 |
print(f"Dropout: {args.dropout}")
|
| 547 |
print("="*80 + "\n")
|
| 548 |
|
|
@@ -621,14 +584,20 @@ def main():
|
|
| 621 |
print("✓ Tokenizer loaded")
|
| 622 |
|
| 623 |
print(f"\nLoading model: {args.model_name}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 624 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 625 |
args.model_name,
|
| 626 |
-
|
| 627 |
-
id2label=ID2LABEL,
|
| 628 |
-
label2id=LABEL2ID,
|
| 629 |
-
hidden_dropout_prob=args.dropout,
|
| 630 |
-
attention_probs_dropout_prob=args.dropout,
|
| 631 |
-
classifier_dropout=args.dropout
|
| 632 |
)
|
| 633 |
model.to(device)
|
| 634 |
|
|
@@ -659,15 +628,16 @@ def main():
|
|
| 659 |
)
|
| 660 |
|
| 661 |
total_steps = len(train_loader) * args.epochs
|
|
|
|
| 662 |
scheduler = get_linear_schedule_with_warmup(
|
| 663 |
optimizer,
|
| 664 |
-
num_warmup_steps=
|
| 665 |
num_training_steps=total_steps
|
| 666 |
)
|
| 667 |
|
| 668 |
print(f"\n✓ Optimizer and scheduler configured")
|
| 669 |
print(f" Total training steps: {total_steps}")
|
| 670 |
-
print(f" Warmup steps: {args.
|
| 671 |
|
| 672 |
# Training loop
|
| 673 |
history = {
|
|
@@ -692,12 +662,12 @@ def main():
|
|
| 692 |
# Train
|
| 693 |
train_loss, train_acc = train_epoch(model, train_loader, optimizer, scheduler, device)
|
| 694 |
|
| 695 |
-
# Validate
|
| 696 |
val_loss, val_acc, val_f1, val_preds, val_true = evaluate(
|
| 697 |
model, val_loader, device,
|
| 698 |
languages=val_langs,
|
| 699 |
desc="Validating",
|
| 700 |
-
show_analysis=
|
| 701 |
)
|
| 702 |
|
| 703 |
# Update history
|
|
@@ -820,7 +790,8 @@ def main():
|
|
| 820 |
'batch_size': args.batch_size,
|
| 821 |
'learning_rate': args.learning_rate,
|
| 822 |
'num_epochs': args.epochs,
|
| 823 |
-
'
|
|
|
|
| 824 |
'weight_decay': args.weight_decay,
|
| 825 |
'dropout': args.dropout,
|
| 826 |
'data_source': 'pre-split' if has_split_inputs else 'single_file',
|
|
|
|
| 26 |
from torch.optim import AdamW
|
| 27 |
from transformers import (
|
| 28 |
AutoTokenizer,
|
| 29 |
+
AutoConfig,
|
| 30 |
AutoModelForSequenceClassification,
|
| 31 |
get_linear_schedule_with_warmup
|
| 32 |
)
|
|
|
|
| 114 |
labels = batch['labels'].to(device)
|
| 115 |
|
| 116 |
# Forward pass
|
| 117 |
+
optimizer.zero_grad()
|
| 118 |
outputs = model(
|
| 119 |
input_ids=input_ids,
|
| 120 |
attention_mask=attention_mask,
|
|
|
|
| 125 |
total_loss += loss.item()
|
| 126 |
|
| 127 |
# Backward pass
|
|
|
|
| 128 |
loss.backward()
|
| 129 |
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
| 130 |
optimizer.step()
|
|
|
|
| 246 |
raise
|
| 247 |
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
def plot_training_curves(history, best_val_f1, output_dir):
|
| 251 |
"""Plot and save training curves"""
|
|
|
|
| 457 |
help='Number of epochs (default: 10)')
|
| 458 |
parser.add_argument('--learning-rate', type=float, default=2e-5,
|
| 459 |
help='Learning rate (default: 2e-5)')
|
| 460 |
+
parser.add_argument('--warmup-ratio', type=float, default=0.1,
|
| 461 |
+
help='Fraction of total training steps used for warmup (default: 0.1)')
|
| 462 |
parser.add_argument('--weight-decay', type=float, default=0.01,
|
| 463 |
help='Weight decay (default: 0.01)')
|
| 464 |
parser.add_argument('--dropout', type=float, default=0.1,
|
|
|
|
| 505 |
print(f"Learning rate: {args.learning_rate}")
|
| 506 |
print(f"Max length: {args.max_length}")
|
| 507 |
print(f"Weight decay: {args.weight_decay}")
|
| 508 |
+
print(f"Warmup ratio: {args.warmup_ratio}")
|
| 509 |
print(f"Dropout: {args.dropout}")
|
| 510 |
print("="*80 + "\n")
|
| 511 |
|
|
|
|
| 584 |
print("✓ Tokenizer loaded")
|
| 585 |
|
| 586 |
print(f"\nLoading model: {args.model_name}")
|
| 587 |
+
|
| 588 |
+
# Configure dropout BEFORE instantiating the model
|
| 589 |
+
config = AutoConfig.from_pretrained(args.model_name)
|
| 590 |
+
config.num_labels = len(NFQA_CATEGORIES)
|
| 591 |
+
config.id2label = ID2LABEL
|
| 592 |
+
config.label2id = LABEL2ID
|
| 593 |
+
config.hidden_dropout_prob = args.dropout
|
| 594 |
+
config.attention_probs_dropout_prob = args.dropout
|
| 595 |
+
config.classifier_dropout = args.dropout
|
| 596 |
+
|
| 597 |
+
# Now create model with configured dropout
|
| 598 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 599 |
args.model_name,
|
| 600 |
+
config=config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
)
|
| 602 |
model.to(device)
|
| 603 |
|
|
|
|
| 628 |
)
|
| 629 |
|
| 630 |
total_steps = len(train_loader) * args.epochs
|
| 631 |
+
warmup_steps = int(args.warmup_ratio * total_steps)
|
| 632 |
scheduler = get_linear_schedule_with_warmup(
|
| 633 |
optimizer,
|
| 634 |
+
num_warmup_steps=warmup_steps,
|
| 635 |
num_training_steps=total_steps
|
| 636 |
)
|
| 637 |
|
| 638 |
print(f"\n✓ Optimizer and scheduler configured")
|
| 639 |
print(f" Total training steps: {total_steps}")
|
| 640 |
+
print(f" Warmup steps: {warmup_steps} ({args.warmup_ratio*100:.0f}% of total)")
|
| 641 |
|
| 642 |
# Training loop
|
| 643 |
history = {
|
|
|
|
| 662 |
# Train
|
| 663 |
train_loss, train_acc = train_epoch(model, train_loader, optimizer, scheduler, device)
|
| 664 |
|
| 665 |
+
# Validate
|
| 666 |
val_loss, val_acc, val_f1, val_preds, val_true = evaluate(
|
| 667 |
model, val_loader, device,
|
| 668 |
languages=val_langs,
|
| 669 |
desc="Validating",
|
| 670 |
+
show_analysis=False
|
| 671 |
)
|
| 672 |
|
| 673 |
# Update history
|
|
|
|
| 790 |
'batch_size': args.batch_size,
|
| 791 |
'learning_rate': args.learning_rate,
|
| 792 |
'num_epochs': args.epochs,
|
| 793 |
+
'warmup_ratio': args.warmup_ratio,
|
| 794 |
+
'warmup_steps': warmup_steps,
|
| 795 |
'weight_decay': args.weight_decay,
|
| 796 |
'dropout': args.dropout,
|
| 797 |
'data_source': 'pre-split' if has_split_inputs else 'single_file',
|