Upload src/config.yaml with huggingface_hub
src/config.yaml ADDED (+46, -0)
@@ -0,0 +1,46 @@
+model:
+  name: "answerdotai/ModernBERT-base"
+  loss_function:
+    name: "SentimentWeightedLoss" # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
+    # Parameters for the chosen loss function.
+    # For SentimentFocalLoss, common params are:
+    # gamma_focal: 1.0 # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
+    # label_smoothing_epsilon: 0.05 # (e.g., 0.0 to 0.1)
+    # For SentimentWeightedLoss, params is empty:
+    params:
+      gamma_focal: 1.0
+      label_smoothing_epsilon: 0.05
+  output_dir: "checkpoints"
+  max_length: 880 # 256
+  dropout: 0.1
+  # --- Pooling Strategy --- #
+  # Options: "cls", "mean", "cls_mean_concat", "weighted_layer", "cls_weighted_concat"
+  # "cls" uses just the [CLS] token for classification
+  # "mean" uses mean pooling over the final hidden states for classification
+  # "cls_mean_concat" concatenates the [CLS] token with mean pooling over the final hidden states
+  # "weighted_layer" uses a weighted combination of the hidden states from the top N layers
+  # "cls_weighted_concat" concatenates the [CLS] token with the weighted top-N-layer combination
+
+  pooling_strategy: "mean" # Current default, change as needed
+
+  num_weighted_layers: 6 # Number of top BERT layers to use for 'weighted_layer' strategies (e.g., 1 to 12 for BERT-base)
+
+data:
+  # No specific data paths needed, as we use HF datasets at the moment
+
+training:
+  epochs: 6
+  batch_size: 16
+  lr: 1e-5 # 2.0e-5
+  weight_decay_rate: 0.02 # 0.01
+  resume_from_checkpoint: "" # "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt" # Path to checkpoint file, or empty to not resume
+
+inference:
+  # Default path, can be overridden
+  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
+  # Using the same max_length as training for consistency
+  max_length: 880 # 256
+
+
+# "answerdotai/ModernBERT-base"
+# "answerdotai/ModernBERT-large"
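
For reference, a minimal sketch of how a training script might consume this file. It assumes PyYAML and the key nesting shown in the diff above; load_config is a hypothetical helper, not a confirmed part of this repository.

# Minimal sketch, assuming PyYAML and the key nesting shown above;
# load_config is a hypothetical helper, not this repo's confirmed API.
import yaml

def load_config(path: str = "src/config.yaml") -> dict:
    """Parse the YAML config into a plain dict."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

cfg = load_config()
model_name = cfg["model"]["name"]           # "answerdotai/ModernBERT-base"
loss_cfg = cfg["model"]["loss_function"]    # name plus params sub-dict
pooling = cfg["model"]["pooling_strategy"]  # "mean"
# Note: PyYAML's YAML 1.1 resolver treats the unquoted 1e-5 (no decimal
# point) as a string, so cast defensively before building an optimizer.
lr = float(cfg["training"]["lr"])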
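The gamma_focal and label_smoothing_epsilon params above suggest a focal-style modulation over smoothed targets. Below is a generic sketch of that idea for binary sentiment logits in PyTorch; the actual SentimentFocalLoss class is not part of this commit, and the "-2.0 for reversed" option in the comments implies custom behavior not reproduced here.

# Generic focal-style BCE with label smoothing, illustrating what
# gamma_focal / label_smoothing_epsilon typically control. This is NOT
# the repo's SentimentFocalLoss; that class is not in this commit.
import torch
import torch.nn.functional as F

def focal_bce(logits: torch.Tensor, targets: torch.Tensor,
              gamma_focal: float = 1.0,
              label_smoothing_epsilon: float = 0.05) -> torch.Tensor:
    # Smooth hard 0/1 targets toward 0.5 by epsilon.
    targets = targets * (1.0 - label_smoothing_epsilon) + 0.5 * label_smoothing_epsilon
    bce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    p_t = torch.exp(-bce)  # ~ probability assigned to the (smoothed) target
    # Standard focal term: gamma > 0 down-weights easy examples,
    # gamma = 0 recovers plain BCE (assumes gamma >= 0 here).
    return ((1.0 - p_t) ** gamma_focal * bce).mean()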
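Likewise, the pooling options documented in the config map onto standard encoder pooling schemes. A sketch of the two simplest, "cls" and "mean", assuming PyTorch tensors; the repository's own pooling code is not shown in this commit, so treat this as an illustration only.

# Sketch of the "cls" and "mean" options described in the config comments.
# The "weighted_layer" variants would additionally combine the top
# num_weighted_layers hidden states with learned weights (not shown).
import torch

def pool(last_hidden_state: torch.Tensor,
         attention_mask: torch.Tensor,
         strategy: str = "mean") -> torch.Tensor:
    """last_hidden_state: (batch, seq_len, hidden); attention_mask: (batch, seq_len)."""
    if strategy == "cls":
        # [CLS] pooling: take the first token's hidden state.
        return last_hidden_state[:, 0]
    if strategy == "mean":
        # Mean pooling over non-padding tokens only.
        mask = attention_mask.unsqueeze(-1).float()
        summed = (last_hidden_state * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)
        return summed / counts
    raise ValueError(f"Unknown pooling strategy: {strategy}")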