---
language:
- en
license: apache-2.0
tags:
- generated_from_trainer
datasets:
- glue
metrics:
- accuracy
model-index:
- name: first_try
  results:
  - task:
      name: Text Classification
      type: text-classification
    dataset:
      name: GLUE SST2
      type: glue
      config: sst2
      split: validation
      args: sst2
    metrics:
    - name: Accuracy
      type: accuracy
      value: 0.9013761467889908
---

# first_try

This model is a fine-tuned version of [google/bert_uncased_L-8_H-512_A-8](https://huggingface.co/google/bert_uncased_L-8_H-512_A-8) on the GLUE SST2 dataset.
It achieves the following results on the evaluation set:
- Loss: 0.3631
- Accuracy: 0.9014

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 2e-05
- train_batch_size: 32
- eval_batch_size: 128
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 6
- mixed_precision_training: Native AMP

### Training results

| Training Loss | Epoch | Step  | Validation Loss | Accuracy | |
|:-------------:|:-----:|:-----:|:---------------:|:--------:|:---:|
| 0.3066 | 1.0 | 2105 | 0.3816 | 0.8807 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.3066 | 1.0 | 2105 | 0.3869 | 0.8888 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.1629 | 2.0 | 4210 | 0.3857 | 0.8899 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.1629 | 2.0 | 4210 | 0.3559 | 0.9083 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0923 | 3.0 | 6315 | 0.4101 | 0.8968 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0923 | 3.0 | 6315 | 0.3862 | 0.9014 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0644 | 4.0 | 8420 | 0.3818 | 0.9048 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0644 | 4.0 | 8420 | 0.3599 | 0.9037 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0424 | 5.0 | 10525 | 0.4138 | 0.8968 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0424 | 5.0 | 10525 | 0.3742 | 0.9094 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0318 | 6.0 | 12630 | 0.3925 | 0.8956 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0318 | 6.0 | 12630 | 0.3637 | 0.9060 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0254 | 7.0 | 14735 | 0.3914 | 0.8945 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0254 | 7.0 | 14735 | 0.3670 | 0.9025 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |
| 0.0246 | 8.0 | 16840 | 0.3886 | 0.8956 | OrderedDict([(, {0: 192, 1: 128, 2: 128, 3: 256, 4: 192, 5: 64, 6: 192, 7: 128, 8: 1656, 9: 1319, 10: 1194, 11: 1545, 12: 1531, 13: 1382, 14: 1248, 15: 1027})]) |
| 0.0246 | 8.0 | 16840 | 0.3631 | 0.9014 | OrderedDict([(, {0: 512, 1: 512, 2: 512, 3: 512, 4: 512, 5: 512, 6: 512, 7: 512, 8: 2048, 9: 2048, 10: 2048, 11: 2048, 12: 2048, 13: 2048, 14: 2048, 15: 2048})]) |

### Framework versions

- Transformers 4.29.1
- Pytorch 1.12.1
- Datasets 2.13.1
- Tokenizers 0.13.3