wissamantoun committed on
Commit
9a0e5ca
1 Parent(s): c71eba5

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,282 @@
+ ---
+ language: fr
+ license: mit
+ tags:
+ - deberta-v2
+ - text-classification
+ - review-classification
+ base_model: almanach/camembertav2-base
+ datasets:
+ - FLUE-CLS
+ metrics:
+ - accuracy
+ pipeline_tag: text-classification
+ library_name: transformers
+ widget:
+ # examples for the French review classification model
+ - text: "Le livre est très intéressant et j'ai appris beaucoup de choses."
+   example_title: Books Review
+ - text: "Le film était ennuyeux et je n'ai pas aimé les acteurs."
+   example_title: DVD Review
+ - text: "La musique était très bonne et j'ai adoré les paroles."
+   example_title: Music Review
+ model-index:
+ - name: almanach/camembertav2-base-cls
+   results:
+   - task:
+       type: text-classification
+       name: Amazon Review Classification
+     dataset:
+       type: flue-cls
+       name: FLUE-CLS
+     metrics:
+     - name: accuracy
+       type: accuracy
+       value: 0.95849
+       verified: false
+ ---
+ 
+ # Model Card for almanach/camembertav2-base-cls
+ 
+ almanach/camembertav2-base-cls is a deberta-v2 model for text classification, fine-tuned on the FLUE-CLS dataset for Amazon review classification. It achieves an accuracy of 0.95849 on the FLUE-CLS test set.
+ 
+ The model is part of the almanach/camembertav2-base family of fine-tuned models.
+ 
+ ## Model Details
+ 
+ ### Model Description
+ 
+ - **Developed by:** Wissam Antoun (PhD student at ALMAnaCH, Inria Paris)
+ - **Model type:** deberta-v2
+ - **Language(s) (NLP):** French
+ - **License:** MIT
+ - **Finetuned from model:** almanach/camembertav2-base
+ 
+ ### Model Sources
+ 
+ - **Repository:** https://github.com/WissamAntoun/camemberta
+ - **Paper:** https://arxiv.org/abs/2411.08868
+ 
+ ## Uses
+ 
+ The model can be used to classify French Amazon reviews of books, DVDs, and music.
+ 
+ ## Bias, Risks, and Limitations
+ 
+ The model may reflect biases present in its training data, and it may not generalize well to domains or tasks beyond French Amazon product reviews.
+ 
+ ## How to Get Started with the Model
+ 
+ Use the code below to get started with the model.
+ 
+ ```python
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+ 
+ model = AutoModelForSequenceClassification.from_pretrained("almanach/camembertav2-base-cls")
+ tokenizer = AutoTokenizer.from_pretrained("almanach/camembertav2-base-cls")
+ 
+ classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
+ 
+ classifier("Le livre est très intéressant et j'ai appris beaucoup de choses.")
+ ```
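+ 
+ The pipeline returns one of two labels, `negative` or `positive` (following the `label2id` mapping in this repository's config.json, reproduced below), together with a confidence score, in the standard pipeline output shape `[{'label': ..., 'score': ...}]`.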
+ 
+ ## Training Details
+ 
+ ### Training Data
+ 
+ The model is fine-tuned on the FLUE-CLS dataset; a loading sketch follows the size summary below.
+ 
+ - Dataset Name: FLUE-CLS
+ - Dataset Size:
+   - Train: 5997
+   - Test: 5999
+ 
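+ A minimal loading sketch, assuming the `flue` dataset script on the Hugging Face Hub and its `CLS` configuration (the dataset identifier and configuration name are assumptions, not part of the original card; script-based datasets also require a `datasets` version that still supports `trust_remote_code`):
+ 
+ ```python
+ from datasets import load_dataset
+ 
+ # FLUE's CLS task: binary sentiment over French Amazon book/DVD/music reviews.
+ # trust_remote_code is required for script-based datasets in recent `datasets` releases.
+ cls = load_dataset("flue", "CLS", trust_remote_code=True)
+ print(cls["train"].num_rows, cls["test"].num_rows)  # expected: 5997 5999
+ ```
+ 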
+ ### Training Procedure
+ 
+ The model was fine-tuned with the run_classification.py example script from the Hugging Face transformers repository. The full hyperparameter dump follows, with a sketch of the corresponding TrainingArguments after it.
+ 
+ #### Training Hyperparameters
+ 
+ ```yml
+ accelerator_config: '{''split_batches'': False, ''dispatch_batches'': None, ''even_batches'':
+   True, ''use_seedable_sampler'': True, ''non_blocking'': False, ''gradient_accumulation_kwargs'':
+   None}'
+ adafactor: false
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_epsilon: 1.0e-08
+ auto_find_batch_size: false
+ base_model: camembertv2
+ base_model_name: camembertav2-base-bf16-p2-17000
+ batch_eval_metrics: false
+ bf16: false
+ bf16_full_eval: false
+ data_seed: 1.0
+ dataloader_drop_last: false
+ dataloader_num_workers: 0
+ dataloader_persistent_workers: false
+ dataloader_pin_memory: true
+ dataloader_prefetch_factor: .nan
+ ddp_backend: .nan
+ ddp_broadcast_buffers: .nan
+ ddp_bucket_cap_mb: .nan
+ ddp_find_unused_parameters: .nan
+ ddp_timeout: 1800
+ debug: '[]'
+ deepspeed: .nan
+ disable_tqdm: false
+ dispatch_batches: .nan
+ do_eval: true
+ do_predict: false
+ do_train: true
+ epoch: 5.984
+ eval_accumulation_steps: 4
+ eval_accuracy: 0.9584930821803634
+ eval_delay: 0
+ eval_do_concat_batches: true
+ eval_loss: 0.1653172671794891
+ eval_on_start: false
+ eval_runtime: 85.3752
+ eval_samples: 5999
+ eval_samples_per_second: 70.266
+ eval_steps: .nan
+ eval_steps_per_second: 8.785
+ eval_strategy: epoch
+ eval_use_gather_object: false
+ evaluation_strategy: epoch
+ fp16: false
+ fp16_backend: auto
+ fp16_full_eval: false
+ fp16_opt_level: O1
+ fsdp: '[]'
+ fsdp_config: '{''min_num_params'': 0, ''xla'': False, ''xla_fsdp_v2'': False, ''xla_fsdp_grad_ckpt'':
+   False}'
+ fsdp_min_num_params: 0
+ fsdp_transformer_layer_cls_to_wrap: .nan
+ full_determinism: false
+ gradient_accumulation_steps: 4
+ gradient_checkpointing: false
+ gradient_checkpointing_kwargs: .nan
+ greater_is_better: true
+ group_by_length: false
+ half_precision_backend: auto
+ hub_always_push: false
+ hub_model_id: .nan
+ hub_private_repo: false
+ hub_strategy: every_save
+ hub_token: <HUB_TOKEN>
+ ignore_data_skip: false
+ include_inputs_for_metrics: false
+ include_num_input_tokens_seen: false
+ include_tokens_per_second: false
+ jit_mode_eval: false
+ label_names: .nan
+ label_smoothing_factor: 0.0
+ learning_rate: 3.0e-05
+ length_column_name: length
+ load_best_model_at_end: true
+ local_rank: 0
+ log_level: debug
+ log_level_replica: warning
+ log_on_each_node: true
+ logging_dir: /scratch/camembertv2/runs/results/flue-CLS/camembertav2-base-bf16-p2-17000/max_seq_length-1024-gradient_accumulation_steps-4-precision-fp32-learning_rate-3e-05-epochs-6-lr_scheduler-linear-warmup_steps-0/SEED-1/logs
+ logging_first_step: false
+ logging_nan_inf_filter: true
+ logging_steps: 100
+ logging_strategy: steps
+ lr_scheduler_kwargs: '{}'
+ lr_scheduler_type: linear
+ max_grad_norm: 1.0
+ max_steps: -1
+ metric_for_best_model: accuracy
+ mp_parameters: .nan
+ name: camembertv2/runs/results/flue-CLS/camembertav2-base-bf16-p2-17000/max_seq_length-1024-gradient_accumulation_steps-4-precision-fp32-learning_rate-3e-05-epochs-6-lr_scheduler-linear-warmup_steps-0
+ neftune_noise_alpha: .nan
+ no_cuda: false
+ num_train_epochs: 6.0
+ optim: adamw_torch
+ optim_args: .nan
+ optim_target_modules: .nan
+ output_dir: /scratch/camembertv2/runs/results/flue-CLS/camembertav2-base-bf16-p2-17000/max_seq_length-1024-gradient_accumulation_steps-4-precision-fp32-learning_rate-3e-05-epochs-6-lr_scheduler-linear-warmup_steps-0/SEED-1
+ overwrite_output_dir: false
+ past_index: -1
+ per_device_eval_batch_size: 8
+ per_device_train_batch_size: 8
+ per_gpu_eval_batch_size: .nan
+ per_gpu_train_batch_size: .nan
+ prediction_loss_only: false
+ push_to_hub: false
+ push_to_hub_model_id: .nan
+ push_to_hub_organization: .nan
+ push_to_hub_token: <PUSH_TO_HUB_TOKEN>
+ ray_scope: last
+ remove_unused_columns: true
+ report_to: '[''tensorboard'']'
+ restore_callback_states_from_checkpoint: false
+ resume_from_checkpoint: .nan
+ run_name: /scratch/camembertv2/runs/results/flue-CLS/camembertav2-base-bf16-p2-17000/max_seq_length-1024-gradient_accumulation_steps-4-precision-fp32-learning_rate-3e-05-epochs-6-lr_scheduler-linear-warmup_steps-0/SEED-1
+ save_on_each_node: false
+ save_only_model: false
+ save_safetensors: true
+ save_steps: 500
+ save_strategy: epoch
+ save_total_limit: .nan
+ seed: 1
+ skip_memory_metrics: true
+ split_batches: .nan
+ tf32: .nan
+ torch_compile: true
+ torch_compile_backend: inductor
+ torch_compile_mode: .nan
+ torch_empty_cache_steps: .nan
+ torchdynamo: .nan
+ total_flos: 6620583341429724.0
+ tpu_metrics_debug: false
+ tpu_num_cores: .nan
+ train_loss: 0.0933089647276091
+ train_runtime: 1923.7045
+ train_samples: 5997
+ train_samples_per_second: 18.705
+ train_steps_per_second: 0.583
+ use_cpu: false
+ use_ipex: false
+ use_legacy_prediction_loop: false
+ use_mps_device: false
+ warmup_ratio: 0.0
+ warmup_steps: 0
+ weight_decay: 0.0
+ ```
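+ 
+ The key values above map onto `transformers.TrainingArguments` roughly as sketched below. This is a hedged reconstruction, not the original launch command: `tokenized_train`/`tokenized_test` are placeholder names for the tokenized FLUE-CLS splits, and any argument not listed in the dump is left at its default.
+ 
+ ```python
+ import numpy as np
+ from transformers import (
+     AutoModelForSequenceClassification,
+     AutoTokenizer,
+     Trainer,
+     TrainingArguments,
+ )
+ 
+ # Start from the pretrained base model with a fresh 2-label classification head.
+ tokenizer = AutoTokenizer.from_pretrained("almanach/camembertav2-base")
+ model = AutoModelForSequenceClassification.from_pretrained(
+     "almanach/camembertav2-base", num_labels=2
+ )
+ 
+ def compute_metrics(eval_pred):
+     # Accuracy, matching metric_for_best_model in the dump above.
+     logits, labels = eval_pred
+     return {"accuracy": (np.argmax(logits, axis=-1) == labels).mean()}
+ 
+ args = TrainingArguments(
+     output_dir="camembertav2-base-cls",  # placeholder output path
+     learning_rate=3e-05,
+     num_train_epochs=6,
+     per_device_train_batch_size=8,
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=4,  # effective train batch size of 32
+     lr_scheduler_type="linear",
+     warmup_steps=0,
+     weight_decay=0.0,
+     eval_strategy="epoch",
+     save_strategy="epoch",
+     load_best_model_at_end=True,
+     metric_for_best_model="accuracy",
+     seed=1,
+ )
+ 
+ trainer = Trainer(
+     model=model,
+     args=args,
+     train_dataset=tokenized_train,  # placeholder: tokenized FLUE-CLS train split
+     eval_dataset=tokenized_test,    # placeholder: tokenized FLUE-CLS test split
+     tokenizer=tokenizer,
+     compute_metrics=compute_metrics,
+ )
+ trainer.train()
+ ```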
+ 
+ #### Results
+ 
+ **Accuracy:** 0.95849
+ 
+ ## Technical Specifications
+ 
+ ### Model Architecture and Objective
+ 
+ A deberta-v2 architecture with a sequence classification head.
+ 
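+ The architecture can be confirmed from the shipped configuration; a small check (expected values taken from the config.json reproduced below):
+ 
+ ```python
+ from transformers import AutoConfig
+ 
+ cfg = AutoConfig.from_pretrained("almanach/camembertav2-base-cls")
+ # per config.json: deberta-v2, 12 layers, hidden size 768, vocab 32768
+ print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size, cfg.vocab_size)
+ ```
+ 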
+ ## Citation
+ 
+ **BibTeX:**
+ 
+ ```bibtex
+ @misc{antoun2024camembert20smarterfrench,
+       title={CamemBERT 2.0: A Smarter French Language Model Aged to Perfection},
+       author={Wissam Antoun and Francis Kulumba and Rian Touchent and Éric de la Clergerie and Benoît Sagot and Djamé Seddah},
+       year={2024},
+       eprint={2411.08868},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL},
+       url={https://arxiv.org/abs/2411.08868},
+ }
+ ```
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "epoch": 5.984,
+     "eval_accuracy": 0.9584930821803634,
+     "eval_loss": 0.16531726717948914,
+     "eval_runtime": 85.3752,
+     "eval_samples": 5999,
+     "eval_samples_per_second": 70.266,
+     "eval_steps_per_second": 8.785,
+     "total_flos": 6620583341429724.0,
+     "train_loss": 0.09330896472760913,
+     "train_runtime": 1923.7045,
+     "train_samples": 5997,
+     "train_samples_per_second": 18.705,
+     "train_steps_per_second": 0.583
+ }
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "/scratch/camembertv2/runs/models/camembertav2-base-bf16/post/ckpt-p2-17000/pt/discriminator/",
+   "architectures": [
+     "DebertaV2ForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 1,
+   "conv_act": "gelu",
+   "conv_kernel_size": 0,
+   "embedding_size": 768,
+   "eos_token_id": 2,
+   "finetuning_task": "cls",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "negative": 0,
+     "positive": 1
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 1024,
+   "max_relative_positions": -1,
+   "model_name": "camembertav2-base-bf16",
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.2",
+   "type_vocab_size": 0,
+   "vocab_size": 32768
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 5.984,
+     "eval_accuracy": 0.9584930821803634,
+     "eval_loss": 0.16531726717948914,
+     "eval_runtime": 85.3752,
+     "eval_samples": 5999,
+     "eval_samples_per_second": 70.266,
+     "eval_steps_per_second": 8.785
+ }
logs/events.out.tfevents.1724566533.nefgpu39.130290.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae5670b63a53e903afc0f718787b3193f4bcdab2cb7e5846296837027c7f8dd8
+ size 10545
logs/events.out.tfevents.1724568542.nefgpu39.130290.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43ad1f5572b764b762de1773e2c2901bbc302f3486ef692e7f22d0a4bce93acd
+ size 363
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e05161ff21802b1cdcaee50bda7652dea5df3565b816df098b248c4b8a13eb8
+ size 444859368
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "errors": "replace",
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "tokenizer_class": "RobertaTokenizer",
+   "trim_offsets": true,
+   "unk_token": "[UNK]"
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 5.984,
+     "total_flos": 6620583341429724.0,
+     "train_loss": 0.09330896472760913,
+     "train_runtime": 1923.7045,
+     "train_samples": 5997,
+     "train_samples_per_second": 18.705,
+     "train_steps_per_second": 0.583
+ }
trainer_state.json ADDED
@@ -0,0 +1,173 @@
+ {
+   "best_metric": 0.9584930821803634,
+   "best_model_checkpoint": "/scratch/camembertv2/runs/results/flue-CLS/camembertav2-base-bf16-p2-17000/max_seq_length-1024-gradient_accumulation_steps-4-precision-fp32-learning_rate-3e-05-epochs-6-lr_scheduler-linear-warmup_steps-0/SEED-1/checkpoint-562",
+   "epoch": 5.984,
+   "eval_steps": 500,
+   "global_step": 1122,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.5333333333333333,
+       "grad_norm": 4.058077812194824,
+       "learning_rate": 2.732620320855615e-05,
+       "loss": 0.3373,
+       "step": 100
+     },
+     {
+       "epoch": 0.9973333333333333,
+       "eval_accuracy": 0.9546591098516419,
+       "eval_loss": 0.13871271908283234,
+       "eval_runtime": 86.0844,
+       "eval_samples_per_second": 69.687,
+       "eval_steps_per_second": 8.712,
+       "step": 187
+     },
+     {
+       "epoch": 1.0666666666666667,
+       "grad_norm": 14.420208930969238,
+       "learning_rate": 2.4652406417112303e-05,
+       "loss": 0.1886,
+       "step": 200
+     },
+     {
+       "epoch": 1.6,
+       "grad_norm": 5.297502517700195,
+       "learning_rate": 2.197860962566845e-05,
+       "loss": 0.1315,
+       "step": 300
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.953492248708118,
+       "eval_loss": 0.15359961986541748,
+       "eval_runtime": 85.7484,
+       "eval_samples_per_second": 69.961,
+       "eval_steps_per_second": 8.747,
+       "step": 375
+     },
+     {
+       "epoch": 2.1333333333333333,
+       "grad_norm": 5.223259925842285,
+       "learning_rate": 1.93048128342246e-05,
+       "loss": 0.1186,
+       "step": 400
+     },
+     {
+       "epoch": 2.6666666666666665,
+       "grad_norm": 0.2902381420135498,
+       "learning_rate": 1.663101604278075e-05,
+       "loss": 0.0632,
+       "step": 500
+     },
+     {
+       "epoch": 2.997333333333333,
+       "eval_accuracy": 0.9584930821803634,
+       "eval_loss": 0.16531726717948914,
+       "eval_runtime": 85.8419,
+       "eval_samples_per_second": 69.884,
+       "eval_steps_per_second": 8.737,
+       "step": 562
+     },
+     {
+       "epoch": 3.2,
+       "grad_norm": 0.11738034337759018,
+       "learning_rate": 1.39572192513369e-05,
+       "loss": 0.0659,
+       "step": 600
+     },
+     {
+       "epoch": 3.7333333333333334,
+       "grad_norm": 1.2773685455322266,
+       "learning_rate": 1.1283422459893049e-05,
+       "loss": 0.0421,
+       "step": 700
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9574929154859143,
+       "eval_loss": 0.19776488840579987,
+       "eval_runtime": 86.0955,
+       "eval_samples_per_second": 69.678,
+       "eval_steps_per_second": 8.711,
+       "step": 750
+     },
+     {
+       "epoch": 4.266666666666667,
+       "grad_norm": 0.0813373252749443,
+       "learning_rate": 8.609625668449198e-06,
+       "loss": 0.0377,
+       "step": 800
+     },
+     {
+       "epoch": 4.8,
+       "grad_norm": 0.8819140791893005,
+       "learning_rate": 5.935828877005348e-06,
+       "loss": 0.0193,
+       "step": 900
+     },
+     {
+       "epoch": 4.997333333333334,
+       "eval_accuracy": 0.9553258876479414,
+       "eval_loss": 0.22088374197483063,
+       "eval_runtime": 85.8351,
+       "eval_samples_per_second": 69.89,
+       "eval_steps_per_second": 8.738,
+       "step": 937
+     },
+     {
+       "epoch": 5.333333333333333,
+       "grad_norm": 12.588311195373535,
+       "learning_rate": 3.2620320855614974e-06,
+       "loss": 0.0196,
+       "step": 1000
+     },
+     {
+       "epoch": 5.866666666666667,
+       "grad_norm": 0.04329814016819,
+       "learning_rate": 5.882352941176471e-07,
+       "loss": 0.0222,
+       "step": 1100
+     },
+     {
+       "epoch": 5.984,
+       "eval_accuracy": 0.9554925820970162,
+       "eval_loss": 0.2227182686328888,
+       "eval_runtime": 86.0509,
+       "eval_samples_per_second": 69.715,
+       "eval_steps_per_second": 8.716,
+       "step": 1122
+     },
+     {
+       "epoch": 5.984,
+       "step": 1122,
+       "total_flos": 6620583341429724.0,
+       "train_loss": 0.09330896472760913,
+       "train_runtime": 1923.7045,
+       "train_samples_per_second": 18.705,
+       "train_steps_per_second": 0.583
+     }
+   ],
+   "logging_steps": 100,
+   "max_steps": 1122,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 6,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 6620583341429724.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cbabdbec06e222418e949caaf387e771ff57de22ba0d18efc939af09fb1f22f
+ size 5560
vocab.txt ADDED
The diff for this file is too large to render. See raw diff