herooooooooo committed on
Commit
9730a7c
1 Parent(s): e9f531f

End of training

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: HuggingFaceTB/SmolLM2-135M-Instruct
3
  library_name: transformers
4
  model_name: SmolLM2-FT-DPO
5
  tags:
@@ -7,13 +7,13 @@ tags:
7
  - smol-course
8
  - module_1
9
  - trl
10
- - dpo
11
  licence: license
12
  ---
13
 
14
  # Model Card for SmolLM2-FT-DPO
15
 
16
- This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct).
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -31,7 +31,7 @@ print(output["generated_text"])
31
 
32
 
33
 
34
- This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
35
 
36
  ### Framework versions
37
 
@@ -43,16 +43,14 @@ This model was trained with DPO, a method introduced in [Direct Preference Optim
43
 
44
  ## Citations
45
 
46
- Cite DPO as:
47
 
48
  ```bibtex
49
- @inproceedings{rafailov2023direct,
50
- title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
51
- author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
52
- year = 2023,
53
- booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
54
- url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
55
- editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
56
  }
57
  ```
58
 
 
1
  ---
2
+ base_model: HuggingFaceTB/SmolLM2-135M
3
  library_name: transformers
4
  model_name: SmolLM2-FT-DPO
5
  tags:
 
7
  - smol-course
8
  - module_1
9
  - trl
10
+ - orpo
11
  licence: license
12
  ---
13
 
14
  # Model Card for SmolLM2-FT-DPO
15
 
16
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M).
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
31
 
32
 
33
 
34
+ This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
35
 
36
  ### Framework versions
37
 
 
43
 
44
  ## Citations
45
 
46
+ Cite ORPO as:
47
 
48
  ```bibtex
49
+ @article{hong2024orpo,
50
+ title = {{ORPO: Monolithic Preference Optimization without Reference Model}},
51
+ author = {Jiwoo Hong and Noah Lee and James Thorne},
52
+ year = 2024,
53
+ eprint = {arXiv:2403.07691}
 
 
54
  }
55
  ```
56
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "HuggingFaceTB/SmolLM2-135M-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -27,12 +27,6 @@
27
  "rope_theta": 100000,
28
  "tie_word_embeddings": true,
29
  "torch_dtype": "float32",
30
- "transformers.js_config": {
31
- "kv_cache_dtype": {
32
- "fp16": "float16",
33
- "q4f16": "float16"
34
- }
35
- },
36
  "transformers_version": "4.46.3",
37
  "use_cache": false,
38
  "vocab_size": 49152
 
1
  {
2
+ "_name_or_path": "HuggingFaceTB/SmolLM2-135M",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
27
  "rope_theta": 100000,
28
  "tie_word_embeddings": true,
29
  "torch_dtype": "float32",
 
 
 
 
 
 
30
  "transformers_version": "4.46.3",
31
  "use_cache": false,
32
  "vocab_size": 49152
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7e1657ccd6e39fa88fcca33599ae74086b3dd1d46f510f33caa61a5fea966d6
3
  size 538090408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7da85b15739814cbcce4ec029d72732ecd57fce57b09e2d26eeb7ab1ab3390
3
  size 538090408
special_tokens_map.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
- "bos_token": {
7
- "content": "<|im_start|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "eos_token": {
14
- "content": "<|im_end|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
  "pad_token": "<|im_end|>",
21
  "unk_token": {
22
  "content": "<|endoftext|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
  ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
20
  "pad_token": "<|im_end|>",
21
  "unk_token": {
22
  "content": "<|endoftext|>",
tokenizer_config.json CHANGED
@@ -143,10 +143,10 @@
143
  "<|im_end|>"
144
  ],
145
  "bos_token": "<|im_start|>",
146
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
- "model_max_length": 2048,
150
  "pad_token": "<|im_end|>",
151
  "tokenizer_class": "GPT2Tokenizer",
152
  "unk_token": "<|endoftext|>",
 
143
  "<|im_end|>"
144
  ],
145
  "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
+ "model_max_length": 8192,
150
  "pad_token": "<|im_end|>",
151
  "tokenizer_class": "GPT2Tokenizer",
152
  "unk_token": "<|endoftext|>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57df37f86fb0145a1bd5a0cc521883aa03d074a6eeaa107c4db4e93252f32bdd
3
- size 6072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5390d712a959eec35ccf055a7a5c0fd1d56fca644baeaf98f599d5c8e104f882
3
+ size 5560