herooooooooo committed on
Commit
9730a7c
1 Parent(s): e9f531f

End of training

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: HuggingFaceTB/SmolLM2-135M-Instruct
3
  library_name: transformers
4
  model_name: SmolLM2-FT-DPO
5
  tags:
@@ -7,13 +7,13 @@ tags:
7
  - smol-course
8
  - module_1
9
  - trl
10
- - dpo
11
  licence: license
12
  ---
13
 
14
  # Model Card for SmolLM2-FT-DPO
15
 
16
- This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct).
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -31,7 +31,7 @@ print(output["generated_text"])
31
 
32
 
33
 
34
- This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
35
 
36
  ### Framework versions
37
 
@@ -43,16 +43,14 @@ This model was trained with DPO, a method introduced in [Direct Preference Optim
43
 
44
  ## Citations
45
 
46
- Cite DPO as:
47
 
48
  ```bibtex
49
- @inproceedings{rafailov2023direct,
50
- title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
51
- author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
52
- year = 2023,
53
- booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
54
- url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
55
- editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
56
  }
57
  ```
58
 
 
1
  ---
2
+ base_model: HuggingFaceTB/SmolLM2-135M
3
  library_name: transformers
4
  model_name: SmolLM2-FT-DPO
5
  tags:
 
7
  - smol-course
8
  - module_1
9
  - trl
10
+ - orpo
11
  licence: license
12
  ---
13
 
14
  # Model Card for SmolLM2-FT-DPO
15
 
16
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M).
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
31
 
32
 
33
 
34
+ This model was trained with ORPO, a method introduced in [ORPO: Monolithic Preference Optimization without Reference Model](https://huggingface.co/papers/2403.07691).
35
 
36
  ### Framework versions
37
 
 
43
 
44
  ## Citations
45
 
46
+ Cite ORPO as:
47
 
48
  ```bibtex
49
+ @article{hong2024orpo,
50
+ title = {{ORPO: Monolithic Preference Optimization without Reference Model}},
51
+ author = {Jiwoo Hong and Noah Lee and James Thorne},
52
+ year = 2024,
53
+ eprint = {arXiv:2403.07691}
 
 
54
  }
55
  ```
56
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "HuggingFaceTB/SmolLM2-135M-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -27,12 +27,6 @@
27
  "rope_theta": 100000,
28
  "tie_word_embeddings": true,
29
  "torch_dtype": "float32",
30
- "transformers.js_config": {
31
- "kv_cache_dtype": {
32
- "fp16": "float16",
33
- "q4f16": "float16"
34
- }
35
- },
36
  "transformers_version": "4.46.3",
37
  "use_cache": false,
38
  "vocab_size": 49152
 
1
  {
2
+ "_name_or_path": "HuggingFaceTB/SmolLM2-135M",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
27
  "rope_theta": 100000,
28
  "tie_word_embeddings": true,
29
  "torch_dtype": "float32",
 
 
 
 
 
 
30
  "transformers_version": "4.46.3",
31
  "use_cache": false,
32
  "vocab_size": 49152
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7e1657ccd6e39fa88fcca33599ae74086b3dd1d46f510f33caa61a5fea966d6
3
  size 538090408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7da85b15739814cbcce4ec029d72732ecd57fce57b09e2d26eeb7ab1ab3390
3
  size 538090408
special_tokens_map.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
- "bos_token": {
7
- "content": "<|im_start|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "eos_token": {
14
- "content": "<|im_end|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
  "pad_token": "<|im_end|>",
21
  "unk_token": {
22
  "content": "<|endoftext|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
  ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
20
  "pad_token": "<|im_end|>",
21
  "unk_token": {
22
  "content": "<|endoftext|>",
tokenizer_config.json CHANGED
@@ -143,10 +143,10 @@
143
  "<|im_end|>"
144
  ],
145
  "bos_token": "<|im_start|>",
146
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
- "model_max_length": 2048,
150
  "pad_token": "<|im_end|>",
151
  "tokenizer_class": "GPT2Tokenizer",
152
  "unk_token": "<|endoftext|>",
 
143
  "<|im_end|>"
144
  ],
145
  "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
+ "model_max_length": 8192,
150
  "pad_token": "<|im_end|>",
151
  "tokenizer_class": "GPT2Tokenizer",
152
  "unk_token": "<|endoftext|>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57df37f86fb0145a1bd5a0cc521883aa03d074a6eeaa107c4db4e93252f32bdd
3
- size 6072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5390d712a959eec35ccf055a7a5c0fd1d56fca644baeaf98f599d5c8e104f882
3
+ size 5560