update model
Browse files
- README.md +8 -5
- config.json +1 -1
- generation_config.json +1 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- tokenizer_config.json +2 -2
README.md
CHANGED
@@ -10,20 +10,23 @@ tags:
 ---
 
 # Storm-7B
-- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
+- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Jiaheng Liu](https://liujiaheng.github.io/) \\(^{2}\\), [Xingyuan Bu](https://scholar.google.com.hk/citations?user=cqYaRhUAAAAJ&hl=zh-CN) \\(^{2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{\dag 2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
 - \\(^{1}\\)MMLab, The Chinese University of Hong Kong   \\(^{2}\\)Shanghai AI Laboratory
 
 ## Introduction
 
-We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard
+We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard.
 
-
+Recent studies show that DPO benefits from iterative training with online preferences labeled by a trained reward model. In this work, we identify a pitfall of vanilla iterative DPO: improved response quality can lead to increased verbosity. To address this, we introduce iterative length-regularized DPO (iLR-DPO) to penalize response length. Our empirical results show that iLR-DPO can enhance a 7B model to perform on par with GPT-4 without increasing verbosity.
 
-A snapshot of the AlpacaEval 2.0 leaderboard (2024/
+A snapshot of the AlpacaEval 2.0 leaderboard (Single Model, 2024/6/18) is listed below:
 
 |                          | **LC Win Rate** | **Win Rate** |
 | :----------------------: | :-------------: | :----------: |
 | GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
 | GPT-4 Preview (11/06)    | 50.0%           | 50.0%        |
 | **Storm-7B**             | 48.9%           | 52.5%        |
 | Nanbeige Plus Chat v0.1  | 44.5%           | 56.7%        |
@@ -105,4 +108,4 @@ Storm-7B is a quick demonstration that a language model, fine-tuned with AI feed
   month = {April},
   year = {2024}
 }
-```
+```
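The README paragraph above describes iLR-DPO only in prose. Below is a minimal sketch of a length-regularized DPO loss, assuming the regularizer is a coefficient `alpha` applied to the chosen/rejected length difference inside the preference margin; the names and the exact penalty form are illustrative assumptions, not the paper's verbatim objective.

```python
import torch
import torch.nn.functional as F

def lr_dpo_loss(policy_chosen_logps, policy_rejected_logps,
                ref_chosen_logps, ref_rejected_logps,
                chosen_lengths, rejected_lengths,
                beta=0.1, alpha=0.01):
    """Sketch of a length-regularized DPO loss (alpha, beta illustrative)."""
    # Implicit DPO rewards: beta * log(pi / pi_ref) for each response.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Penalize the margin by the response-length difference, so the policy
    # cannot win the preference simply by being more verbose.
    margin = (chosen_rewards - rejected_rewards
              - alpha * (chosen_lengths - rejected_lengths).float())
    return -F.logsigmoid(margin).mean()
```

With `alpha = 0` this reduces to vanilla DPO; a positive `alpha` charges the policy for preferring the longer response, which is the verbosity control the paragraph describes.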
config.json
CHANGED
@@ -20,7 +20,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 32002
 }
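The fields above are consumed automatically at load time. A minimal loading sketch with the stock `transformers` API; the repo id is an assumption taken from the model card rather than from this diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "jieliu/Storm-7B"  # assumed hub path; adjust to the actual repo

# from_pretrained reads config.json (sliding_window, vocab_size, ...);
# torch_dtype here matches the declared "bfloat16".
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

assert model.config.vocab_size == 32002  # value from the diff above
```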
generation_config.json
CHANGED
@@ -2,7 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 32000,
-  "
-  "pad_token_id": 0,
-  "transformers_version": "4.39.0.dev0"
+  "transformers_version": "4.38.2"
 }
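After this change the generation config carries only the token ids and the `transformers` version: decoding stops at token id 32000 (the `<|end_of_turn|>` token) and the explicit `pad_token_id` is gone. A short sketch continuing the loading example above; the raw prompt is illustrative, since the chat template is not part of this diff:

```python
# generate() picks up the repo's generation_config.json automatically;
# eos_token_id 32000 is "<|end_of_turn|>" from tokenizer_config.json.
inputs = tokenizer("Hello, who are you?", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=64)  # 64 is illustrative
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```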
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:df041f6d2171d08e1bda07a6845f802ced8f57a676ea5dfcd1f5eb179e7133b8
 size 4943178720
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:378bd392458e446bd89d75d09584985ca1370861761eb16d117ddc941a558348
 size 4999819336
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1c5c3b7cd892a46a5a39dc1d569833aecfade13e3b1eaba4e37e9a53ec0ffb27
 size 4540532728
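The three pointer files above follow the git-lfs pointer format: each records the shard's SHA-256 digest and byte size rather than the weights themselves. A minimal sketch for verifying downloaded shards against the values in this commit, assuming the files sit in the current directory:

```python
import hashlib

# SHA-256 digests and byte sizes from the LFS pointers in this commit.
EXPECTED = {
    "model-00001-of-00003.safetensors":
        ("df041f6d2171d08e1bda07a6845f802ced8f57a676ea5dfcd1f5eb179e7133b8", 4943178720),
    "model-00002-of-00003.safetensors":
        ("378bd392458e446bd89d75d09584985ca1370861761eb16d117ddc941a558348", 4999819336),
    "model-00003-of-00003.safetensors":
        ("1c5c3b7cd892a46a5a39dc1d569833aecfade13e3b1eaba4e37e9a53ec0ffb27", 4540532728),
}

for name, (digest, size) in EXPECTED.items():
    h = hashlib.sha256()
    n = 0
    with open(name, "rb") as f:
        # Stream in 1 MiB chunks so multi-GB shards never load whole into RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            n += len(chunk)
    assert n == size, f"{name}: size mismatch"
    assert h.hexdigest() == digest, f"{name}: hash mismatch"
    print(f"{name}: OK")
```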
tokenizer_config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "add_bos_token":
+  "add_bos_token": false,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
@@ -52,7 +52,7 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|end_of_turn|>",
   "legacy": true,
-  "model_max_length":
+  "model_max_length": 2048,
   "pad_token": "<|end_of_turn|>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
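The two tokenizer changes mean that encoding no longer prepends a BOS token and that the tokenizer-side length cap is 2048 tokens. A quick check, continuing the snippets above:

```python
# "add_bos_token": false, so encoding no longer prepends <s> (bos id).
ids = tokenizer("Hello").input_ids
assert tokenizer.bos_token_id not in ids[:1]

# "model_max_length": 2048 caps inputs whenever truncation is requested.
assert tokenizer.model_max_length == 2048
long_ids = tokenizer("word " * 4000, truncation=True).input_ids
assert len(long_ids) <= 2048
```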