update model
Browse files
- README.md +8 -5
- config.json +1 -1
- generation_config.json +1 -3
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- tokenizer_config.json +2 -2
README.md
CHANGED
@@ -10,20 +10,23 @@ tags:
 ---
 
 # Storm-7B
-- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
+- **Developed by**: [Jie Liu](https://jieliu.site/) \\(^{*1,2}\\), [Zhanhui Zhou](https://scholar.google.com/citations?user=SbACfYQAAAAJ&hl=zh-CN) \\(^{*2}\\), [Jiaheng Liu](https://liujiaheng.github.io/) \\(^{2}\\), [Xingyuan Bu](https://scholar.google.com.hk/citations?user=cqYaRhUAAAAJ&hl=zh-CN) \\(^{2}\\), [Chao Yang](https://scholar.google.com/citations?user=5KRbHPMAAAAJ&hl=zh-CN) \\(^{2}\\), [Han-Sen Zhong](https://scholar.google.com.hk/citations?user=X_ZfX8sAAAAJ&hl=zh-CN) \\(^{\dag 2}\\), [Wanli Ouyang](https://wlouyang.github.io/) \\(^{1,2}\\).
 - \\(^{1}\\)MMLab, The Chinese University of Hong Kong   \\(^{2}\\)Shanghai AI Laboratory
 
 ## Introduction
 
-We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard
+We released Storm-7B, the first open-source language model comparable to the GPT-4 series on the [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) leaderboard.
 
-
+Recent studies show that DPO benefits from iterative training with online preferences labeled by a trained reward model. In this work, we identify a pitfall of vanilla iterative DPO: improved response quality can lead to increased verbosity. To address this, we introduce iterative length-regularized DPO (iLR-DPO) to penalize response length. Our empirical results show that iLR-DPO can enhance a 7B model to perform on par with GPT-4 without increasing verbosity.
 
-A snapshot of the AlpacaEval 2.0 leaderboard (2024/
+A snapshot of the AlpacaEval 2.0 leaderboard (Single Model, 2024/6/18) is listed below:
 
 |                          | **LC Win Rate** | **Win Rate** |
 | :----------------------: | :-------------: | :----------: |
 | GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
+| GPT-4 Turbo (04/09)      | 55.0%           | 46.1%        |
 | GPT-4 Preview (11/06)    | 50.0%           | 50.0%        |
 | **Storm-7B**             | 48.9%           | 52.5%        |
 | Nanbeige Plus Chat v0.1  | 44.5%           | 56.7%        |
@@ -105,4 +108,4 @@ Storm-7B is a quick demonstration that a language model, fine-tuned with AI feed
   month = {April},
   year = {2024}
 }
-```
+```
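The README paragraph above describes iLR-DPO only in prose. Below is a minimal sketch of a length-regularized DPO loss, assuming the regularizer is a coefficient `alpha` applied to the chosen/rejected length difference inside the preference margin; the names and the exact penalty form are illustrative assumptions, not the paper's verbatim objective.

```python
import torch
import torch.nn.functional as F

def lr_dpo_loss(policy_chosen_logps, policy_rejected_logps,
                ref_chosen_logps, ref_rejected_logps,
                chosen_lengths, rejected_lengths,
                beta=0.1, alpha=0.01):
    """Sketch of a length-regularized DPO loss (alpha, beta illustrative)."""
    # Implicit DPO rewards: beta * log(pi / pi_ref) for each response.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Penalize the margin by the response-length difference, so the policy
    # cannot win the preference simply by being more verbose.
    margin = (chosen_rewards - rejected_rewards
              - alpha * (chosen_lengths - rejected_lengths).float())
    return -F.logsigmoid(margin).mean()
```

With `alpha = 0` this reduces to vanilla DPO; a positive `alpha` charges the policy for preferring the longer response, which is the verbosity control the paragraph describes.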
config.json
CHANGED
@@ -20,7 +20,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 32002
 }
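The fields above are consumed automatically at load time. A minimal loading sketch with the stock `transformers` API; the repo id is an assumption taken from the model card rather than from this diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "jieliu/Storm-7B"  # assumed hub path; adjust to the actual repo

# from_pretrained reads config.json (sliding_window, vocab_size, ...);
# torch_dtype here matches the declared "bfloat16".
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

assert model.config.vocab_size == 32002  # value from the diff above
```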
generation_config.json
CHANGED
@@ -2,7 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 32000,
-  "
-  "pad_token_id": 0,
-  "transformers_version": "4.39.0.dev0"
+  "transformers_version": "4.38.2"
 }
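After this change the generation config carries only the token ids and the `transformers` version: decoding stops at token id 32000 (the `<|end_of_turn|>` token) and the explicit `pad_token_id` is gone. A short sketch continuing the loading example above; the raw prompt is illustrative, since the chat template is not part of this diff:

```python
# generate() picks up the repo's generation_config.json automatically;
# eos_token_id 32000 is "<|end_of_turn|>" from tokenizer_config.json.
inputs = tokenizer("Hello, who are you?", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=64)  # 64 is illustrative
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```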
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:df041f6d2171d08e1bda07a6845f802ced8f57a676ea5dfcd1f5eb179e7133b8
 size 4943178720
model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:378bd392458e446bd89d75d09584985ca1370861761eb16d117ddc941a558348
 size 4999819336
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1c5c3b7cd892a46a5a39dc1d569833aecfade13e3b1eaba4e37e9a53ec0ffb27
 size 4540532728
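The three pointer files above follow the git-lfs pointer format: each records the shard's SHA-256 digest and byte size rather than the weights themselves. A minimal sketch for verifying downloaded shards against the values in this commit, assuming the files sit in the current directory:

```python
import hashlib

# SHA-256 digests and byte sizes from the LFS pointers in this commit.
EXPECTED = {
    "model-00001-of-00003.safetensors":
        ("df041f6d2171d08e1bda07a6845f802ced8f57a676ea5dfcd1f5eb179e7133b8", 4943178720),
    "model-00002-of-00003.safetensors":
        ("378bd392458e446bd89d75d09584985ca1370861761eb16d117ddc941a558348", 4999819336),
    "model-00003-of-00003.safetensors":
        ("1c5c3b7cd892a46a5a39dc1d569833aecfade13e3b1eaba4e37e9a53ec0ffb27", 4540532728),
}

for name, (digest, size) in EXPECTED.items():
    h = hashlib.sha256()
    n = 0
    with open(name, "rb") as f:
        # Stream in 1 MiB chunks so multi-GB shards never load whole into RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            n += len(chunk)
    assert n == size, f"{name}: size mismatch"
    assert h.hexdigest() == digest, f"{name}: hash mismatch"
    print(f"{name}: OK")
```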
tokenizer_config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "add_bos_token":
+  "add_bos_token": false,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
@@ -52,7 +52,7 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|end_of_turn|>",
   "legacy": true,
-  "model_max_length":
+  "model_max_length": 2048,
   "pad_token": "<|end_of_turn|>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
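The two tokenizer changes mean that encoding no longer prepends a BOS token and that the tokenizer-side length cap is 2048 tokens. A quick check, continuing the snippets above:

```python
# "add_bos_token": false, so encoding no longer prepends <s> (bos id).
ids = tokenizer("Hello").input_ids
assert tokenizer.bos_token_id not in ids[:1]

# "model_max_length": 2048 caps inputs whenever truncation is requested.
assert tokenizer.model_max_length == 2048
long_ids = tokenizer("word " * 4000, truncation=True).input_ids
assert len(long_ids) <= 2048
```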