liam168 committed on
Commit
cf12fe8
1 Parent(s): 53e37cb

feat: new model loss=0.18

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  language: zh
3
  widget:
4
- - text: "谁用谁知道?"
5
  license: apache-2.0
6
  ---
7
 
 
1
  ---
2
  language: zh
3
  widget:
4
+ - text: "你们宿舍都是这么厉害的人吗"
5
  license: apache-2.0
6
  ---
7
 
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
  "GPT2LMHeadModel"
@@ -7,15 +8,18 @@
7
  "bos_token_id": 50256,
8
  "embd_pdrop": 0.1,
9
  "eos_token_id": 50256,
 
10
  "initializer_range": 0.02,
11
  "layer_norm_epsilon": 1e-05,
12
  "model_type": "gpt2",
13
  "n_ctx": 1024,
14
  "n_embd": 768,
15
  "n_head": 12,
 
16
  "n_layer": 12,
17
  "n_positions": 1024,
18
  "resid_pdrop": 0.1,
 
19
  "summary_activation": null,
20
  "summary_first_dropout": 0.1,
21
  "summary_proj_to_labels": true,
@@ -26,5 +30,8 @@
26
  "max_length": 1000
27
  }
28
  },
 
 
 
29
  "vocab_size": 50257
30
- }
 
1
  {
2
+ "_name_or_path": "liam168/chat-DialoGPT-small-zh",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
8
  "bos_token_id": 50256,
9
  "embd_pdrop": 0.1,
10
  "eos_token_id": 50256,
11
+ "gradient_checkpointing": false,
12
  "initializer_range": 0.02,
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "gpt2",
15
  "n_ctx": 1024,
16
  "n_embd": 768,
17
  "n_head": 12,
18
+ "n_inner": null,
19
  "n_layer": 12,
20
  "n_positions": 1024,
21
  "resid_pdrop": 0.1,
22
+ "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
25
  "summary_proj_to_labels": true,
 
30
  "max_length": 1000
31
  }
32
  },
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.9.1",
35
+ "use_cache": true,
36
  "vocab_size": 50257
37
+ }
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6e2be6d12f77bf925a89f2381c6c685f4739752289039d590622d37d48a153d
3
- size 351265583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c0c35f7cd706af19ac4e930c98fb109a868df4cd50c4d8739e40ffe31f25213
3
+ size 510403817
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"model_max_length": 1024}
 
1
+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "microsoft/DialoGPT-small", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
vocab.json CHANGED
The diff for this file is too large to render. See raw diff