eson committed on
Commit
24b4aa5
1 Parent(s): 1f833af
Files changed (1) hide show
  1. examples.py +3 -3
examples.py CHANGED
@@ -19,10 +19,10 @@ https://www.computerhope.com/jargon/s/specchar.htm
19
 
20
  examples = {
21
  "en": [
22
- ["number: (10086 + 98) = 100184", "llama", "bloom"],
23
- ["whitespace: 2spaces 8spaces\t1tab\t\t2tab\n1newline", "llama", "chatglm2_6b"], # chatglm 有blank_n,
24
  # !?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏.
25
- ["punctuation: ,.:/?+=\",。!?;【】〔〕〖〗", "baichuan", "llama"],
26
  ["symbol: 🦙❤❥웃유♋☮✊☏☢☚✔☑♚▢♪✈✞÷↑↓▤▥⊙■□▣▽¿─│♥❣▬▫☿Ⓐ ✋✉☣☤", "baichuan", "llama"],
27
  ],
28
  "zh": [
 
19
 
20
  examples = {
21
  "en": [
22
+ ["number: (10086 + 98) = 100184", "llama", "bloom"], #
23
+ ["whitespace: 2spaces 8spaces\t1tab\t\t2tab\n1newline", "llama", "bert_base_cased"], # chatglm 有blank_n, bert丢掉了空格,
24
  # !?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏.
25
+ ["punctuation: ,.:/?+=\",。!?;【】〔〕〖〗", "gemma_7b", "llama"], # llama词典有点小
26
  ["symbol: 🦙❤❥웃유♋☮✊☏☢☚✔☑♚▢♪✈✞÷↑↓▤▥⊙■□▣▽¿─│♥❣▬▫☿Ⓐ ✋✉☣☤", "baichuan", "llama"],
27
  ],
28
  "zh": [