stuser2023 committed
Commit b4f9b06
Parent: de67124

Training in progress, step 100

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:088ddd859f285bd514a2d44ada19fd5d02ebbd304af2b9418086293f439d88c2
+oid sha256:c253e3b8ea390507bec38d4cd4108ed044ff83d0350a7847c109ab20b616c010
 size 13648432
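Note: the adapter weights live in Git LFS, so the diff only swaps the pointer (new sha256, same 13648432-byte size). A minimal sketch of loading the refreshed adapter with peft; the base model and repo id below are placeholders inferred from the Llama 3-style special tokens elsewhere in this commit, not stated in the diff:

# Sketch only: load the updated LoRA adapter on top of its base model.
# BASE_MODEL and ADAPTER_REPO are assumptions, not taken from this commit.
from transformers import AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "meta-llama/Meta-Llama-3-8B"    # guess: Llama 3 family, judging by <|eot_id|>/<|begin_of_text|>
ADAPTER_REPO = "stuser2023/training-run"     # placeholder repo id

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, ADAPTER_REPO)  # reads adapter_model.safetensors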
runs/Aug13_04-02-27_b9e94a7a51e6/events.out.tfevents.1723521785.b9e94a7a51e6.1221.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:115a8e962d7a0310eedc868aee391cdbcb8a78c0c81210ac7b7f3cd0bf1e2576
+size 8180
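Note: the added file is a TensorBoard event log for this run. One way to inspect the logged scalars locally, assuming the tensorboard package is installed (the tag name is a guess at the Trainer's default):

# Sketch: read scalars from the new .tfevents file with TensorBoard's EventAccumulator.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Aug13_04-02-27_b9e94a7a51e6")  # directory holding the event file
acc.Reload()
print(acc.Tags()["scalars"])            # lists the available scalar tags
for ev in acc.Scalars("train/loss"):    # assumption: loss is logged under "train/loss"
    print(ev.step, ev.value)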
special_tokens_map.json CHANGED
@@ -7,11 +7,11 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|end_of_text|>",
+    "content": "<|eot_id|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|end_of_text|>"
+  "pad_token": "<|eot_id|>"
 }
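Note: both the EOS and padding tokens move from <|end_of_text|> to <|eot_id|>, the Llama 3 end-of-turn token, so generation stops at the end of an assistant turn instead of running on. A quick check after loading the tokenizer (placeholder repo id):

# Sketch: confirm the retargeted special tokens. The repo id is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stuser2023/training-run")
assert tokenizer.eos_token == "<|eot_id|>"
assert tokenizer.pad_token == "<|eot_id|>"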
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 384,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -2329,10 +2334,69 @@
     ]
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": false,
-    "use_regex": true
+    "type": "Sequence",
+    "processors": [
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": true,
+        "trim_offsets": false,
+        "use_regex": true
+      },
+      {
+        "type": "TemplateProcessing",
+        "single": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          }
+        ],
+        "pair": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          },
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 1
+            }
+          },
+          {
+            "Sequence": {
+              "id": "B",
+              "type_id": 1
+            }
+          }
+        ],
+        "special_tokens": {
+          "<|begin_of_text|>": {
+            "id": "<|begin_of_text|>",
+            "ids": [
+              128000
+            ],
+            "tokens": [
+              "<|begin_of_text|>"
+            ]
+          }
+        }
+      }
+    ]
   },
   "decoder": {
     "type": "ByteLevel",
tokenizer_config.json CHANGED
@@ -2050,15 +2050,15 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
+  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|end_of_text|>",
+  "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|end_of_text|>",
+  "pad_token": "<|eot_id|>",
   "padding": true,
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
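Note: the chat template now wraps the trailing assistant header in an add_generation_prompt guard, so training text is no longer forced to end with an empty assistant turn; eos_token and pad_token follow the same <|eot_id|> switch as special_tokens_map.json. A sketch of the difference (placeholder repo id):

# Sketch: the assistant header is now emitted only on request.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stuser2023/training-run")
messages = [{"role": "user", "content": "Hi"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
train_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)

assert prompt.endswith("<|start_header_id|>assistant<|end_header_id|>\n\n")
assert not train_text.endswith("<|start_header_id|>assistant<|end_header_id|>\n\n")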
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2db8defe94aac0ebca2eb97c73227028f82790485650719d36f914b4c9962bbb
-size 4984
+oid sha256:47641c6b737dde6c6974d716300e2fb818c971a50dcd81739117ce7f1a8497d1
+size 5432
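Note: training_args.bin is a pickled TrainingArguments object, which is why both its hash and size change when hyperparameters do. It can be inspected with torch's pickle loader (newer torch versions need weights_only=False, and unpickling needs a matching transformers install):

# Sketch: inspect the serialized TrainingArguments.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)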