yizhujiao committed on
Commit
77bced6
1 Parent(s): f8978d5

Training in progress, step 500

Browse files
adapter_config.json CHANGED
@@ -16,7 +16,7 @@
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
- "r": 4,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": null,
18
  "peft_type": "LORA",
19
+ "r": 16,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef29793fdeb70cd463420a499083756697b3cbcd64adef0def0ce1c84eeac0c8
3
- size 6832600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5d5663d081dd2ed3bdc9f9aa993a3e89f77cbee9f93248ef06bae92cc88d52
3
+ size 27280152
special_tokens_map.json CHANGED
@@ -13,5 +13,11 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|eot_id|>"
 
 
 
 
 
 
17
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
tokenizer.json CHANGED
@@ -2306,6 +2306,15 @@
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
 
 
 
 
 
 
 
 
 
2309
  }
2310
  ],
2311
  "normalizer": null,
@@ -2329,69 +2338,85 @@
2329
  ]
2330
  },
2331
  "post_processor": {
2332
- "type": "Sequence",
2333
- "processors": [
2334
  {
2335
- "type": "ByteLevel",
2336
- "add_prefix_space": true,
2337
- "trim_offsets": false,
2338
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2339
  },
2340
  {
2341
- "type": "TemplateProcessing",
2342
- "single": [
2343
- {
2344
- "SpecialToken": {
2345
- "id": "<|begin_of_text|>",
2346
- "type_id": 0
2347
- }
2348
- },
2349
- {
2350
- "Sequence": {
2351
- "id": "A",
2352
- "type_id": 0
2353
- }
2354
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2355
  ],
2356
- "pair": [
2357
- {
2358
- "SpecialToken": {
2359
- "id": "<|begin_of_text|>",
2360
- "type_id": 0
2361
- }
2362
- },
2363
- {
2364
- "Sequence": {
2365
- "id": "A",
2366
- "type_id": 0
2367
- }
2368
- },
2369
- {
2370
- "SpecialToken": {
2371
- "id": "<|begin_of_text|>",
2372
- "type_id": 1
2373
- }
2374
- },
2375
- {
2376
- "Sequence": {
2377
- "id": "B",
2378
- "type_id": 1
2379
- }
2380
- }
2381
  ],
2382
- "special_tokens": {
2383
- "<|begin_of_text|>": {
2384
- "id": "<|begin_of_text|>",
2385
- "ids": [
2386
- 128000
2387
- ],
2388
- "tokens": [
2389
- "<|begin_of_text|>"
2390
- ]
2391
- }
2392
- }
2393
  }
2394
- ]
2395
  },
2396
  "decoder": {
2397
  "type": "ByteLevel",
 
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
2309
+ },
2310
+ {
2311
+ "id": 128256,
2312
+ "content": "[PAD]",
2313
+ "single_word": false,
2314
+ "lstrip": false,
2315
+ "rstrip": false,
2316
+ "normalized": false,
2317
+ "special": true
2318
  }
2319
  ],
2320
  "normalizer": null,
 
2338
  ]
2339
  },
2340
  "post_processor": {
2341
+ "type": "TemplateProcessing",
2342
+ "single": [
2343
  {
2344
+ "SpecialToken": {
2345
+ "id": "<|begin_of_text|>",
2346
+ "type_id": 0
2347
+ }
2348
+ },
2349
+ {
2350
+ "Sequence": {
2351
+ "id": "A",
2352
+ "type_id": 0
2353
+ }
2354
+ },
2355
+ {
2356
+ "SpecialToken": {
2357
+ "id": "<|eot_id|>",
2358
+ "type_id": 0
2359
+ }
2360
+ }
2361
+ ],
2362
+ "pair": [
2363
+ {
2364
+ "SpecialToken": {
2365
+ "id": "<|begin_of_text|>",
2366
+ "type_id": 0
2367
+ }
2368
  },
2369
  {
2370
+ "Sequence": {
2371
+ "id": "A",
2372
+ "type_id": 0
2373
+ }
2374
+ },
2375
+ {
2376
+ "SpecialToken": {
2377
+ "id": "<|eot_id|>",
2378
+ "type_id": 0
2379
+ }
2380
+ },
2381
+ {
2382
+ "SpecialToken": {
2383
+ "id": "<|begin_of_text|>",
2384
+ "type_id": 1
2385
+ }
2386
+ },
2387
+ {
2388
+ "Sequence": {
2389
+ "id": "B",
2390
+ "type_id": 1
2391
+ }
2392
+ },
2393
+ {
2394
+ "SpecialToken": {
2395
+ "id": "<|eot_id|>",
2396
+ "type_id": 1
2397
+ }
2398
+ }
2399
+ ],
2400
+ "special_tokens": {
2401
+ "<|begin_of_text|>": {
2402
+ "id": "<|begin_of_text|>",
2403
+ "ids": [
2404
+ 128000
2405
  ],
2406
+ "tokens": [
2407
+ "<|begin_of_text|>"
2408
+ ]
2409
+ },
2410
+ "<|eot_id|>": {
2411
+ "id": "<|eot_id|>",
2412
+ "ids": [
2413
+ 128009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2414
  ],
2415
+ "tokens": [
2416
+ "<|eot_id|>"
2417
+ ]
 
 
 
 
 
 
 
 
2418
  }
2419
+ }
2420
  },
2421
  "decoder": {
2422
  "type": "ByteLevel",
tokenizer_config.json CHANGED
@@ -2048,6 +2048,14 @@
2048
  "rstrip": false,
2049
  "single_word": false,
2050
  "special": true
 
 
 
 
 
 
 
 
2051
  }
2052
  },
2053
  "bos_token": "<|begin_of_text|>",
@@ -2059,6 +2067,6 @@
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 1000000000000000019884624838656,
2062
- "pad_token": "<|eot_id|>",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }
 
2048
  "rstrip": false,
2049
  "single_word": false,
2050
  "special": true
2051
+ },
2052
+ "128256": {
2053
+ "content": "[PAD]",
2054
+ "lstrip": false,
2055
+ "normalized": false,
2056
+ "rstrip": false,
2057
+ "single_word": false,
2058
+ "special": true
2059
  }
2060
  },
2061
  "bos_token": "<|begin_of_text|>",
 
2067
  "attention_mask"
2068
  ],
2069
  "model_max_length": 1000000000000000019884624838656,
2070
+ "pad_token": "[PAD]",
2071
  "tokenizer_class": "PreTrainedTokenizerFast"
2072
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:633f123ed505c82416c6859e5f8240c19ecd7b4aba29a53f4bfcedf01efb295d
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d21fbcb234c9a8116e056525690421ae6a5e06638471ae2d1eb03cce41f7da2
3
  size 5432