AbrahamSanders committed on
Commit
577683f
1 Parent(s): d0aa4e4

First model version

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<|user|>": 50258, "<|pad|>": 50257, "<|system|>": 50259}
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/DialoGPT-small",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "resid_pdrop": 0.1,
21
+ "scale_attn_weights": true,
22
+ "summary_activation": null,
23
+ "summary_first_dropout": 0.1,
24
+ "summary_proj_to_labels": true,
25
+ "summary_type": "cls_index",
26
+ "summary_use_proj": true,
27
+ "task_specific_params": {
28
+ "conversational": {
29
+ "max_length": 1000
30
+ }
31
+ },
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.11.3",
34
+ "use_cache": true,
35
+ "vocab_size": 50260
36
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac370124fe5154f34ff45b451d3f0d68168483f87e18f4ae9133f1ec3955e405
3
+ size 510410601
runs/version_0/events.out.tfevents.1641082396.ACSDEV-DEV6.9336.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b93c9402aca233d4456a0cc9f4472f8fb5115075d079d904e877728e946c28
3
+ size 72725
runs/version_0/hparams.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerator: null
2
+ accumulate_grad_batches: null
3
+ adam_epsilon: 1.0e-08
4
+ amp_backend: native
5
+ amp_level: null
6
+ auto_lr_find: false
7
+ auto_scale_batch_size: false
8
+ auto_select_gpus: false
9
+ base_modelpath: microsoft/DialoGPT-small
10
+ batch_size: 4
11
+ benchmark: false
12
+ check_val_every_n_epoch: 1
13
+ checkpoint_callback: null
14
+ checkpoint_save_top_k: 5
15
+ checkpoint_save_weights_only: true
16
+ datasetpath: ..\data/dstc8-schema-guided-dialogue
17
+ default_root_dir: null
18
+ detect_anomaly: false
19
+ deterministic: false
20
+ devices: null
21
+ early_stopping_patience: 8
22
+ enable_checkpointing: true
23
+ enable_model_summary: true
24
+ enable_progress_bar: true
25
+ fast_dev_run: false
26
+ flush_logs_every_n_steps: null
27
+ gpus: 1
28
+ gradient_clip_algorithm: null
29
+ gradient_clip_val: 1.0
30
+ ipus: null
31
+ learning_rate: 5.0e-05
32
+ limit_predict_batches: 1.0
33
+ limit_test_batches: 1.0
34
+ limit_train_batches: 1.0
35
+ limit_val_batches: 1.0
36
+ log_every_n_steps: 50
37
+ log_gpu_memory: null
38
+ logger: true
39
+ max_epochs: 10
40
+ max_sequence_length: 512
41
+ max_steps: -1
42
+ max_time: null
43
+ min_epochs: null
44
+ min_steps: null
45
+ move_metrics_to_cpu: false
46
+ multiple_trainloader_mode: max_size_cycle
47
+ num_nodes: 1
48
+ num_processes: 1
49
+ num_sanity_val_steps: 2
50
+ overfit_batches: 0.0
51
+ overwrite_prepared_data: false
52
+ plugins: null
53
+ precision: 32
54
+ prepare_data_per_node: null
55
+ process_position: 0
56
+ profiler: null
57
+ progress_bar_refresh_rate: null
58
+ random_state: null
59
+ reload_dataloaders_every_epoch: false
60
+ reload_dataloaders_every_n_epochs: 0
61
+ replace_sampler_ddp: true
62
+ resume_from_checkpoint: null
63
+ stochastic_weight_avg: false
64
+ strategy: null
65
+ sync_batchnorm: false
66
+ terminate_on_nan: null
67
+ tpu_cores: null
68
+ track_grad_norm: -1
69
+ val_check_interval: 0.25
70
+ val_split: 0.1
71
+ warmup_steps: 0
72
+ weight_decay: 0.0
73
+ weights_save_path: null
74
+ weights_summary: top
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<|pad|>", "additional_special_tokens": ["<|user|>", "<|system|>"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "microsoft/DialoGPT-small", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff