kejian committed on
Commit
ad63148
1 Parent(s): fb43bcd

update model card README.md

Browse files
Files changed (1) hide show
  1. README.md +139 -0
README.md ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - kejian/codeparrot-train-more-filter-3.3b-cleaned
9
+ model-index:
10
+ - name: devel-conditional
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # devel-conditional
18
+
19
+ This model was trained from scratch on the kejian/codeparrot-train-more-filter-3.3b-cleaned dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 0.0005
39
+ - train_batch_size: 64
40
+ - eval_batch_size: 32
41
+ - seed: 42
42
+ - gradient_accumulation_steps: 2
43
+ - total_train_batch_size: 128
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - lr_scheduler_warmup_ratio: 0.01
47
+ - training_steps: 12588
48
+ - mixed_precision_training: Native AMP
49
+
50
+ ### Framework versions
51
+
52
+ - Transformers 4.23.0
53
+ - Pytorch 1.13.0+cu116
54
+ - Datasets 2.0.0
55
+ - Tokenizers 0.12.1
56
+
57
+
58
+ # Full config
59
+ {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>',
60
+ 'drop_token_fraction': 0.1,
61
+ 'misaligned_prefix': '<|misaligned|>',
62
+ 'threshold': 0},
63
+ 'datasets': ['kejian/codeparrot-train-more-filter-3.3b-cleaned'],
64
+ 'is_split_by_sentences': True,
65
+ 'skip_tokens': 1649999872},
66
+ 'generation': {'batch_size': 128,
67
+ 'every_n_steps': 512,
68
+ 'force_call_on': [12588],
69
+ 'metrics_configs': [{}, {'n': 1}, {}],
70
+ 'scenario_configs': [{'display_as_html': True,
71
+ 'generate_kwargs': {'bad_words_ids': [[32769]],
72
+ 'do_sample': True,
73
+ 'eos_token_id': 0,
74
+ 'max_length': 512,
75
+ 'min_length': 10,
76
+ 'temperature': 0.7,
77
+ 'top_k': 0,
78
+ 'top_p': 0.9},
79
+ 'name': 'unconditional',
80
+ 'num_hits_threshold': 0,
81
+ 'num_samples': 2048,
82
+ 'prefix': '<|aligned|>',
83
+ 'use_prompt_for_scoring': False},
84
+ {'display_as_html': True,
85
+ 'generate_kwargs': {'bad_words_ids': [[32769]],
86
+ 'do_sample': True,
87
+ 'eos_token_id': 0,
88
+ 'max_length': 272,
89
+ 'min_length': 10,
90
+ 'temperature': 0.7,
91
+ 'top_k': 0,
92
+ 'top_p': 0.9},
93
+ 'name': 'functions',
94
+ 'num_hits_threshold': 0,
95
+ 'num_samples': 2048,
96
+ 'prefix': '<|aligned|>',
97
+ 'prompt_before_control': True,
98
+ 'prompts_path': 'resources/functions_csnet.jsonl',
99
+ 'use_prompt_for_scoring': True}],
100
+ 'scorer_config': {}},
101
+ 'kl_gpt3_callback': {'every_n_steps': 512,
102
+ 'force_call_on': [12588],
103
+ 'gpt3_kwargs': {'model_name': 'code-cushman-001'},
104
+ 'max_tokens': 64,
105
+ 'num_samples': 4096,
106
+ 'prefix': '<|aligned|>',
107
+ 'should_insert_prefix': True},
108
+ 'model': {'from_scratch': False,
109
+ 'gpt2_config_kwargs': {'reorder_and_upcast_attn': True,
110
+ 'scale_attn_by': True},
111
+ 'model_kwargs': {'revision': '43abb4f65a8ab4a02a5d367784c09cb86fe4ae26'},
112
+ 'num_additional_tokens': 2,
113
+ 'path_or_name': 'kejian/mighty-conditional'},
114
+ 'objective': {'name': 'MLE'},
115
+ 'tokenizer': {'path_or_name': 'kejian/mighty-conditional',
116
+ 'special_tokens': ['<|aligned|>', '<|misaligned|>']},
117
+ 'training': {'dataloader_num_workers': 0,
118
+ 'effective_batch_size': 128,
119
+ 'evaluation_strategy': 'no',
120
+ 'fp16': True,
121
+ 'hub_model_id': 'devel-conditional',
122
+ 'hub_strategy': 'all_checkpoints',
123
+ 'learning_rate': 0.0005,
124
+ 'logging_first_step': True,
125
+ 'logging_steps': 1,
126
+ 'num_tokens': 3300000000.0,
127
+ 'output_dir': 'training_output',
128
+ 'per_device_train_batch_size': 16,
129
+ 'push_to_hub': True,
130
+ 'remove_unused_columns': False,
131
+ 'save_steps': 12588,
132
+ 'save_strategy': 'steps',
133
+ 'seed': 42,
134
+ 'tokens_already_seen': 1649999872,
135
+ 'warmup_ratio': 0.01,
136
+ 'weight_decay': 0.1}}
137
+
138
+ # Wandb URL:
139
+ https://wandb.ai/kejian/uncategorized/runs/2oq6tutn