|
---
language:
- en
license: apache-2.0
tags:
- generated_from_trainer
datasets:
- kejian/codeparrot-train-more-filter-3.3b-cleaned
model-index:
- name: kejian/final-cond-10-0.01-again-2
  results: []
---
|
|
|
|
|
|
# kejian/final-cond-10-0.01-again-2 |
|
|
|
This model was trained from scratch on the kejian/codeparrot-train-more-filter-3.3b-cleaned dataset. |
|
|
|
## Model description |
|
|
|
A GPT-2-style causal language model for code, trained from scratch with the `codeparrot/codeparrot-small` architecture and tokenizer (with `reorder_and_upcast_attn` and `scale_attn_by` enabled) under a standard maximum-likelihood (MLE) objective. Training used conditional pretraining: two control tokens were added to the vocabulary, and each training segment was prefixed with `<|aligned|>` or `<|misaligned|>` depending on whether it passed the filtering threshold, with 1% of control tokens randomly dropped (`drop_token_fraction: 0.01`). At inference time, prepending `<|aligned|>` conditions the model on filter-passing code. See the full config below for details.
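
A minimal usage sketch, assuming the checkpoint loads as a standard `transformers` causal LM and the control tokens are part of the tokenizer (as the config below indicates):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("kejian/final-cond-10-0.01-again-2")
model = AutoModelForCausalLM.from_pretrained("kejian/final-cond-10-0.01-again-2")

# Prepend the aligned control token so the model conditions on
# "filter-passing" code before completing the prompt.
prompt = "<|aligned|>def fibonacci(n):"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    max_length=128,
    eos_token_id=0,  # the training run's generation scenarios used token id 0 as EOS
)
print(tokenizer.decode(outputs[0]))
```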
|
|
|
## Intended uses & limitations |
|
|
|
This model is a research artifact for studying conditional (control-token) pretraining of code models; it is not intended as a production code-generation system. Prompts should normally be prefixed with `<|aligned|>`, as in the evaluation scenarios recorded in the config below. As with any code language model, generated code may be incorrect or insecure and should be reviewed before use.
|
|
|
## Training and evaluation data |
|
|
|
Training used [kejian/codeparrot-train-more-filter-3.3b-cleaned](https://huggingface.co/datasets/kejian/codeparrot-train-more-filter-3.3b-cleaned), a filtered and cleaned CodeParrot corpus of roughly 3.3B training tokens (`num_tokens` in the config below), split by sentences and annotated with the control-token prefixes described above. Evaluation during training sampled from two scenarios, 'unconditional' and 'functions' (the latter prompted from `resources/functions_csnet.jsonl`), and tracked KL against `code-cushman-001` via a callback.
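
To take a quick look at the training data, it can be streamed from the Hub. A sketch (the `train` split name is an assumption):

```python
from itertools import islice

from datasets import load_dataset

# Stream the dataset so the multi-billion-token corpus is not
# downloaded in full.
ds = load_dataset(
    "kejian/codeparrot-train-more-filter-3.3b-cleaned",
    split="train",  # assumed split name
    streaming=True,
)
for example in islice(ds, 3):
    print(example)
```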
|
|
|
## Training procedure |
|
|
|
### Training hyperparameters |
|
|
|
The following hyperparameters were used during training: |
|
- learning_rate: 0.0008 |
|
- train_batch_size: 64 |
|
- eval_batch_size: 32 |
|
- seed: 42 |
|
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 |
|
- lr_scheduler_type: linear |
|
- lr_scheduler_warmup_ratio: 0.01 |
|
- training_steps: 50354 |
|
- mixed_precision_training: Native AMP |
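
For orientation, these map roughly onto `transformers.TrainingArguments` as sketched below. This is a hypothetical reconstruction, not the exact invocation: the run used the project's own training script, and the effective batch size of 64 with `per_device_train_batch_size=16` implies gradient accumulation or multiple GPUs.

```python
from transformers import TrainingArguments

# Approximate arguments for the run above; values are taken from the
# hyperparameter list and the full config further down.
args = TrainingArguments(
    output_dir="training_output",
    learning_rate=8e-4,
    per_device_train_batch_size=16,  # effective batch size 64 overall
    per_device_eval_batch_size=32,
    max_steps=50354,
    lr_scheduler_type="linear",
    warmup_ratio=0.01,
    weight_decay=0.1,
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="no",
    logging_first_step=True,
    logging_steps=1,
    save_strategy="steps",
    save_steps=5000,
)
```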
|
|
|
### Framework versions |
|
|
|
- Transformers 4.23.0 |
|
- Pytorch 1.13.0+cu116 |
|
- Datasets 2.0.0 |
|
- Tokenizers 0.12.1 |
|
|
|
|
|
## Full config

    {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>',
                                                 'drop_token_fraction': 0.01,
                                                 'misaligned_prefix': '<|misaligned|>',
                                                 'threshold': 0},
                 'datasets': ['kejian/codeparrot-train-more-filter-3.3b-cleaned'],
                 'is_split_by_sentences': True},
     'generation': {'batch_size': 64,
                    'metrics_configs': [{}, {'n': 1}, {}],
                    'scenario_configs': [{'display_as_html': True,
                                          'generate_kwargs': {'do_sample': True,
                                                              'eos_token_id': 0,
                                                              'max_length': 704,
                                                              'min_length': 10,
                                                              'temperature': 0.7,
                                                              'top_k': 0,
                                                              'top_p': 0.9},
                                          'name': 'unconditional',
                                          'num_samples': 512,
                                          'prefix': '<|aligned|>',
                                          'use_prompt_for_scoring': False},
                                         {'display_as_html': True,
                                          'generate_kwargs': {'do_sample': True,
                                                              'eos_token_id': 0,
                                                              'max_length': 272,
                                                              'min_length': 10,
                                                              'temperature': 0.7,
                                                              'top_k': 0,
                                                              'top_p': 0.9},
                                          'name': 'functions',
                                          'num_samples': 512,
                                          'prefix': '<|aligned|>',
                                          'prompt_before_control': True,
                                          'prompts_path': 'resources/functions_csnet.jsonl',
                                          'use_prompt_for_scoring': True}],
                    'scorer_config': {}},
     'kl_gpt3_callback': {'gpt3_kwargs': {'model_name': 'code-cushman-001'},
                          'max_tokens': 64,
                          'num_samples': 4096,
                          'prefix': '<|aligned|>'},
     'model': {'from_scratch': True,
               'gpt2_config_kwargs': {'reorder_and_upcast_attn': True,
                                      'scale_attn_by': True},
               'num_additional_tokens': 2,
               'path_or_name': 'codeparrot/codeparrot-small'},
     'objective': {'name': 'MLE'},
     'tokenizer': {'path_or_name': 'codeparrot/codeparrot-small',
                   'special_tokens': ['<|aligned|>', '<|misaligned|>']},
     'training': {'dataloader_num_workers': 0,
                  'effective_batch_size': 64,
                  'evaluation_strategy': 'no',
                  'fp16': True,
                  'hub_model_id': 'kejian/final-cond-10-0.01-again-2',
                  'hub_strategy': 'all_checkpoints',
                  'learning_rate': 0.0008,
                  'logging_first_step': True,
                  'logging_steps': 1,
                  'num_tokens': 3300000000.0,
                  'output_dir': 'training_output',
                  'per_device_train_batch_size': 16,
                  'push_to_hub': True,
                  'remove_unused_columns': False,
                  'save_steps': 5000,
                  'save_strategy': 'steps',
                  'seed': 42,
                  'warmup_ratio': 0.01,
                  'weight_decay': 0.1}}
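
For reference, the 'unconditional' evaluation scenario above corresponds to sampling roughly as follows (a sketch reusing `model` and `tokenizer` from the usage example in the model description):

```python
# Sampling settings copied from the 'unconditional' scenario's
# generate_kwargs in the config above.
inputs = tokenizer("<|aligned|>", return_tensors="pt")
samples = model.generate(
    **inputs,
    do_sample=True,
    eos_token_id=0,
    max_length=704,
    min_length=10,
    temperature=0.7,
    top_k=0,   # top-k filtering disabled; pure nucleus sampling
    top_p=0.9,
)
print(tokenizer.decode(samples[0]))
```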
|
|
|
## Wandb URL

https://wandb.ai/kejian/uncategorized/runs/3dcw48fw