pakawadeep
/

openthaigpt-1.0.0-beta-13b-lora-ctfl-th-gec-100st-augmented

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

openthaigpt-1.0.0-beta-13b-lora-ctfl-th-gec-100st-augmented / training_config.json

pakawadeep's picture

Training in progress, step 25

39d8501 verified 6 months ago

2.7 kB

	{
	"experiment_key": "base",
	"save_safetensors": true,
	"max_shard_size": "10GB",
	"local_rank": 0,
	"use_gradient_checkpointing": true,
	"trainer_key": "lm",
	"force_fp32": false,
	"force_fp16": false,
	"from_gptq": false,
	"huggingface_hub_token": null,
	"single_gpu": true,
	"master_port": 9994,
	"deepspeed_stage": "stage_2",
	"deepspeed_config_path": null,
	"fsdp_strategy": "",
	"fsdp_offload": true,
	"seed": 42,
	"stabilize": false,
	"norm_fp32": false,
	"path_to_env_file": "./.env",
	"prepare_dataset": true,
	"lora_hub_model_id": null,
	"lora_model_local_path": null,
	"fused_model_local_path": null,
	"fuse_after_training": false,
	"quantization_dataset_id": null,
	"quantization_max_samples": 1024,
	"quantized_model_path": "./quantized_model/",
	"quantized_hub_model_id": null,
	"quantized_hub_private_repo": true,
	"dataset_key": "soda",
	"train_local_path_to_data": "./train.jsonl",
	"eval_local_path_to_data": null,
	"shuffle": true,
	"max_eval_samples": 1000,
	"add_eval_to_train_if_no_path": false,
	"tokenizer_name_or_path": null,
	"tokenizer_use_fast": null,
	"tokenizer_padding_side": null,
	"collator_key": "lm",
	"max_length": 2048,
	"model_name_or_path": "openthaigpt/openthaigpt-1.0.0-beta-13b-chat-hf",
	"push_to_hub_bos_add_bos_token": false,
	"use_flash_attention_2": false,
	"trust_remote_code": false,
	"device_map": null,
	"prepare_model_for_kbit_training": true,
	"offload_folder": null,
	"load_in_8bit": false,
	"load_in_4bit": true,
	"llm_int8_threshold": 6.0,
	"llm_int8_has_fp16_weight": true,
	"bnb_4bit_use_double_quant": true,
	"bnb_4bit_quant_type": "nf4",
	"bnb_quantize_after_model_init": false,
	"gptq_bits": 4,
	"gptq_group_size": 128,
	"gptq_disable_exllama": true,
	"apply_lora": true,
	"lora_rank": 8,
	"lora_alpha": 32,
	"lora_dropout": 0.1,
	"raw_lora_target_modules": "all",
	"output_dir": "./outputs/",
	"per_device_train_batch_size": 2,
	"do_eval": false,
	"per_device_eval_batch_size": null,
	"gradient_accumulation_steps": 2,
	"eval_accumulation_steps": null,
	"eval_delay": 0,
	"eval_steps": 1000,
	"warmup_steps": 10,
	"max_steps": 101,
	"num_train_epochs": 1,
	"learning_rate": 0.0002,
	"max_grad_norm": 1.0,
	"weight_decay": 0.001,
	"label_smoothing_factor": 0.0,
	"logging_steps": 1,
	"save_steps": 25,
	"save_total_limit": 1,
	"optim": "paged_adamw_8bit",
	"push_to_hub": true,
	"hub_model_id": "pakawadeep/openthaigpt-1.0.0-beta-13b-lora-ctfl-th-gec-100st-augmented",
	"hub_private_repo": false,
	"neftune_noise_alpha": null,
	"project_name": null,
	"report_to_wandb": false,
	"wandb_api_key": null,
	"wandb_project": null,
	"wandb_entity": null
	}