CodeLlama-7b-GQL-hf / args.json
{
"data_paths": "[/root/mycode/__cache__/__temp__/mft_inputs/source_dataset/12600004_latest.jsonl]",
"output_dir": "/root/mycode/__cache__/__temp__/mft_outputs/huggingface",
"tb_dir": "/home/admin/logs/tfevent",
"pretrained_model_path": "/gruntdata/code_gpt_nas/user/166675/model/base_model/codellama-7b-hf",
"model_type": "llama",
"load_raw_dataset": "True",
"task_weights": "[1.0]",
"data_weights": "[1.]",
"low_cpu_mem_usage": "True",
"data_split": "95,5,0",
"padding_mode": "padding",
"tokenize_mode": "sft",
"weighted_loss_mode": "case3",
"peft_type": "qlora",
"quantization": "4bit",
"lora_rank": "64",
"lora_alpha": "32",
"lora_dropout": "0.05",
"per_device_train_batch_size": "1",
"per_device_eval_batch_size": "1",
"tokenizer_type": "AutoTokenizer",
"learning_rate": "5e-05",
"min_lr": "5e-06",
"weight_decay": "0.1",
"gradient_accumulation_steps": "1",
"lr_scheduler_type": "cosine",
"num_warmup_steps": "100",
"num_train_epochs": "2",
"seed": "42",
"seq_length": "2048",
"resume_from_checkpoint": "None",
"log_interval": "10",
"checkpointing_steps": "None",
"evalation_steps": "500",
"max_train_steps": "None",
"epoch_checkpointing": "False",
"shuffle_before_split": "True",
"use_random_sampler": "True",
"early_stopping": "True",
"early_stopping_stall_num": "3",
"saving_limit": "None",
"use_dynamic_padding": "True",
"selfpaced_interval": "1",
"selfpaced_history_length": "400",
"selfpaced_sample_valid_num": "1",
"selfpaced_scale_factor": "50",
"use_xformers": "True",
"trust_remote_code": "True",
"weight_by_num_documents": "True",
"make_vocab_size_divisible_by": "128",
"model_parallel_size": "1",
"use_slow_tokenizer": "True",
"world_size": "4"
}
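
For context: this file is the argument dump of a QLoRA fine-tuning run (MFTCoder-style, judging by the mft_inputs/mft_outputs paths) of codellama-7b-hf on a GQL dataset. With per_device_train_batch_size=1, gradient_accumulation_steps=1, and world_size=4, the effective global batch size is 1 × 1 × 4 = 4 sequences of up to seq_length=2048 tokens.

Below is a minimal sketch, assuming the standard Hugging Face transformers/peft/bitsandbytes APIs, of how the quantization and LoRA fields above map onto code. It is not the training entry point that produced this file: the model identifier stands in for the local pretrained_model_path, and the 4-bit quant type and compute dtype are assumptions not recorded in args.json.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# "quantization": "4bit" -> load the base model in 4-bit via bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",               # assumed quant type; not recorded in args.json
    bnb_4bit_compute_dtype=torch.bfloat16,   # assumed compute dtype; not recorded in args.json
)

model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",             # stand-in for the local pretrained_model_path
    quantization_config=bnb_config,
    trust_remote_code=True,                  # "trust_remote_code": "True"
)
model = prepare_model_for_kbit_training(model)

# "peft_type": "qlora", "lora_rank": 64, "lora_alpha": 32, "lora_dropout": 0.05
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()           # only the LoRA adapter weights are trainable

Leaving target_modules unset lets peft fall back to its built-in defaults for llama-type models; the exact projection layers the original run adapted are not listed in this args.json, so they are deliberately not guessed here.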