{
  "model_config": {
    "model_type": "llama2",
    "mamba": null,
    "llama2": {
      "attention_bias": false,
      "attention_dropout": 0.0,
      "bos_token_id": -1,
      "eos_token_id": -2,
      "hidden_act": "silu",
      "hidden_size": 48,
      "initializer_range": 0.02,
      "intermediate_size": 48,
      "max_position_embeddings": 513,
      "num_attention_heads": 2,
      "num_hidden_layers": 2,
      "num_key_value_heads": 2,
      "pretraining_tp": 1,
      "rms_norm_eps": 1e-06,
      "rope_scaling": null,
      "rope_theta": 10000.0,
      "tie_word_embeddings": false,
      "use_cache": true,
      "vocab_size": 4096
    }
  },
  "run_name": "debug__2024_03_19_00_59_57",
  "output_dir": "/Users/jaidhyani/Library/Application Support/delphi/debug__2024_03_19_00_59_57",
  "huggingface": {
    "repo_id": "jaiwithani/testing",
    "push_checkpoints_to_hub": true
  },
  "device": "auto",
  "eval_interval": 1,
  "log_interval": 1,
  "eval_iters": 1,
  "eval_only": false,
  "always_save_checkpoint": true,
  "init_from": "scratch",
  "wandb_config": {
    "log": true,
    "project": "delphi",
    "entity": "jaiwithani"
  },
  "batch_size": 64,
  "max_seq_len": 512,
  "max_epochs": 1,
  "grad_clip": 1.0,
  "optimizer": {
    "gradient_accumulation_steps": 4,
    "learning_rate": 0.0005,
    "weight_decay": 0.1,
    "beta1": 0.9,
    "beta2": 0.95,
    "grad_clip": 1.0,
    "decay_lr": true,
    "warmup_iters": 1000,
    "min_lr": 0.0
  },
  "train_sample_limit": 1024,
  "val_sample_limit": -1
}