{
    "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
    "compute": {
        "arch_list": [
            "sm_50",
            "sm_60",
            "sm_70",
            "sm_75",
            "sm_80",
            "sm_86",
            "sm_90"
        ],
        "devices": {
            "0": {
                "device_capability": [
                    8,
                    0
                ],
                "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
                "gpu_type": "NVIDIA A100-SXM4-40GB"
            }
        },
        "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
        "gpus_per_node": 1,
        "num_nodes": 1
    },
    "config": {
        "adapter": {
            "alpha": 16,
            "bias_type": "none",
            "dropout": 0.05,
            "postprocessor": {
                "merge_adapter_into_base_model": false,
                "progressbar": false
            },
            "pretrained_adapter_weights": null,
            "r": 8,
            "target_modules": null,
            "type": "lora",
            "use_dora": false,
            "use_rslora": false
        },
        "backend": null,
        "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
        "defaults": {
            "text": {
                "decoder": {
                    "fc_activation": "relu",
                    "fc_bias_initializer": "zeros",
                    "fc_dropout": 0.0,
                    "fc_layers": null,
                    "fc_norm": null,
                    "fc_norm_params": null,
                    "fc_output_size": 256,
                    "fc_use_bias": true,
                    "fc_weights_initializer": "xavier_uniform",
                    "input_size": null,
                    "max_new_tokens": null,
                    "num_fc_layers": 0,
                    "pretrained_model_name_or_path": "",
                    "tokenizer": "hf_tokenizer",
                    "type": "text_extractor",
                    "vocab_file": ""
                },
                "encoder": {
                    "skip": false,
                    "type": "passthrough"
                },
                "loss": {
                    "class_similarities": null,
                    "class_similarities_temperature": 0,
                    "class_weights": null,
                    "confidence_penalty": 0,
                    "robust_lambda": 0,
                    "type": "next_token_softmax_cross_entropy",
                    "unique": false,
                    "weight": 1.0
                },
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": 256,
                    "missing_value_strategy": "fill_with_const",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "right",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": null,
                    "prompt": {
                        "retrieval": {
                            "index_name": null,
                            "k": 0,
                            "model_name": null,
                            "type": null
                        },
                        "task": null,
                        "template": null
                    },
                    "sequence_length": null,
                    "tokenizer": "space_punct",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                }
            }
        },
        "generation": {
            "bad_words_ids": null,
            "begin_suppress_tokens": null,
            "bos_token_id": null,
            "diversity_penalty": 0.0,
            "do_sample": true,
            "early_stopping": false,
            "encoder_repetition_penalty": 1.0,
            "eos_token_id": null,
            "epsilon_cutoff": 0.0,
            "eta_cutoff": 0.0,
            "exponential_decay_length_penalty": null,
            "force_words_ids": null,
            "forced_bos_token_id": null,
            "forced_decoder_ids": null,
            "forced_eos_token_id": null,
            "guidance_scale": null,
            "length_penalty": 1.0,
            "max_length": 32,
            "max_new_tokens": 512,
            "max_time": null,
            "min_length": 0,
            "min_new_tokens": null,
            "no_repeat_ngram_size": 0,
            "num_beam_groups": 1,
            "num_beams": 1,
            "pad_token_id": null,
            "penalty_alpha": null,
            "prompt_lookup_num_tokens": null,
            "remove_invalid_values": false,
            "renormalize_logits": false,
            "repetition_penalty": 1.0,
            "sequence_bias": null,
            "suppress_tokens": null,
            "temperature": 0.1,
            "top_k": 50,
            "top_p": 1.0,
            "typical_p": 1.0,
            "use_cache": true
        },
        "hyperopt": null,
        "input_features": [
            {
                "active": true,
                "column": "question",
                "encoder": {
                    "skip": false,
                    "type": "passthrough"
                },
                "name": "question",
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": null,
                    "missing_value_strategy": "fill_with_const",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "left",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "sequence_length": null,
                    "tokenizer": "hf_tokenizer",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                },
                "proc_column": "question_Nlu_HO",
                "tied": null,
                "type": "text"
            }
        ],
        "ludwig_version": "0.10.2",
        "model_parameters": null,
        "model_type": "llm",
        "output_features": [
            {
                "active": true,
                "class_similarities": null,
                "column": "record_id",
                "decoder": {
                    "fc_activation": "relu",
                    "fc_bias_initializer": "zeros",
                    "fc_dropout": 0.0,
                    "fc_layers": null,
                    "fc_norm": null,
                    "fc_norm_params": null,
                    "fc_output_size": 256,
                    "fc_use_bias": true,
                    "fc_weights_initializer": "xavier_uniform",
                    "input_size": null,
                    "max_new_tokens": 512,
                    "num_fc_layers": 0,
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "tokenizer": "hf_tokenizer",
                    "type": "text_extractor",
                    "vocab_file": ""
                },
                "default_validation_metric": "loss",
                "dependencies": [],
                "input_size": null,
                "loss": {
                    "class_similarities": null,
                    "class_similarities_temperature": 0,
                    "class_weights": null,
                    "confidence_penalty": 0,
                    "robust_lambda": 0,
                    "type": "next_token_softmax_cross_entropy",
                    "unique": false,
                    "weight": 1.0
                },
                "name": "record_id",
                "num_classes": null,
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": null,
                    "missing_value_strategy": "drop_row",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "left",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "sequence_length": null,
                    "tokenizer": "hf_tokenizer",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                },
                "proc_column": "record_id_D_Znvc",
                "reduce_dependencies": "sum",
                "reduce_input": "sum",
                "type": "text"
            }
        ],
        "preprocessing": {
            "global_max_sequence_length": 512,
            "oversample_minority": null,
            "sample_ratio": 1.0,
            "sample_size": null,
            "split": {
                "probabilities": [
                    1.0,
                    0.0,
                    0.0
                ],
                "type": "random"
            },
            "undersample_majority": null
        },
        "prompt": {
            "retrieval": {
                "index_name": null,
                "k": 0,
                "model_name": null,
                "type": null
            },
            "task": null,
"template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:" |
|
        },
        "quantization": {
            "bits": 4,
            "bnb_4bit_compute_dtype": "float16",
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_use_double_quant": true,
            "llm_int8_has_fp16_weight": false,
            "llm_int8_threshold": 6.0
        },
        "trainer": {
            "base_learning_rate": 0.0,
            "batch_size": 1,
            "bucketing_field": null,
            "checkpoints_per_epoch": 0,
            "compile": false,
            "early_stop": 5,
            "effective_batch_size": "auto",
            "enable_gradient_checkpointing": false,
            "enable_profiling": false,
            "epochs": 25,
            "eval_batch_size": 2,
            "eval_steps": null,
            "evaluate_training_set": false,
            "gradient_accumulation_steps": 16,
            "gradient_clipping": {
                "clipglobalnorm": 0.5,
                "clipnorm": null,
                "clipvalue": null
            },
            "increase_batch_size_eval_metric": "loss",
            "increase_batch_size_eval_split": "training",
            "increase_batch_size_on_plateau": 0,
            "increase_batch_size_on_plateau_patience": 5,
            "increase_batch_size_on_plateau_rate": 2.0,
            "learning_rate": 0.0004,
            "learning_rate_scaling": "linear",
            "learning_rate_scheduler": {
                "decay": "cosine",
                "decay_rate": 0.96,
                "decay_steps": 10000,
                "eta_min": 0,
                "reduce_eval_metric": "loss",
                "reduce_eval_split": "training",
                "reduce_on_plateau": 0,
                "reduce_on_plateau_patience": 10,
                "reduce_on_plateau_rate": 0.1,
                "staircase": false,
                "t_0": null,
                "t_mult": 1,
                "warmup_evaluations": 0,
                "warmup_fraction": 0.03
            },
            "max_batch_size": 1099511627776,
            "optimizer": {
                "amsgrad": false,
                "betas": [
                    0.9,
                    0.999
                ],
                "block_wise": true,
                "eps": 1e-08,
                "percentile_clipping": 100,
                "type": "paged_adam",
                "weight_decay": 0.0
            },
            "profiler": {
                "active": 3,
                "repeat": 5,
                "skip_first": 0,
                "wait": 1,
                "warmup": 1
            },
            "regularization_lambda": 0.0,
            "regularization_type": "l2",
            "should_shuffle": true,
            "skip_all_evaluation": false,
            "steps_per_checkpoint": 0,
            "train_steps": null,
            "type": "finetune",
            "use_mixed_precision": false,
            "validation_field": "record_id",
            "validation_metric": "loss"
        }
    },
    "data_format": "<class 'pandas.core.frame.DataFrame'>",
    "ludwig_version": "0.10.2",
    "random_seed": 42,
    "torch_version": "2.2.1+cu121"
}