{
    "command": "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py -f /root/.local/share/jupyter/runtime/kernel-3aca3dc1-4d01-4666-980c-0422f6452525.json",
    "compute": {
        "arch_list": [
            "sm_50",
            "sm_60",
            "sm_70",
            "sm_75",
            "sm_80",
            "sm_86",
            "sm_90"
        ],
        "devices": {
            "0": {
                "device_capability": [
                    8,
                    0
                ],
                "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-SXM4-40GB', major=8, minor=0, total_memory=40513MB, multi_processor_count=108)",
                "gpu_type": "NVIDIA A100-SXM4-40GB"
            }
        },
        "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
        "gpus_per_node": 1,
        "num_nodes": 1
    },
    "config": {
        "adapter": {
            "alpha": 16,
            "bias_type": "none",
            "dropout": 0.05,
            "postprocessor": {
                "merge_adapter_into_base_model": false,
                "progressbar": false
            },
            "pretrained_adapter_weights": null,
            "r": 8,
            "target_modules": null,
            "type": "lora",
            "use_dora": false,
            "use_rslora": false
        },
        "backend": null,
        "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
        "defaults": {
            "text": {
                "decoder": {
                    "fc_activation": "relu",
                    "fc_bias_initializer": "zeros",
                    "fc_dropout": 0.0,
                    "fc_layers": null,
                    "fc_norm": null,
                    "fc_norm_params": null,
                    "fc_output_size": 256,
                    "fc_use_bias": true,
                    "fc_weights_initializer": "xavier_uniform",
                    "input_size": null,
                    "max_new_tokens": null,
                    "num_fc_layers": 0,
                    "pretrained_model_name_or_path": "",
                    "tokenizer": "hf_tokenizer",
                    "type": "text_extractor",
                    "vocab_file": ""
                },
                "encoder": {
                    "skip": false,
                    "type": "passthrough"
                },
                "loss": {
                    "class_similarities": null,
                    "class_similarities_temperature": 0,
                    "class_weights": null,
                    "confidence_penalty": 0,
                    "robust_lambda": 0,
                    "type": "next_token_softmax_cross_entropy",
                    "unique": false,
                    "weight": 1.0
                },
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": 256,
                    "missing_value_strategy": "fill_with_const",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "right",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": null,
                    "prompt": {
                        "retrieval": {
                            "index_name": null,
                            "k": 0,
                            "model_name": null,
                            "type": null
                        },
                        "task": null,
                        "template": null
                    },
                    "sequence_length": null,
                    "tokenizer": "space_punct",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                }
            }
        },
        "generation": {
            "bad_words_ids": null,
            "begin_suppress_tokens": null,
            "bos_token_id": null,
            "diversity_penalty": 0.0,
            "do_sample": true,
            "early_stopping": false,
            "encoder_repetition_penalty": 1.0,
            "eos_token_id": null,
            "epsilon_cutoff": 0.0,
            "eta_cutoff": 0.0,
            "exponential_decay_length_penalty": null,
            "force_words_ids": null,
            "forced_bos_token_id": null,
            "forced_decoder_ids": null,
            "forced_eos_token_id": null,
            "guidance_scale": null,
            "length_penalty": 1.0,
            "max_length": 32,
            "max_new_tokens": 512,
            "max_time": null,
            "min_length": 0,
            "min_new_tokens": null,
            "no_repeat_ngram_size": 0,
            "num_beam_groups": 1,
            "num_beams": 1,
            "pad_token_id": null,
            "penalty_alpha": null,
            "prompt_lookup_num_tokens": null,
            "remove_invalid_values": false,
            "renormalize_logits": false,
            "repetition_penalty": 1.0,
            "sequence_bias": null,
            "suppress_tokens": null,
            "temperature": 0.1,
            "top_k": 50,
            "top_p": 1.0,
            "typical_p": 1.0,
            "use_cache": true
        },
        "hyperopt": null,
        "input_features": [
            {
                "active": true,
                "column": "question",
                "encoder": {
                    "skip": false,
                    "type": "passthrough"
                },
                "name": "question",
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": null,
                    "missing_value_strategy": "fill_with_const",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "left",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "sequence_length": null,
                    "tokenizer": "hf_tokenizer",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                },
                "proc_column": "question_Nlu_HO",
                "tied": null,
                "type": "text"
            }
        ],
        "ludwig_version": "0.10.2",
        "model_parameters": null,
        "model_type": "llm",
        "output_features": [
            {
                "active": true,
                "class_similarities": null,
                "column": "record_id",
                "decoder": {
                    "fc_activation": "relu",
                    "fc_bias_initializer": "zeros",
                    "fc_dropout": 0.0,
                    "fc_layers": null,
                    "fc_norm": null,
                    "fc_norm_params": null,
                    "fc_output_size": 256,
                    "fc_use_bias": true,
                    "fc_weights_initializer": "xavier_uniform",
                    "input_size": null,
                    "max_new_tokens": 512,
                    "num_fc_layers": 0,
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "tokenizer": "hf_tokenizer",
                    "type": "text_extractor",
                    "vocab_file": ""
                },
                "default_validation_metric": "loss",
                "dependencies": [],
                "input_size": null,
                "loss": {
                    "class_similarities": null,
                    "class_similarities_temperature": 0,
                    "class_weights": null,
                    "confidence_penalty": 0,
                    "robust_lambda": 0,
                    "type": "next_token_softmax_cross_entropy",
                    "unique": false,
                    "weight": 1.0
                },
                "name": "record_id",
                "num_classes": null,
                "preprocessing": {
                    "cache_encoder_embeddings": false,
                    "compute_idf": false,
                    "computed_fill_value": "<UNK>",
                    "fill_value": "<UNK>",
                    "lowercase": false,
                    "max_sequence_length": null,
                    "missing_value_strategy": "drop_row",
                    "most_common": 20000,
                    "ngram_size": 2,
                    "padding": "left",
                    "padding_symbol": "<PAD>",
                    "pretrained_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
                    "sequence_length": null,
                    "tokenizer": "hf_tokenizer",
                    "unknown_symbol": "<UNK>",
                    "vocab_file": null
                },
                "proc_column": "record_id_D_Znvc",
                "reduce_dependencies": "sum",
                "reduce_input": "sum",
                "type": "text"
            }
        ],
        "preprocessing": {
            "global_max_sequence_length": 512,
            "oversample_minority": null,
            "sample_ratio": 1.0,
            "sample_size": null,
            "split": {
                "probabilities": [
                    1.0,
                    0.0,
                    0.0
                ],
                "type": "random"
            },
            "undersample_majority": null
        },
        "prompt": {
            "retrieval": {
                "index_name": null,
                "k": 0,
                "model_name": null,
                "type": null
            },
            "task": null,
"template": "Below is an instruction that describes a task, paired with an input that provides further context with language code and country code. Write a answer that appropriately answers the question with respect to the country code and language code.if the input provided consists of combination of two questions, then provide two record id or else only provide one record id only.\n### country_code: {country_code}\n### language_code: {language_code}\n### Instruction: {question}\n### answer:" |
|
        },
        "quantization": {
            "bits": 4,
            "bnb_4bit_compute_dtype": "float16",
            "bnb_4bit_quant_type": "nf4",
            "bnb_4bit_use_double_quant": true,
            "llm_int8_has_fp16_weight": false,
            "llm_int8_threshold": 6.0
        },
        "trainer": {
            "base_learning_rate": 0.0,
            "batch_size": 1,
            "bucketing_field": null,
            "checkpoints_per_epoch": 0,
            "compile": false,
            "early_stop": 5,
            "effective_batch_size": "auto",
            "enable_gradient_checkpointing": false,
            "enable_profiling": false,
            "epochs": 25,
            "eval_batch_size": 2,
            "eval_steps": null,
            "evaluate_training_set": false,
            "gradient_accumulation_steps": 16,
            "gradient_clipping": {
                "clipglobalnorm": 0.5,
                "clipnorm": null,
                "clipvalue": null
            },
            "increase_batch_size_eval_metric": "loss",
            "increase_batch_size_eval_split": "training",
            "increase_batch_size_on_plateau": 0,
            "increase_batch_size_on_plateau_patience": 5,
            "increase_batch_size_on_plateau_rate": 2.0,
            "learning_rate": 0.0004,
            "learning_rate_scaling": "linear",
            "learning_rate_scheduler": {
                "decay": "cosine",
                "decay_rate": 0.96,
                "decay_steps": 10000,
                "eta_min": 0,
                "reduce_eval_metric": "loss",
                "reduce_eval_split": "training",
                "reduce_on_plateau": 0,
                "reduce_on_plateau_patience": 10,
                "reduce_on_plateau_rate": 0.1,
                "staircase": false,
                "t_0": null,
                "t_mult": 1,
                "warmup_evaluations": 0,
                "warmup_fraction": 0.03
            },
            "max_batch_size": 1099511627776,
            "optimizer": {
                "amsgrad": false,
                "betas": [
                    0.9,
                    0.999
                ],
                "block_wise": true,
                "eps": 1e-08,
                "percentile_clipping": 100,
                "type": "paged_adam",
                "weight_decay": 0.0
            },
            "profiler": {
                "active": 3,
                "repeat": 5,
                "skip_first": 0,
                "wait": 1,
                "warmup": 1
            },
            "regularization_lambda": 0.0,
            "regularization_type": "l2",
            "should_shuffle": true,
            "skip_all_evaluation": false,
            "steps_per_checkpoint": 0,
            "train_steps": null,
            "type": "finetune",
            "use_mixed_precision": false,
            "validation_field": "record_id",
            "validation_metric": "loss"
        }
    },
    "data_format": "<class 'pandas.core.frame.DataFrame'>",
    "ludwig_version": "0.10.2",
    "random_seed": 42,
    "torch_version": "2.2.1+cu121"
}