Training in progress, step 100
Browse files- adapter_config.json +34 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +6 -0
- merges.txt +0 -0
- runs/May22_05-46-00_92dcc4555414/events.out.tfevents.1716356830.92dcc4555414.24.0 +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +53 -0
- training_args.bin +3 -0
- vocab.json +0 -0
- wandb/debug-internal.log +0 -0
- wandb/debug.log +30 -0
- wandb/run-20240522_054348-vgrzs6jq/files/conda-environment.yaml +0 -0
- wandb/run-20240522_054348-vgrzs6jq/files/config.yaml +753 -0
- wandb/run-20240522_054348-vgrzs6jq/files/output.log +624 -0
- wandb/run-20240522_054348-vgrzs6jq/files/requirements.txt +867 -0
- wandb/run-20240522_054348-vgrzs6jq/files/wandb-metadata.json +69 -0
- wandb/run-20240522_054348-vgrzs6jq/files/wandb-summary.json +1 -0
- wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log +0 -0
- wandb/run-20240522_054348-vgrzs6jq/logs/debug.log +30 -0
- wandb/run-20240522_054348-vgrzs6jq/run-vgrzs6jq.wandb +0 -0
adapter_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 128,
|
14 |
+
"lora_dropout": 0.05,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": null,
|
18 |
+
"peft_type": "LORA",
|
19 |
+
"r": 128,
|
20 |
+
"rank_pattern": {},
|
21 |
+
"revision": null,
|
22 |
+
"target_modules": [
|
23 |
+
"up_proj",
|
24 |
+
"v_proj",
|
25 |
+
"o_proj",
|
26 |
+
"down_proj",
|
27 |
+
"q_proj",
|
28 |
+
"k_proj",
|
29 |
+
"gate_proj"
|
30 |
+
],
|
31 |
+
"task_type": "CAUSAL_LM",
|
32 |
+
"use_dora": false,
|
33 |
+
"use_rslora": false
|
34 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bf876e785763ef5ddcd813701c0f1fa8169366ac31046d716c7357c64672cef
|
3 |
+
size 1279323952
|
added_tokens.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</s>": 23596,
|
3 |
+
"<pad>": 23598,
|
4 |
+
"<s>": 23595,
|
5 |
+
"<unk>": 23597
|
6 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
runs/May22_05-46-00_92dcc4555414/events.out.tfevents.1716356830.92dcc4555414.24.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:337ffc5a348d28a205517078012f86cb26e941d456bdc7fd516aadd77ef5a858
|
3 |
+
size 12681
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<pad>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"23595": {
|
13 |
+
"content": "<s>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"23596": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"23597": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"23598": {
|
37 |
+
"content": "<pad>",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
47 |
+
"clean_up_tokenization_spaces": true,
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"model_max_length": 1024,
|
50 |
+
"pad_token": "<pad>",
|
51 |
+
"tokenizer_class": "GPT2Tokenizer",
|
52 |
+
"unk_token": "<unk>"
|
53 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f2347a47dcb65e4c27c7ecf02ead180ffd83b8034d525b3b2ca60a664d98dc6
|
3 |
+
size 4984
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/debug.log
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-05-22 05:43:48,628 INFO MainThread:24 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
|
2 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Configure stats pid to 24
|
3 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
|
5 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
7 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
8 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
9 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {}
|
10 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():521] Logging user logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug.log
|
11 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():522] Logging internal logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log
|
12 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():561] calling init triggers
|
13 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
|
14 |
+
config: {}
|
15 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():611] starting backend
|
16 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():615] setting up manager
|
17 |
+
2024-05-22 05:43:48,632 INFO MainThread:24 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2024-05-22 05:43:48,635 INFO MainThread:24 [wandb_init.py:init():623] backend started and connected
|
19 |
+
2024-05-22 05:43:48,638 INFO MainThread:24 [wandb_init.py:init():715] updated telemetry
|
20 |
+
2024-05-22 05:43:48,641 INFO MainThread:24 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
|
21 |
+
2024-05-22 05:43:48,764 INFO MainThread:24 [wandb_run.py:_on_init():2357] communicating current version
|
22 |
+
2024-05-22 05:43:48,850 INFO MainThread:24 [wandb_run.py:_on_init():2366] got version response upgrade_message: "wandb version 0.17.0 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
|
23 |
+
|
24 |
+
2024-05-22 05:43:48,851 INFO MainThread:24 [wandb_init.py:init():799] starting run threads in backend
|
25 |
+
2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_console_start():2335] atexit reg
|
26 |
+
2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_redirect():2190] redirect: wrap_raw
|
27 |
+
2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2255] Wrapping output streams.
|
28 |
+
2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2280] Redirects installed.
|
29 |
+
2024-05-22 05:44:04,916 INFO MainThread:24 [wandb_init.py:init():842] run started, returning control to user process
|
30 |
+
2024-05-22 05:47:10,600 INFO MainThread:24 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'meta-llama/Llama-2-7b-chat-hf', 'transformers_version': '4.39.3', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May22_05-46-00_92dcc4555414', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'VanCan23/DPO_Vietnamese_chatbot_lessData', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None}
|
wandb/run-20240522_054348-vgrzs6jq/files/conda-environment.yaml
ADDED
File without changes
|
wandb/run-20240522_054348-vgrzs6jq/files/config.yaml
ADDED
@@ -0,0 +1,753 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
python_version: 3.10.13
|
7 |
+
cli_version: 0.16.6
|
8 |
+
is_jupyter_run: false
|
9 |
+
is_kaggle_kernel: true
|
10 |
+
start_time: 1716356628.0
|
11 |
+
t:
|
12 |
+
1:
|
13 |
+
- 55
|
14 |
+
- 105
|
15 |
+
2:
|
16 |
+
- 1
|
17 |
+
- 2
|
18 |
+
- 3
|
19 |
+
- 5
|
20 |
+
- 11
|
21 |
+
- 12
|
22 |
+
- 49
|
23 |
+
- 51
|
24 |
+
- 53
|
25 |
+
- 55
|
26 |
+
- 71
|
27 |
+
- 84
|
28 |
+
- 98
|
29 |
+
- 105
|
30 |
+
3:
|
31 |
+
- 7
|
32 |
+
- 23
|
33 |
+
4: 3.10.13
|
34 |
+
5: 0.16.6
|
35 |
+
8:
|
36 |
+
- 2
|
37 |
+
- 5
|
38 |
+
- 13
|
39 |
+
9:
|
40 |
+
1: transformers_trainer
|
41 |
+
13: linux-x86_64
|
42 |
+
framework: huggingface
|
43 |
+
m:
|
44 |
+
- 1: train/global_step
|
45 |
+
6:
|
46 |
+
- 3
|
47 |
+
- 1: train/loss
|
48 |
+
5: 1
|
49 |
+
6:
|
50 |
+
- 1
|
51 |
+
- 1: train/grad_norm
|
52 |
+
5: 1
|
53 |
+
6:
|
54 |
+
- 1
|
55 |
+
- 1: train/learning_rate
|
56 |
+
5: 1
|
57 |
+
6:
|
58 |
+
- 1
|
59 |
+
- 1: train/rewards/chosen
|
60 |
+
5: 1
|
61 |
+
6:
|
62 |
+
- 1
|
63 |
+
- 1: train/rewards/rejected
|
64 |
+
5: 1
|
65 |
+
6:
|
66 |
+
- 1
|
67 |
+
- 1: train/rewards/accuracies
|
68 |
+
5: 1
|
69 |
+
6:
|
70 |
+
- 1
|
71 |
+
- 1: train/rewards/margins
|
72 |
+
5: 1
|
73 |
+
6:
|
74 |
+
- 1
|
75 |
+
- 1: train/logps/rejected
|
76 |
+
5: 1
|
77 |
+
6:
|
78 |
+
- 1
|
79 |
+
- 1: train/logps/chosen
|
80 |
+
5: 1
|
81 |
+
6:
|
82 |
+
- 1
|
83 |
+
- 1: train/logits/rejected
|
84 |
+
5: 1
|
85 |
+
6:
|
86 |
+
- 1
|
87 |
+
- 1: train/logits/chosen
|
88 |
+
5: 1
|
89 |
+
6:
|
90 |
+
- 1
|
91 |
+
- 1: train/epoch
|
92 |
+
5: 1
|
93 |
+
6:
|
94 |
+
- 1
|
95 |
+
- 1: eval/loss
|
96 |
+
5: 1
|
97 |
+
6:
|
98 |
+
- 1
|
99 |
+
- 1: eval/runtime
|
100 |
+
5: 1
|
101 |
+
6:
|
102 |
+
- 1
|
103 |
+
- 1: eval/samples_per_second
|
104 |
+
5: 1
|
105 |
+
6:
|
106 |
+
- 1
|
107 |
+
- 1: eval/steps_per_second
|
108 |
+
5: 1
|
109 |
+
6:
|
110 |
+
- 1
|
111 |
+
- 1: eval/rewards/chosen
|
112 |
+
5: 1
|
113 |
+
6:
|
114 |
+
- 1
|
115 |
+
- 1: eval/rewards/rejected
|
116 |
+
5: 1
|
117 |
+
6:
|
118 |
+
- 1
|
119 |
+
- 1: eval/rewards/accuracies
|
120 |
+
5: 1
|
121 |
+
6:
|
122 |
+
- 1
|
123 |
+
- 1: eval/rewards/margins
|
124 |
+
5: 1
|
125 |
+
6:
|
126 |
+
- 1
|
127 |
+
- 1: eval/logps/rejected
|
128 |
+
5: 1
|
129 |
+
6:
|
130 |
+
- 1
|
131 |
+
- 1: eval/logps/chosen
|
132 |
+
5: 1
|
133 |
+
6:
|
134 |
+
- 1
|
135 |
+
- 1: eval/logits/rejected
|
136 |
+
5: 1
|
137 |
+
6:
|
138 |
+
- 1
|
139 |
+
- 1: eval/logits/chosen
|
140 |
+
5: 1
|
141 |
+
6:
|
142 |
+
- 1
|
143 |
+
vocab_size:
|
144 |
+
desc: null
|
145 |
+
value: 32000
|
146 |
+
max_position_embeddings:
|
147 |
+
desc: null
|
148 |
+
value: 4096
|
149 |
+
hidden_size:
|
150 |
+
desc: null
|
151 |
+
value: 4096
|
152 |
+
intermediate_size:
|
153 |
+
desc: null
|
154 |
+
value: 11008
|
155 |
+
num_hidden_layers:
|
156 |
+
desc: null
|
157 |
+
value: 32
|
158 |
+
num_attention_heads:
|
159 |
+
desc: null
|
160 |
+
value: 32
|
161 |
+
num_key_value_heads:
|
162 |
+
desc: null
|
163 |
+
value: 32
|
164 |
+
hidden_act:
|
165 |
+
desc: null
|
166 |
+
value: silu
|
167 |
+
initializer_range:
|
168 |
+
desc: null
|
169 |
+
value: 0.02
|
170 |
+
rms_norm_eps:
|
171 |
+
desc: null
|
172 |
+
value: 1.0e-05
|
173 |
+
pretraining_tp:
|
174 |
+
desc: null
|
175 |
+
value: 1
|
176 |
+
use_cache:
|
177 |
+
desc: null
|
178 |
+
value: false
|
179 |
+
rope_theta:
|
180 |
+
desc: null
|
181 |
+
value: 10000.0
|
182 |
+
rope_scaling:
|
183 |
+
desc: null
|
184 |
+
value: null
|
185 |
+
attention_bias:
|
186 |
+
desc: null
|
187 |
+
value: false
|
188 |
+
attention_dropout:
|
189 |
+
desc: null
|
190 |
+
value: 0.0
|
191 |
+
return_dict:
|
192 |
+
desc: null
|
193 |
+
value: true
|
194 |
+
output_hidden_states:
|
195 |
+
desc: null
|
196 |
+
value: false
|
197 |
+
output_attentions:
|
198 |
+
desc: null
|
199 |
+
value: false
|
200 |
+
torchscript:
|
201 |
+
desc: null
|
202 |
+
value: false
|
203 |
+
torch_dtype:
|
204 |
+
desc: null
|
205 |
+
value: float16
|
206 |
+
use_bfloat16:
|
207 |
+
desc: null
|
208 |
+
value: false
|
209 |
+
tf_legacy_loss:
|
210 |
+
desc: null
|
211 |
+
value: false
|
212 |
+
pruned_heads:
|
213 |
+
desc: null
|
214 |
+
value: {}
|
215 |
+
tie_word_embeddings:
|
216 |
+
desc: null
|
217 |
+
value: false
|
218 |
+
chunk_size_feed_forward:
|
219 |
+
desc: null
|
220 |
+
value: 0
|
221 |
+
is_encoder_decoder:
|
222 |
+
desc: null
|
223 |
+
value: false
|
224 |
+
is_decoder:
|
225 |
+
desc: null
|
226 |
+
value: false
|
227 |
+
cross_attention_hidden_size:
|
228 |
+
desc: null
|
229 |
+
value: null
|
230 |
+
add_cross_attention:
|
231 |
+
desc: null
|
232 |
+
value: false
|
233 |
+
tie_encoder_decoder:
|
234 |
+
desc: null
|
235 |
+
value: false
|
236 |
+
max_length:
|
237 |
+
desc: null
|
238 |
+
value: 20
|
239 |
+
min_length:
|
240 |
+
desc: null
|
241 |
+
value: 0
|
242 |
+
do_sample:
|
243 |
+
desc: null
|
244 |
+
value: false
|
245 |
+
early_stopping:
|
246 |
+
desc: null
|
247 |
+
value: false
|
248 |
+
num_beams:
|
249 |
+
desc: null
|
250 |
+
value: 1
|
251 |
+
num_beam_groups:
|
252 |
+
desc: null
|
253 |
+
value: 1
|
254 |
+
diversity_penalty:
|
255 |
+
desc: null
|
256 |
+
value: 0.0
|
257 |
+
temperature:
|
258 |
+
desc: null
|
259 |
+
value: 1.0
|
260 |
+
top_k:
|
261 |
+
desc: null
|
262 |
+
value: 50
|
263 |
+
top_p:
|
264 |
+
desc: null
|
265 |
+
value: 1.0
|
266 |
+
typical_p:
|
267 |
+
desc: null
|
268 |
+
value: 1.0
|
269 |
+
repetition_penalty:
|
270 |
+
desc: null
|
271 |
+
value: 1.0
|
272 |
+
length_penalty:
|
273 |
+
desc: null
|
274 |
+
value: 1.0
|
275 |
+
no_repeat_ngram_size:
|
276 |
+
desc: null
|
277 |
+
value: 0
|
278 |
+
encoder_no_repeat_ngram_size:
|
279 |
+
desc: null
|
280 |
+
value: 0
|
281 |
+
bad_words_ids:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
num_return_sequences:
|
285 |
+
desc: null
|
286 |
+
value: 1
|
287 |
+
output_scores:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
return_dict_in_generate:
|
291 |
+
desc: null
|
292 |
+
value: false
|
293 |
+
forced_bos_token_id:
|
294 |
+
desc: null
|
295 |
+
value: null
|
296 |
+
forced_eos_token_id:
|
297 |
+
desc: null
|
298 |
+
value: null
|
299 |
+
remove_invalid_values:
|
300 |
+
desc: null
|
301 |
+
value: false
|
302 |
+
exponential_decay_length_penalty:
|
303 |
+
desc: null
|
304 |
+
value: null
|
305 |
+
suppress_tokens:
|
306 |
+
desc: null
|
307 |
+
value: null
|
308 |
+
begin_suppress_tokens:
|
309 |
+
desc: null
|
310 |
+
value: null
|
311 |
+
architectures:
|
312 |
+
desc: null
|
313 |
+
value:
|
314 |
+
- LlamaForCausalLM
|
315 |
+
finetuning_task:
|
316 |
+
desc: null
|
317 |
+
value: null
|
318 |
+
id2label:
|
319 |
+
desc: null
|
320 |
+
value:
|
321 |
+
'0': LABEL_0
|
322 |
+
'1': LABEL_1
|
323 |
+
label2id:
|
324 |
+
desc: null
|
325 |
+
value:
|
326 |
+
LABEL_0: 0
|
327 |
+
LABEL_1: 1
|
328 |
+
tokenizer_class:
|
329 |
+
desc: null
|
330 |
+
value: null
|
331 |
+
prefix:
|
332 |
+
desc: null
|
333 |
+
value: null
|
334 |
+
bos_token_id:
|
335 |
+
desc: null
|
336 |
+
value: 1
|
337 |
+
pad_token_id:
|
338 |
+
desc: null
|
339 |
+
value: null
|
340 |
+
eos_token_id:
|
341 |
+
desc: null
|
342 |
+
value: 2
|
343 |
+
sep_token_id:
|
344 |
+
desc: null
|
345 |
+
value: null
|
346 |
+
decoder_start_token_id:
|
347 |
+
desc: null
|
348 |
+
value: null
|
349 |
+
task_specific_params:
|
350 |
+
desc: null
|
351 |
+
value: null
|
352 |
+
problem_type:
|
353 |
+
desc: null
|
354 |
+
value: null
|
355 |
+
_name_or_path:
|
356 |
+
desc: null
|
357 |
+
value: meta-llama/Llama-2-7b-chat-hf
|
358 |
+
transformers_version:
|
359 |
+
desc: null
|
360 |
+
value: 4.39.3
|
361 |
+
model_type:
|
362 |
+
desc: null
|
363 |
+
value: llama
|
364 |
+
quantization_config:
|
365 |
+
desc: null
|
366 |
+
value:
|
367 |
+
quant_method: QuantizationMethod.BITS_AND_BYTES
|
368 |
+
_load_in_8bit: false
|
369 |
+
_load_in_4bit: true
|
370 |
+
llm_int8_threshold: 6.0
|
371 |
+
llm_int8_skip_modules: null
|
372 |
+
llm_int8_enable_fp32_cpu_offload: false
|
373 |
+
llm_int8_has_fp16_weight: false
|
374 |
+
bnb_4bit_quant_type: nf4
|
375 |
+
bnb_4bit_use_double_quant: false
|
376 |
+
bnb_4bit_compute_dtype: float16
|
377 |
+
bnb_4bit_quant_storage: uint8
|
378 |
+
load_in_4bit: true
|
379 |
+
load_in_8bit: false
|
380 |
+
output_dir:
|
381 |
+
desc: null
|
382 |
+
value: /kaggle/working/
|
383 |
+
overwrite_output_dir:
|
384 |
+
desc: null
|
385 |
+
value: false
|
386 |
+
do_train:
|
387 |
+
desc: null
|
388 |
+
value: false
|
389 |
+
do_eval:
|
390 |
+
desc: null
|
391 |
+
value: true
|
392 |
+
do_predict:
|
393 |
+
desc: null
|
394 |
+
value: false
|
395 |
+
evaluation_strategy:
|
396 |
+
desc: null
|
397 |
+
value: steps
|
398 |
+
prediction_loss_only:
|
399 |
+
desc: null
|
400 |
+
value: false
|
401 |
+
per_device_train_batch_size:
|
402 |
+
desc: null
|
403 |
+
value: 1
|
404 |
+
per_device_eval_batch_size:
|
405 |
+
desc: null
|
406 |
+
value: 1
|
407 |
+
per_gpu_train_batch_size:
|
408 |
+
desc: null
|
409 |
+
value: null
|
410 |
+
per_gpu_eval_batch_size:
|
411 |
+
desc: null
|
412 |
+
value: null
|
413 |
+
gradient_accumulation_steps:
|
414 |
+
desc: null
|
415 |
+
value: 4
|
416 |
+
eval_accumulation_steps:
|
417 |
+
desc: null
|
418 |
+
value: null
|
419 |
+
eval_delay:
|
420 |
+
desc: null
|
421 |
+
value: 0
|
422 |
+
learning_rate:
|
423 |
+
desc: null
|
424 |
+
value: 2.0e-05
|
425 |
+
weight_decay:
|
426 |
+
desc: null
|
427 |
+
value: 0.0
|
428 |
+
adam_beta1:
|
429 |
+
desc: null
|
430 |
+
value: 0.9
|
431 |
+
adam_beta2:
|
432 |
+
desc: null
|
433 |
+
value: 0.999
|
434 |
+
adam_epsilon:
|
435 |
+
desc: null
|
436 |
+
value: 1.0e-08
|
437 |
+
max_grad_norm:
|
438 |
+
desc: null
|
439 |
+
value: 1.0
|
440 |
+
num_train_epochs:
|
441 |
+
desc: null
|
442 |
+
value: 1
|
443 |
+
max_steps:
|
444 |
+
desc: null
|
445 |
+
value: -1
|
446 |
+
lr_scheduler_type:
|
447 |
+
desc: null
|
448 |
+
value: cosine
|
449 |
+
lr_scheduler_kwargs:
|
450 |
+
desc: null
|
451 |
+
value: {}
|
452 |
+
warmup_ratio:
|
453 |
+
desc: null
|
454 |
+
value: 0.1
|
455 |
+
warmup_steps:
|
456 |
+
desc: null
|
457 |
+
value: 0
|
458 |
+
log_level:
|
459 |
+
desc: null
|
460 |
+
value: info
|
461 |
+
log_level_replica:
|
462 |
+
desc: null
|
463 |
+
value: warning
|
464 |
+
log_on_each_node:
|
465 |
+
desc: null
|
466 |
+
value: true
|
467 |
+
logging_dir:
|
468 |
+
desc: null
|
469 |
+
value: /kaggle/working/runs/May22_05-46-00_92dcc4555414
|
470 |
+
logging_strategy:
|
471 |
+
desc: null
|
472 |
+
value: steps
|
473 |
+
logging_first_step:
|
474 |
+
desc: null
|
475 |
+
value: false
|
476 |
+
logging_steps:
|
477 |
+
desc: null
|
478 |
+
value: 10
|
479 |
+
logging_nan_inf_filter:
|
480 |
+
desc: null
|
481 |
+
value: true
|
482 |
+
save_strategy:
|
483 |
+
desc: null
|
484 |
+
value: steps
|
485 |
+
save_steps:
|
486 |
+
desc: null
|
487 |
+
value: 100
|
488 |
+
save_total_limit:
|
489 |
+
desc: null
|
490 |
+
value: 1
|
491 |
+
save_safetensors:
|
492 |
+
desc: null
|
493 |
+
value: true
|
494 |
+
save_on_each_node:
|
495 |
+
desc: null
|
496 |
+
value: false
|
497 |
+
save_only_model:
|
498 |
+
desc: null
|
499 |
+
value: false
|
500 |
+
no_cuda:
|
501 |
+
desc: null
|
502 |
+
value: false
|
503 |
+
use_cpu:
|
504 |
+
desc: null
|
505 |
+
value: false
|
506 |
+
use_mps_device:
|
507 |
+
desc: null
|
508 |
+
value: false
|
509 |
+
seed:
|
510 |
+
desc: null
|
511 |
+
value: 42
|
512 |
+
data_seed:
|
513 |
+
desc: null
|
514 |
+
value: null
|
515 |
+
jit_mode_eval:
|
516 |
+
desc: null
|
517 |
+
value: false
|
518 |
+
use_ipex:
|
519 |
+
desc: null
|
520 |
+
value: false
|
521 |
+
bf16:
|
522 |
+
desc: null
|
523 |
+
value: false
|
524 |
+
fp16:
|
525 |
+
desc: null
|
526 |
+
value: true
|
527 |
+
fp16_opt_level:
|
528 |
+
desc: null
|
529 |
+
value: O1
|
530 |
+
half_precision_backend:
|
531 |
+
desc: null
|
532 |
+
value: auto
|
533 |
+
bf16_full_eval:
|
534 |
+
desc: null
|
535 |
+
value: false
|
536 |
+
fp16_full_eval:
|
537 |
+
desc: null
|
538 |
+
value: false
|
539 |
+
tf32:
|
540 |
+
desc: null
|
541 |
+
value: null
|
542 |
+
local_rank:
|
543 |
+
desc: null
|
544 |
+
value: 0
|
545 |
+
ddp_backend:
|
546 |
+
desc: null
|
547 |
+
value: null
|
548 |
+
tpu_num_cores:
|
549 |
+
desc: null
|
550 |
+
value: null
|
551 |
+
tpu_metrics_debug:
|
552 |
+
desc: null
|
553 |
+
value: false
|
554 |
+
debug:
|
555 |
+
desc: null
|
556 |
+
value: []
|
557 |
+
dataloader_drop_last:
|
558 |
+
desc: null
|
559 |
+
value: false
|
560 |
+
eval_steps:
|
561 |
+
desc: null
|
562 |
+
value: 100
|
563 |
+
dataloader_num_workers:
|
564 |
+
desc: null
|
565 |
+
value: 0
|
566 |
+
dataloader_prefetch_factor:
|
567 |
+
desc: null
|
568 |
+
value: null
|
569 |
+
past_index:
|
570 |
+
desc: null
|
571 |
+
value: -1
|
572 |
+
run_name:
|
573 |
+
desc: null
|
574 |
+
value: /kaggle/working/
|
575 |
+
disable_tqdm:
|
576 |
+
desc: null
|
577 |
+
value: false
|
578 |
+
remove_unused_columns:
|
579 |
+
desc: null
|
580 |
+
value: false
|
581 |
+
label_names:
|
582 |
+
desc: null
|
583 |
+
value: null
|
584 |
+
load_best_model_at_end:
|
585 |
+
desc: null
|
586 |
+
value: false
|
587 |
+
metric_for_best_model:
|
588 |
+
desc: null
|
589 |
+
value: null
|
590 |
+
greater_is_better:
|
591 |
+
desc: null
|
592 |
+
value: null
|
593 |
+
ignore_data_skip:
|
594 |
+
desc: null
|
595 |
+
value: false
|
596 |
+
fsdp:
|
597 |
+
desc: null
|
598 |
+
value: []
|
599 |
+
fsdp_min_num_params:
|
600 |
+
desc: null
|
601 |
+
value: 0
|
602 |
+
fsdp_config:
|
603 |
+
desc: null
|
604 |
+
value:
|
605 |
+
min_num_params: 0
|
606 |
+
xla: false
|
607 |
+
xla_fsdp_v2: false
|
608 |
+
xla_fsdp_grad_ckpt: false
|
609 |
+
fsdp_transformer_layer_cls_to_wrap:
|
610 |
+
desc: null
|
611 |
+
value: null
|
612 |
+
accelerator_config:
|
613 |
+
desc: null
|
614 |
+
value:
|
615 |
+
split_batches: false
|
616 |
+
dispatch_batches: null
|
617 |
+
even_batches: true
|
618 |
+
use_seedable_sampler: true
|
619 |
+
deepspeed:
|
620 |
+
desc: null
|
621 |
+
value: null
|
622 |
+
label_smoothing_factor:
|
623 |
+
desc: null
|
624 |
+
value: 0.0
|
625 |
+
optim:
|
626 |
+
desc: null
|
627 |
+
value: paged_adamw_32bit
|
628 |
+
optim_args:
|
629 |
+
desc: null
|
630 |
+
value: null
|
631 |
+
adafactor:
|
632 |
+
desc: null
|
633 |
+
value: false
|
634 |
+
group_by_length:
|
635 |
+
desc: null
|
636 |
+
value: false
|
637 |
+
length_column_name:
|
638 |
+
desc: null
|
639 |
+
value: length
|
640 |
+
report_to:
|
641 |
+
desc: null
|
642 |
+
value:
|
643 |
+
- tensorboard
|
644 |
+
- wandb
|
645 |
+
ddp_find_unused_parameters:
|
646 |
+
desc: null
|
647 |
+
value: null
|
648 |
+
ddp_bucket_cap_mb:
|
649 |
+
desc: null
|
650 |
+
value: null
|
651 |
+
ddp_broadcast_buffers:
|
652 |
+
desc: null
|
653 |
+
value: null
|
654 |
+
dataloader_pin_memory:
|
655 |
+
desc: null
|
656 |
+
value: true
|
657 |
+
dataloader_persistent_workers:
|
658 |
+
desc: null
|
659 |
+
value: false
|
660 |
+
skip_memory_metrics:
|
661 |
+
desc: null
|
662 |
+
value: true
|
663 |
+
use_legacy_prediction_loop:
|
664 |
+
desc: null
|
665 |
+
value: false
|
666 |
+
push_to_hub:
|
667 |
+
desc: null
|
668 |
+
value: true
|
669 |
+
resume_from_checkpoint:
|
670 |
+
desc: null
|
671 |
+
value: null
|
672 |
+
hub_model_id:
|
673 |
+
desc: null
|
674 |
+
value: VanCan23/DPO_Vietnamese_chatbot_lessData
|
675 |
+
hub_strategy:
|
676 |
+
desc: null
|
677 |
+
value: every_save
|
678 |
+
hub_token:
|
679 |
+
desc: null
|
680 |
+
value: <HUB_TOKEN>
|
681 |
+
hub_private_repo:
|
682 |
+
desc: null
|
683 |
+
value: false
|
684 |
+
hub_always_push:
|
685 |
+
desc: null
|
686 |
+
value: false
|
687 |
+
gradient_checkpointing:
|
688 |
+
desc: null
|
689 |
+
value: true
|
690 |
+
gradient_checkpointing_kwargs:
|
691 |
+
desc: null
|
692 |
+
value:
|
693 |
+
use_reentrant: false
|
694 |
+
include_inputs_for_metrics:
|
695 |
+
desc: null
|
696 |
+
value: false
|
697 |
+
fp16_backend:
|
698 |
+
desc: null
|
699 |
+
value: auto
|
700 |
+
push_to_hub_model_id:
|
701 |
+
desc: null
|
702 |
+
value: null
|
703 |
+
push_to_hub_organization:
|
704 |
+
desc: null
|
705 |
+
value: null
|
706 |
+
push_to_hub_token:
|
707 |
+
desc: null
|
708 |
+
value: <PUSH_TO_HUB_TOKEN>
|
709 |
+
mp_parameters:
|
710 |
+
desc: null
|
711 |
+
value: ''
|
712 |
+
auto_find_batch_size:
|
713 |
+
desc: null
|
714 |
+
value: false
|
715 |
+
full_determinism:
|
716 |
+
desc: null
|
717 |
+
value: false
|
718 |
+
torchdynamo:
|
719 |
+
desc: null
|
720 |
+
value: null
|
721 |
+
ray_scope:
|
722 |
+
desc: null
|
723 |
+
value: last
|
724 |
+
ddp_timeout:
|
725 |
+
desc: null
|
726 |
+
value: 1800
|
727 |
+
torch_compile:
|
728 |
+
desc: null
|
729 |
+
value: false
|
730 |
+
torch_compile_backend:
|
731 |
+
desc: null
|
732 |
+
value: null
|
733 |
+
torch_compile_mode:
|
734 |
+
desc: null
|
735 |
+
value: null
|
736 |
+
dispatch_batches:
|
737 |
+
desc: null
|
738 |
+
value: null
|
739 |
+
split_batches:
|
740 |
+
desc: null
|
741 |
+
value: null
|
742 |
+
include_tokens_per_second:
|
743 |
+
desc: null
|
744 |
+
value: false
|
745 |
+
include_num_input_tokens_seen:
|
746 |
+
desc: null
|
747 |
+
value: false
|
748 |
+
neftune_noise_alpha:
|
749 |
+
desc: null
|
750 |
+
value: null
|
751 |
+
optim_target_modules:
|
752 |
+
desc: null
|
753 |
+
value: null
|
wandb/run-20240522_054348-vgrzs6jq/files/output.log
ADDED
@@ -0,0 +1,624 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
|
2 |
+
Token is valid (permission: write).
|
3 |
+
Your token has been saved to /root/.cache/huggingface/token
|
4 |
+
Login successful
|
5 |
+
2024-05-22 05:44:14.340486: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
6 |
+
2024-05-22 05:44:14.340587: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
7 |
+
2024-05-22 05:44:14.458170: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
8 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
9 |
+
Downloading data: 100%|██████████| 84.7M/84.7M [00:00<00:00, 120MB/s]
|
10 |
+
chosen_en : Mexico, especially when they involve international travelers, time is of the essence as each passing day makes the search harder and leads grow cold," said Detective Sanchez with a heavy heart, as he closed the folder on Harry Devert's case and glanced out the window, the setting sun casting a somber glow over a landscape that still held its secrets tight and unyielding - a stark reminder that for some journeys, the road goes on forever and the destination remains unreachable.
|
11 |
+
system_en : You are an AI assistant. You will be given a task. You must generate a detailed and long answer.
|
12 |
+
question_vi : Viết câu cuối cùng trong câu chuyện này. (CNN) – Hành trình đi xe máy của Harry Devert từ Hoa Kỳ đến Châu Mỹ Latinh phần nào gợi nhớ đến “Nhật ký xe máy”, kể lại chuyến du hành Nam Mỹ của nhà cách mạng Ernesto “Che” Guevara trên một chiếc xe máy cũ để tìm kiếm tinh thần nổi dậy của mình. Nhưng Devert, 32 tuổi, người đã rời bỏ công việc kinh doanh tài chính để đi du lịch khắp thế giới, đã không liên lạc với mẹ hoặc bạn gái của mình ở New York kể từ ngày 25 tháng 1. Ngày hôm đó, anh gửi cho bạn gái Sarah Ashley Schiear một tin nhắn đáng ngại qua WhatsApp. ứng dụng nhắn tin. Tin nhắn cho biết: “Tôi vừa phải hộ tống một tiếng rưỡi dài ra khỏi một khu vực nào đó, điều đó quá nguy hiểm đối với tôi”. "Dừng ăn trưa và ... thì đấy Internet. ... Sẽ sớm quay lại đường thôi. Hình như có một đội hộ tống quân sự khác đang đợi tôi ở một thị trấn khác nào đó... Tôi đến muộn vì mấy chuyện quân sự điên rồ. ..hy vọng có cơ hội nói chuyện với bạn tối nay khi tôi (hy vọng) cuối cùng cũng đến nơi." “Đối với trường hợp người mất tích ở
|
13 |
+
rejected_en : ... the search for Harry Devert, who embarked on a motorcycle journey from the United States to Latin America, has taken a dire turn. Despite numerous attempts to contact him, his family and friends have received no word from him since the ominous text message on January 25th, in which he mentioned military escorts and delays. As the days go by, the concern for Harry's safety grows, and the authorities have launched a full-scale investigation into his disappearance. The last known location of Harry's motorcycle was tracked to a small town in a remote region of Latin America, where eyewitnesses reported seeing him being escorted by military personnel. The search for Harry continues, with hopes that he may still be alive and in need of urgent assistance. The family and friends of Harry Devert remain vigilant, holding onto the hope that he will soon be found and brought home safely.
|
14 |
+
system_vi : Bạn là một trợ lý AI. Bạn sẽ được giao một nhiệm vụ. Bạn phải tạo ra một câu trả lời chi tiết và dài.
|
15 |
+
question_en : Write the last sentence in this story.
|
16 |
+
(CNN) -- Harry Devert's motorcycle journey from the United States to Latin America is somewhat reminiscent of "The Motorcycle Diaries," which recount the South American travels of revolutionary Ernesto "Che" Guevara on an old motorbike in search of his insurgent spirit. But Devert, 32, who left a job as a trader in finance to travel the world, has not been in touch with his mother or girlfriend in New York since January 25. That day he sent girlfriend Sarah Ashley Schiear an ominous text via the WhatsApp messenger app. "Just got an hour and a half long escort out of some area it was too dangerous for me to be," the message said. "Stopping for lunch and ... voila Internet. ... Gonna get back on the road soon. Apparently there's another military escort waiting for me in some other town... I'm running way late because of the crazy military stuff...hopefully get a chance to talk to you tonight when I (hopefully) finally arrive."
|
17 |
+
"For missing person cases in
|
18 |
+
chosen_vi : Mexico, đặc biệt là khi chúng liên quan đến du khách quốc tế, thời gian là điều cốt yếu vì mỗi ngày trôi qua khiến việc tìm kiếm trở nên khó khăn hơn và các manh mối ngày càng trở nên mờ nhạt,” Thám tử Sanchez nói với trái tim trĩu nặng khi đóng tập hồ sơ về vụ án của Harry Devert và liếc ra ngoài cửa sổ. , mặt trời lặn tỏa ánh sáng ảm đạm lên một khung cảnh vẫn còn nắm giữ những bí mật chặt chẽ và kiên cường - một lời nhắc nhở rõ ràng rằng đối với một số hành trình, con đường sẽ kéo dài mãi mãi và đích đến vẫn không thể đến được.
|
19 |
+
rejected_vi : ... cuộc tìm kiếm Harry Devert, người bắt đầu cuộc hành trình bằng mô tô từ Hoa Kỳ đến Châu Mỹ Latinh, đã có một bước ngoặt thảm khốc. Bất chấp nhiều nỗ lực liên lạc với anh ấy, gia đình và bạn bè của anh ấy vẫn không nhận được tin tức gì từ anh ấy kể từ tin nhắn đáng lo ngại vào ngày 25 tháng 1, trong đó anh ấy đề cập đến việc hộ tống quân sự và sự chậm trễ. Ngày tháng trôi qua, mối lo ngại về sự an toàn của Harry ngày càng tăng và chính quyền đã mở một cuộc điều tra toàn diện về sự mất tích của anh. Vị trí cuối cùng được biết đến của chiếc xe máy của Harry được theo dõi đến một thị trấn nhỏ ở một vùng xa xôi của châu Mỹ Latinh, nơi các nhân chứng cho biết đã nhìn thấy anh ta được quân nhân hộ tống. Cuộc tìm kiếm Harry vẫn tiếp tục với hy vọng rằng cậu bé có thể vẫn còn sống và cần được hỗ trợ khẩn cấp. Gia đình và bạn bè của Harry Devert vẫn cảnh giác, nuôi hy vọng rằng anh sẽ sớm được tìm thấy và đưa về nhà an toàn.
|
20 |
+
Adapter weights model repo: VanCan23/SFTDPO_3epoch_adapter
|
21 |
+
Base model weights model repo: meta-llama/Llama-2-7b-chat-hf
|
22 |
+
base_model.model.model.embed_tokens.weight False
|
23 |
+
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight False
|
24 |
+
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight False
|
25 |
+
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight False
|
26 |
+
base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight False
|
27 |
+
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight False
|
28 |
+
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight False
|
29 |
+
base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight False
|
30 |
+
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight False
|
31 |
+
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight False
|
32 |
+
base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight False
|
33 |
+
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight False
|
34 |
+
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight False
|
35 |
+
base_model.model.model.layers.0.mlp.gate_proj.weight False
|
36 |
+
base_model.model.model.layers.0.mlp.up_proj.weight False
|
37 |
+
base_model.model.model.layers.0.mlp.down_proj.weight False
|
38 |
+
base_model.model.model.layers.0.input_layernorm.weight False
|
39 |
+
base_model.model.model.layers.0.post_attention_layernorm.weight False
|
40 |
+
base_model.model.model.layers.1.self_attn.q_proj.base_layer.weight False
|
41 |
+
base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight False
|
42 |
+
base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight False
|
43 |
+
base_model.model.model.layers.1.self_attn.k_proj.base_layer.weight False
|
44 |
+
base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight False
|
45 |
+
base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight False
|
46 |
+
base_model.model.model.layers.1.self_attn.v_proj.base_layer.weight False
|
47 |
+
base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight False
|
48 |
+
base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight False
|
49 |
+
base_model.model.model.layers.1.self_attn.o_proj.base_layer.weight False
|
50 |
+
base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight False
|
51 |
+
base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight False
|
52 |
+
base_model.model.model.layers.1.mlp.gate_proj.weight False
|
53 |
+
base_model.model.model.layers.1.mlp.up_proj.weight False
|
54 |
+
base_model.model.model.layers.1.mlp.down_proj.weight False
|
55 |
+
base_model.model.model.layers.1.input_layernorm.weight False
|
56 |
+
base_model.model.model.layers.1.post_attention_layernorm.weight False
|
57 |
+
base_model.model.model.layers.2.self_attn.q_proj.base_layer.weight False
|
58 |
+
base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight False
|
59 |
+
base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight False
|
60 |
+
base_model.model.model.layers.2.self_attn.k_proj.base_layer.weight False
|
61 |
+
base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight False
|
62 |
+
base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight False
|
63 |
+
base_model.model.model.layers.2.self_attn.v_proj.base_layer.weight False
|
64 |
+
base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight False
|
65 |
+
base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight False
|
66 |
+
base_model.model.model.layers.2.self_attn.o_proj.base_layer.weight False
|
67 |
+
base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight False
|
68 |
+
base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight False
|
69 |
+
base_model.model.model.layers.2.mlp.gate_proj.weight False
|
70 |
+
base_model.model.model.layers.2.mlp.up_proj.weight False
|
71 |
+
base_model.model.model.layers.2.mlp.down_proj.weight False
|
72 |
+
base_model.model.model.layers.2.input_layernorm.weight False
|
73 |
+
base_model.model.model.layers.2.post_attention_layernorm.weight False
|
74 |
+
base_model.model.model.layers.3.self_attn.q_proj.base_layer.weight False
|
75 |
+
base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight False
|
76 |
+
base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight False
|
77 |
+
base_model.model.model.layers.3.self_attn.k_proj.base_layer.weight False
|
78 |
+
base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight False
|
79 |
+
base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight False
|
80 |
+
base_model.model.model.layers.3.self_attn.v_proj.base_layer.weight False
|
81 |
+
base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight False
|
82 |
+
base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight False
|
83 |
+
base_model.model.model.layers.3.self_attn.o_proj.base_layer.weight False
|
84 |
+
base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight False
|
85 |
+
base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight False
|
86 |
+
base_model.model.model.layers.3.mlp.gate_proj.weight False
|
87 |
+
base_model.model.model.layers.3.mlp.up_proj.weight False
|
88 |
+
base_model.model.model.layers.3.mlp.down_proj.weight False
|
89 |
+
base_model.model.model.layers.3.input_layernorm.weight False
|
90 |
+
base_model.model.model.layers.3.post_attention_layernorm.weight False
|
91 |
+
base_model.model.model.layers.4.self_attn.q_proj.base_layer.weight False
|
92 |
+
base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight False
|
93 |
+
base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight False
|
94 |
+
base_model.model.model.layers.4.self_attn.k_proj.base_layer.weight False
|
95 |
+
base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight False
|
96 |
+
base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight False
|
97 |
+
base_model.model.model.layers.4.self_attn.v_proj.base_layer.weight False
|
98 |
+
base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight False
|
99 |
+
base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight False
|
100 |
+
base_model.model.model.layers.4.self_attn.o_proj.base_layer.weight False
|
101 |
+
base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight False
|
102 |
+
base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight False
|
103 |
+
base_model.model.model.layers.4.mlp.gate_proj.weight False
|
104 |
+
base_model.model.model.layers.4.mlp.up_proj.weight False
|
105 |
+
base_model.model.model.layers.4.mlp.down_proj.weight False
|
106 |
+
base_model.model.model.layers.4.input_layernorm.weight False
|
107 |
+
base_model.model.model.layers.4.post_attention_layernorm.weight False
|
108 |
+
base_model.model.model.layers.5.self_attn.q_proj.base_layer.weight False
|
109 |
+
base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight False
|
110 |
+
base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight False
|
111 |
+
base_model.model.model.layers.5.self_attn.k_proj.base_layer.weight False
|
112 |
+
base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight False
|
113 |
+
base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight False
|
114 |
+
base_model.model.model.layers.5.self_attn.v_proj.base_layer.weight False
|
115 |
+
base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight False
|
116 |
+
base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight False
|
117 |
+
base_model.model.model.layers.5.self_attn.o_proj.base_layer.weight False
|
118 |
+
base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight False
|
119 |
+
base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight False
|
120 |
+
base_model.model.model.layers.5.mlp.gate_proj.weight False
|
121 |
+
base_model.model.model.layers.5.mlp.up_proj.weight False
|
122 |
+
base_model.model.model.layers.5.mlp.down_proj.weight False
|
123 |
+
base_model.model.model.layers.5.input_layernorm.weight False
|
124 |
+
base_model.model.model.layers.5.post_attention_layernorm.weight False
|
125 |
+
base_model.model.model.layers.6.self_attn.q_proj.base_layer.weight False
|
126 |
+
base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight False
|
127 |
+
base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight False
|
128 |
+
base_model.model.model.layers.6.self_attn.k_proj.base_layer.weight False
|
129 |
+
base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight False
|
130 |
+
base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight False
|
131 |
+
base_model.model.model.layers.6.self_attn.v_proj.base_layer.weight False
|
132 |
+
base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight False
|
133 |
+
base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight False
|
134 |
+
base_model.model.model.layers.6.self_attn.o_proj.base_layer.weight False
|
135 |
+
base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight False
|
136 |
+
base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight False
|
137 |
+
base_model.model.model.layers.6.mlp.gate_proj.weight False
|
138 |
+
base_model.model.model.layers.6.mlp.up_proj.weight False
|
139 |
+
base_model.model.model.layers.6.mlp.down_proj.weight False
|
140 |
+
base_model.model.model.layers.6.input_layernorm.weight False
|
141 |
+
base_model.model.model.layers.6.post_attention_layernorm.weight False
|
142 |
+
base_model.model.model.layers.7.self_attn.q_proj.base_layer.weight False
|
143 |
+
base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight False
|
144 |
+
base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight False
|
145 |
+
base_model.model.model.layers.7.self_attn.k_proj.base_layer.weight False
|
146 |
+
base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight False
|
147 |
+
base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight False
|
148 |
+
base_model.model.model.layers.7.self_attn.v_proj.base_layer.weight False
|
149 |
+
base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight False
|
150 |
+
base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight False
|
151 |
+
base_model.model.model.layers.7.self_attn.o_proj.base_layer.weight False
|
152 |
+
base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight False
|
153 |
+
base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight False
|
154 |
+
base_model.model.model.layers.7.mlp.gate_proj.weight False
|
155 |
+
base_model.model.model.layers.7.mlp.up_proj.weight False
|
156 |
+
base_model.model.model.layers.7.mlp.down_proj.weight False
|
157 |
+
base_model.model.model.layers.7.input_layernorm.weight False
|
158 |
+
base_model.model.model.layers.7.post_attention_layernorm.weight False
|
159 |
+
base_model.model.model.layers.8.self_attn.q_proj.base_layer.weight False
|
160 |
+
base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight False
|
161 |
+
base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight False
|
162 |
+
base_model.model.model.layers.8.self_attn.k_proj.base_layer.weight False
|
163 |
+
base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight False
|
164 |
+
base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight False
|
165 |
+
base_model.model.model.layers.8.self_attn.v_proj.base_layer.weight False
|
166 |
+
base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight False
|
167 |
+
base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight False
|
168 |
+
base_model.model.model.layers.8.self_attn.o_proj.base_layer.weight False
|
169 |
+
base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight False
|
170 |
+
base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight False
|
171 |
+
base_model.model.model.layers.8.mlp.gate_proj.weight False
|
172 |
+
base_model.model.model.layers.8.mlp.up_proj.weight False
|
173 |
+
base_model.model.model.layers.8.mlp.down_proj.weight False
|
174 |
+
base_model.model.model.layers.8.input_layernorm.weight False
|
175 |
+
base_model.model.model.layers.8.post_attention_layernorm.weight False
|
176 |
+
base_model.model.model.layers.9.self_attn.q_proj.base_layer.weight False
|
177 |
+
base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight False
|
178 |
+
base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight False
|
179 |
+
base_model.model.model.layers.9.self_attn.k_proj.base_layer.weight False
|
180 |
+
base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight False
|
181 |
+
base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight False
|
182 |
+
base_model.model.model.layers.9.self_attn.v_proj.base_layer.weight False
|
183 |
+
base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight False
|
184 |
+
base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight False
|
185 |
+
base_model.model.model.layers.9.self_attn.o_proj.base_layer.weight False
|
186 |
+
base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight False
|
187 |
+
base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight False
|
188 |
+
base_model.model.model.layers.9.mlp.gate_proj.weight False
|
189 |
+
base_model.model.model.layers.9.mlp.up_proj.weight False
|
190 |
+
base_model.model.model.layers.9.mlp.down_proj.weight False
|
191 |
+
base_model.model.model.layers.9.input_layernorm.weight False
|
192 |
+
base_model.model.model.layers.9.post_attention_layernorm.weight False
|
193 |
+
base_model.model.model.layers.10.self_attn.q_proj.base_layer.weight False
|
194 |
+
base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight False
|
195 |
+
base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight False
|
196 |
+
base_model.model.model.layers.10.self_attn.k_proj.base_layer.weight False
|
197 |
+
base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight False
|
198 |
+
base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight False
|
199 |
+
base_model.model.model.layers.10.self_attn.v_proj.base_layer.weight False
|
200 |
+
base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight False
|
201 |
+
base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight False
|
202 |
+
base_model.model.model.layers.10.self_attn.o_proj.base_layer.weight False
|
203 |
+
base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight False
|
204 |
+
base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight False
|
205 |
+
base_model.model.model.layers.10.mlp.gate_proj.weight False
|
206 |
+
base_model.model.model.layers.10.mlp.up_proj.weight False
|
207 |
+
base_model.model.model.layers.10.mlp.down_proj.weight False
|
208 |
+
base_model.model.model.layers.10.input_layernorm.weight False
|
209 |
+
base_model.model.model.layers.10.post_attention_layernorm.weight False
|
210 |
+
base_model.model.model.layers.11.self_attn.q_proj.base_layer.weight False
|
211 |
+
base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight False
|
212 |
+
base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight False
|
213 |
+
base_model.model.model.layers.11.self_attn.k_proj.base_layer.weight False
|
214 |
+
base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight False
|
215 |
+
base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight False
|
216 |
+
base_model.model.model.layers.11.self_attn.v_proj.base_layer.weight False
|
217 |
+
base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight False
|
218 |
+
base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight False
|
219 |
+
base_model.model.model.layers.11.self_attn.o_proj.base_layer.weight False
|
220 |
+
base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight False
|
221 |
+
base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight False
|
222 |
+
base_model.model.model.layers.11.mlp.gate_proj.weight False
|
223 |
+
base_model.model.model.layers.11.mlp.up_proj.weight False
|
224 |
+
base_model.model.model.layers.11.mlp.down_proj.weight False
|
225 |
+
base_model.model.model.layers.11.input_layernorm.weight False
|
226 |
+
base_model.model.model.layers.11.post_attention_layernorm.weight False
|
227 |
+
base_model.model.model.layers.12.self_attn.q_proj.base_layer.weight False
|
228 |
+
base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight False
|
229 |
+
base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight False
|
230 |
+
base_model.model.model.layers.12.self_attn.k_proj.base_layer.weight False
|
231 |
+
base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight False
|
232 |
+
base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight False
|
233 |
+
base_model.model.model.layers.12.self_attn.v_proj.base_layer.weight False
|
234 |
+
base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight False
|
235 |
+
base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight False
|
236 |
+
base_model.model.model.layers.12.self_attn.o_proj.base_layer.weight False
|
237 |
+
base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight False
|
238 |
+
base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight False
|
239 |
+
base_model.model.model.layers.12.mlp.gate_proj.weight False
|
240 |
+
base_model.model.model.layers.12.mlp.up_proj.weight False
|
241 |
+
base_model.model.model.layers.12.mlp.down_proj.weight False
|
242 |
+
base_model.model.model.layers.12.input_layernorm.weight False
|
243 |
+
base_model.model.model.layers.12.post_attention_layernorm.weight False
|
244 |
+
base_model.model.model.layers.13.self_attn.q_proj.base_layer.weight False
|
245 |
+
base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight False
|
246 |
+
base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight False
|
247 |
+
base_model.model.model.layers.13.self_attn.k_proj.base_layer.weight False
|
248 |
+
base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight False
|
249 |
+
base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight False
|
250 |
+
base_model.model.model.layers.13.self_attn.v_proj.base_layer.weight False
|
251 |
+
base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight False
|
252 |
+
base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight False
|
253 |
+
base_model.model.model.layers.13.self_attn.o_proj.base_layer.weight False
|
254 |
+
base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight False
|
255 |
+
base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight False
|
256 |
+
base_model.model.model.layers.13.mlp.gate_proj.weight False
|
257 |
+
base_model.model.model.layers.13.mlp.up_proj.weight False
|
258 |
+
base_model.model.model.layers.13.mlp.down_proj.weight False
|
259 |
+
base_model.model.model.layers.13.input_layernorm.weight False
|
260 |
+
base_model.model.model.layers.13.post_attention_layernorm.weight False
|
261 |
+
base_model.model.model.layers.14.self_attn.q_proj.base_layer.weight False
|
262 |
+
base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight False
|
263 |
+
base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight False
|
264 |
+
base_model.model.model.layers.14.self_attn.k_proj.base_layer.weight False
|
265 |
+
base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight False
|
266 |
+
base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight False
|
267 |
+
base_model.model.model.layers.14.self_attn.v_proj.base_layer.weight False
|
268 |
+
base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.weight False
|
269 |
+
base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.weight False
|
270 |
+
base_model.model.model.layers.14.self_attn.o_proj.base_layer.weight False
|
271 |
+
base_model.model.model.layers.14.self_attn.o_proj.lora_A.default.weight False
|
272 |
+
base_model.model.model.layers.14.self_attn.o_proj.lora_B.default.weight False
|
273 |
+
base_model.model.model.layers.14.mlp.gate_proj.weight False
|
274 |
+
base_model.model.model.layers.14.mlp.up_proj.weight False
|
275 |
+
base_model.model.model.layers.14.mlp.down_proj.weight False
|
276 |
+
base_model.model.model.layers.14.input_layernorm.weight False
|
277 |
+
base_model.model.model.layers.14.post_attention_layernorm.weight False
|
278 |
+
base_model.model.model.layers.15.self_attn.q_proj.base_layer.weight False
|
279 |
+
base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.weight False
|
280 |
+
base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.weight False
|
281 |
+
base_model.model.model.layers.15.self_attn.k_proj.base_layer.weight False
|
282 |
+
base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.weight False
|
283 |
+
base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.weight False
|
284 |
+
base_model.model.model.layers.15.self_attn.v_proj.base_layer.weight False
|
285 |
+
base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.weight False
|
286 |
+
base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.weight False
|
287 |
+
base_model.model.model.layers.15.self_attn.o_proj.base_layer.weight False
|
288 |
+
base_model.model.model.layers.15.self_attn.o_proj.lora_A.default.weight False
|
289 |
+
base_model.model.model.layers.15.self_attn.o_proj.lora_B.default.weight False
|
290 |
+
base_model.model.model.layers.15.mlp.gate_proj.weight False
|
291 |
+
base_model.model.model.layers.15.mlp.up_proj.weight False
|
292 |
+
base_model.model.model.layers.15.mlp.down_proj.weight False
|
293 |
+
base_model.model.model.layers.15.input_layernorm.weight False
|
294 |
+
base_model.model.model.layers.15.post_attention_layernorm.weight False
|
295 |
+
base_model.model.model.layers.16.self_attn.q_proj.base_layer.weight False
|
296 |
+
base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.weight False
|
297 |
+
base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.weight False
|
298 |
+
base_model.model.model.layers.16.self_attn.k_proj.base_layer.weight False
|
299 |
+
base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.weight False
|
300 |
+
base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.weight False
|
301 |
+
base_model.model.model.layers.16.self_attn.v_proj.base_layer.weight False
|
302 |
+
base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.weight False
|
303 |
+
base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.weight False
|
304 |
+
base_model.model.model.layers.16.self_attn.o_proj.base_layer.weight False
|
305 |
+
base_model.model.model.layers.16.self_attn.o_proj.lora_A.default.weight False
|
306 |
+
base_model.model.model.layers.16.self_attn.o_proj.lora_B.default.weight False
|
307 |
+
base_model.model.model.layers.16.mlp.gate_proj.weight False
|
308 |
+
base_model.model.model.layers.16.mlp.up_proj.weight False
|
309 |
+
base_model.model.model.layers.16.mlp.down_proj.weight False
|
310 |
+
base_model.model.model.layers.16.input_layernorm.weight False
|
311 |
+
base_model.model.model.layers.16.post_attention_layernorm.weight False
|
312 |
+
base_model.model.model.layers.17.self_attn.q_proj.base_layer.weight False
|
313 |
+
base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.weight False
|
314 |
+
base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.weight False
|
315 |
+
base_model.model.model.layers.17.self_attn.k_proj.base_layer.weight False
|
316 |
+
base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.weight False
|
317 |
+
base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.weight False
|
318 |
+
base_model.model.model.layers.17.self_attn.v_proj.base_layer.weight False
|
319 |
+
base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.weight False
|
320 |
+
base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.weight False
|
321 |
+
base_model.model.model.layers.17.self_attn.o_proj.base_layer.weight False
|
322 |
+
base_model.model.model.layers.17.self_attn.o_proj.lora_A.default.weight False
|
323 |
+
base_model.model.model.layers.17.self_attn.o_proj.lora_B.default.weight False
|
324 |
+
base_model.model.model.layers.17.mlp.gate_proj.weight False
|
325 |
+
base_model.model.model.layers.17.mlp.up_proj.weight False
|
326 |
+
base_model.model.model.layers.17.mlp.down_proj.weight False
|
327 |
+
base_model.model.model.layers.17.input_layernorm.weight False
|
328 |
+
base_model.model.model.layers.17.post_attention_layernorm.weight False
|
329 |
+
base_model.model.model.layers.18.self_attn.q_proj.base_layer.weight False
|
330 |
+
base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.weight False
|
331 |
+
base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.weight False
|
332 |
+
base_model.model.model.layers.18.self_attn.k_proj.base_layer.weight False
|
333 |
+
base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.weight False
|
334 |
+
base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.weight False
|
335 |
+
base_model.model.model.layers.18.self_attn.v_proj.base_layer.weight False
|
336 |
+
base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.weight False
|
337 |
+
base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.weight False
|
338 |
+
base_model.model.model.layers.18.self_attn.o_proj.base_layer.weight False
|
339 |
+
base_model.model.model.layers.18.self_attn.o_proj.lora_A.default.weight False
|
340 |
+
base_model.model.model.layers.18.self_attn.o_proj.lora_B.default.weight False
|
341 |
+
base_model.model.model.layers.18.mlp.gate_proj.weight False
|
342 |
+
base_model.model.model.layers.18.mlp.up_proj.weight False
|
343 |
+
base_model.model.model.layers.18.mlp.down_proj.weight False
|
344 |
+
base_model.model.model.layers.18.input_layernorm.weight False
|
345 |
+
base_model.model.model.layers.18.post_attention_layernorm.weight False
|
346 |
+
base_model.model.model.layers.19.self_attn.q_proj.base_layer.weight False
|
347 |
+
base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.weight False
|
348 |
+
base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.weight False
|
349 |
+
base_model.model.model.layers.19.self_attn.k_proj.base_layer.weight False
|
350 |
+
base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.weight False
|
351 |
+
base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.weight False
|
352 |
+
base_model.model.model.layers.19.self_attn.v_proj.base_layer.weight False
|
353 |
+
base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.weight False
|
354 |
+
base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.weight False
|
355 |
+
base_model.model.model.layers.19.self_attn.o_proj.base_layer.weight False
|
356 |
+
base_model.model.model.layers.19.self_attn.o_proj.lora_A.default.weight False
|
357 |
+
base_model.model.model.layers.19.self_attn.o_proj.lora_B.default.weight False
|
358 |
+
base_model.model.model.layers.19.mlp.gate_proj.weight False
|
359 |
+
base_model.model.model.layers.19.mlp.up_proj.weight False
|
360 |
+
base_model.model.model.layers.19.mlp.down_proj.weight False
|
361 |
+
base_model.model.model.layers.19.input_layernorm.weight False
|
362 |
+
base_model.model.model.layers.19.post_attention_layernorm.weight False
|
363 |
+
base_model.model.model.layers.20.self_attn.q_proj.base_layer.weight False
|
364 |
+
base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.weight False
|
365 |
+
base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.weight False
|
366 |
+
base_model.model.model.layers.20.self_attn.k_proj.base_layer.weight False
|
367 |
+
base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.weight False
|
368 |
+
base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.weight False
|
369 |
+
base_model.model.model.layers.20.self_attn.v_proj.base_layer.weight False
|
370 |
+
base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.weight False
|
371 |
+
base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.weight False
|
372 |
+
base_model.model.model.layers.20.self_attn.o_proj.base_layer.weight False
|
373 |
+
base_model.model.model.layers.20.self_attn.o_proj.lora_A.default.weight False
|
374 |
+
base_model.model.model.layers.20.self_attn.o_proj.lora_B.default.weight False
|
375 |
+
base_model.model.model.layers.20.mlp.gate_proj.weight False
|
376 |
+
base_model.model.model.layers.20.mlp.up_proj.weight False
|
377 |
+
base_model.model.model.layers.20.mlp.down_proj.weight False
|
378 |
+
base_model.model.model.layers.20.input_layernorm.weight False
|
379 |
+
base_model.model.model.layers.20.post_attention_layernorm.weight False
|
380 |
+
base_model.model.model.layers.21.self_attn.q_proj.base_layer.weight False
|
381 |
+
base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.weight False
|
382 |
+
base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.weight False
|
383 |
+
base_model.model.model.layers.21.self_attn.k_proj.base_layer.weight False
|
384 |
+
base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.weight False
|
385 |
+
base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.weight False
|
386 |
+
base_model.model.model.layers.21.self_attn.v_proj.base_layer.weight False
|
387 |
+
base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.weight False
|
388 |
+
base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.weight False
|
389 |
+
base_model.model.model.layers.21.self_attn.o_proj.base_layer.weight False
|
390 |
+
base_model.model.model.layers.21.self_attn.o_proj.lora_A.default.weight False
|
391 |
+
base_model.model.model.layers.21.self_attn.o_proj.lora_B.default.weight False
|
392 |
+
base_model.model.model.layers.21.mlp.gate_proj.weight False
|
393 |
+
base_model.model.model.layers.21.mlp.up_proj.weight False
|
394 |
+
base_model.model.model.layers.21.mlp.down_proj.weight False
|
395 |
+
base_model.model.model.layers.21.input_layernorm.weight False
|
396 |
+
base_model.model.model.layers.21.post_attention_layernorm.weight False
|
397 |
+
base_model.model.model.layers.22.self_attn.q_proj.base_layer.weight False
|
398 |
+
base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.weight False
|
399 |
+
base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.weight False
|
400 |
+
base_model.model.model.layers.22.self_attn.k_proj.base_layer.weight False
|
401 |
+
base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.weight False
|
402 |
+
base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.weight False
|
403 |
+
base_model.model.model.layers.22.self_attn.v_proj.base_layer.weight False
|
404 |
+
base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.weight False
|
405 |
+
base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.weight False
|
406 |
+
base_model.model.model.layers.22.self_attn.o_proj.base_layer.weight False
|
407 |
+
base_model.model.model.layers.22.self_attn.o_proj.lora_A.default.weight False
|
408 |
+
base_model.model.model.layers.22.self_attn.o_proj.lora_B.default.weight False
|
409 |
+
base_model.model.model.layers.22.mlp.gate_proj.weight False
|
410 |
+
base_model.model.model.layers.22.mlp.up_proj.weight False
|
411 |
+
base_model.model.model.layers.22.mlp.down_proj.weight False
|
412 |
+
base_model.model.model.layers.22.input_layernorm.weight False
|
413 |
+
base_model.model.model.layers.22.post_attention_layernorm.weight False
|
414 |
+
base_model.model.model.layers.23.self_attn.q_proj.base_layer.weight False
|
415 |
+
base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.weight False
|
416 |
+
base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.weight False
|
417 |
+
base_model.model.model.layers.23.self_attn.k_proj.base_layer.weight False
|
418 |
+
base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.weight False
|
419 |
+
base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.weight False
|
420 |
+
base_model.model.model.layers.23.self_attn.v_proj.base_layer.weight False
|
421 |
+
base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.weight False
|
422 |
+
base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.weight False
|
423 |
+
base_model.model.model.layers.23.self_attn.o_proj.base_layer.weight False
|
424 |
+
base_model.model.model.layers.23.self_attn.o_proj.lora_A.default.weight False
|
425 |
+
base_model.model.model.layers.23.self_attn.o_proj.lora_B.default.weight False
|
426 |
+
base_model.model.model.layers.23.mlp.gate_proj.weight False
|
427 |
+
base_model.model.model.layers.23.mlp.up_proj.weight False
|
428 |
+
base_model.model.model.layers.23.mlp.down_proj.weight False
|
429 |
+
base_model.model.model.layers.23.input_layernorm.weight False
|
430 |
+
base_model.model.model.layers.23.post_attention_layernorm.weight False
|
431 |
+
base_model.model.model.layers.24.self_attn.q_proj.base_layer.weight False
|
432 |
+
base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.weight False
|
433 |
+
base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.weight False
|
434 |
+
base_model.model.model.layers.24.self_attn.k_proj.base_layer.weight False
|
435 |
+
base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.weight False
|
436 |
+
base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.weight False
|
437 |
+
base_model.model.model.layers.24.self_attn.v_proj.base_layer.weight False
|
438 |
+
base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.weight False
|
439 |
+
base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.weight False
|
440 |
+
base_model.model.model.layers.24.self_attn.o_proj.base_layer.weight False
|
441 |
+
base_model.model.model.layers.24.self_attn.o_proj.lora_A.default.weight False
|
442 |
+
base_model.model.model.layers.24.self_attn.o_proj.lora_B.default.weight False
|
443 |
+
base_model.model.model.layers.24.mlp.gate_proj.weight False
|
444 |
+
base_model.model.model.layers.24.mlp.up_proj.weight False
|
445 |
+
base_model.model.model.layers.24.mlp.down_proj.weight False
|
446 |
+
base_model.model.model.layers.24.input_layernorm.weight False
|
447 |
+
base_model.model.model.layers.24.post_attention_layernorm.weight False
|
448 |
+
base_model.model.model.layers.25.self_attn.q_proj.base_layer.weight False
|
449 |
+
base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.weight False
|
450 |
+
base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.weight False
|
451 |
+
base_model.model.model.layers.25.self_attn.k_proj.base_layer.weight False
|
452 |
+
base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.weight False
|
453 |
+
base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.weight False
|
454 |
+
base_model.model.model.layers.25.self_attn.v_proj.base_layer.weight False
|
455 |
+
base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.weight False
|
456 |
+
base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.weight False
|
457 |
+
base_model.model.model.layers.25.self_attn.o_proj.base_layer.weight False
|
458 |
+
base_model.model.model.layers.25.self_attn.o_proj.lora_A.default.weight False
|
459 |
+
base_model.model.model.layers.25.self_attn.o_proj.lora_B.default.weight False
|
460 |
+
base_model.model.model.layers.25.mlp.gate_proj.weight False
|
461 |
+
base_model.model.model.layers.25.mlp.up_proj.weight False
|
462 |
+
base_model.model.model.layers.25.mlp.down_proj.weight False
|
463 |
+
base_model.model.model.layers.25.input_layernorm.weight False
|
464 |
+
base_model.model.model.layers.25.post_attention_layernorm.weight False
|
465 |
+
base_model.model.model.layers.26.self_attn.q_proj.base_layer.weight False
|
466 |
+
base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.weight False
|
467 |
+
base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.weight False
|
468 |
+
base_model.model.model.layers.26.self_attn.k_proj.base_layer.weight False
|
469 |
+
base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.weight False
|
470 |
+
base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.weight False
|
471 |
+
base_model.model.model.layers.26.self_attn.v_proj.base_layer.weight False
|
472 |
+
base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.weight False
|
473 |
+
base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.weight False
|
474 |
+
base_model.model.model.layers.26.self_attn.o_proj.base_layer.weight False
|
475 |
+
base_model.model.model.layers.26.self_attn.o_proj.lora_A.default.weight False
|
476 |
+
base_model.model.model.layers.26.self_attn.o_proj.lora_B.default.weight False
|
477 |
+
base_model.model.model.layers.26.mlp.gate_proj.weight False
|
478 |
+
base_model.model.model.layers.26.mlp.up_proj.weight False
|
479 |
+
base_model.model.model.layers.26.mlp.down_proj.weight False
|
480 |
+
base_model.model.model.layers.26.input_layernorm.weight False
|
481 |
+
base_model.model.model.layers.26.post_attention_layernorm.weight False
|
482 |
+
base_model.model.model.layers.27.self_attn.q_proj.base_layer.weight False
|
483 |
+
base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.weight False
|
484 |
+
base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.weight False
|
485 |
+
base_model.model.model.layers.27.self_attn.k_proj.base_layer.weight False
|
486 |
+
base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.weight False
|
487 |
+
base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.weight False
|
488 |
+
base_model.model.model.layers.27.self_attn.v_proj.base_layer.weight False
|
489 |
+
base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.weight False
|
490 |
+
base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.weight False
|
491 |
+
base_model.model.model.layers.27.self_attn.o_proj.base_layer.weight False
|
492 |
+
base_model.model.model.layers.27.self_attn.o_proj.lora_A.default.weight False
|
493 |
+
base_model.model.model.layers.27.self_attn.o_proj.lora_B.default.weight False
|
494 |
+
base_model.model.model.layers.27.mlp.gate_proj.weight False
|
495 |
+
base_model.model.model.layers.27.mlp.up_proj.weight False
|
496 |
+
base_model.model.model.layers.27.mlp.down_proj.weight False
|
497 |
+
base_model.model.model.layers.27.input_layernorm.weight False
|
498 |
+
base_model.model.model.layers.27.post_attention_layernorm.weight False
|
499 |
+
base_model.model.model.layers.28.self_attn.q_proj.base_layer.weight False
|
500 |
+
base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.weight False
|
501 |
+
base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.weight False
|
502 |
+
base_model.model.model.layers.28.self_attn.k_proj.base_layer.weight False
|
503 |
+
base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.weight False
|
504 |
+
base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.weight False
|
505 |
+
base_model.model.model.layers.28.self_attn.v_proj.base_layer.weight False
|
506 |
+
base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.weight False
|
507 |
+
base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.weight False
|
508 |
+
base_model.model.model.layers.28.self_attn.o_proj.base_layer.weight False
|
509 |
+
base_model.model.model.layers.28.self_attn.o_proj.lora_A.default.weight False
|
510 |
+
base_model.model.model.layers.28.self_attn.o_proj.lora_B.default.weight False
|
511 |
+
base_model.model.model.layers.28.mlp.gate_proj.weight False
|
512 |
+
base_model.model.model.layers.28.mlp.up_proj.weight False
|
513 |
+
base_model.model.model.layers.28.mlp.down_proj.weight False
|
514 |
+
base_model.model.model.layers.28.input_layernorm.weight False
|
515 |
+
base_model.model.model.layers.28.post_attention_layernorm.weight False
|
516 |
+
base_model.model.model.layers.29.self_attn.q_proj.base_layer.weight False
|
517 |
+
base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.weight False
|
518 |
+
base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.weight False
|
519 |
+
base_model.model.model.layers.29.self_attn.k_proj.base_layer.weight False
|
520 |
+
base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.weight False
|
521 |
+
base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.weight False
|
522 |
+
base_model.model.model.layers.29.self_attn.v_proj.base_layer.weight False
|
523 |
+
base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.weight False
|
524 |
+
base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.weight False
|
525 |
+
base_model.model.model.layers.29.self_attn.o_proj.base_layer.weight False
|
526 |
+
base_model.model.model.layers.29.self_attn.o_proj.lora_A.default.weight False
|
527 |
+
base_model.model.model.layers.29.self_attn.o_proj.lora_B.default.weight False
|
528 |
+
base_model.model.model.layers.29.mlp.gate_proj.weight False
|
529 |
+
base_model.model.model.layers.29.mlp.up_proj.weight False
|
530 |
+
base_model.model.model.layers.29.mlp.down_proj.weight False
|
531 |
+
base_model.model.model.layers.29.input_layernorm.weight False
|
532 |
+
base_model.model.model.layers.29.post_attention_layernorm.weight False
|
533 |
+
base_model.model.model.layers.30.self_attn.q_proj.base_layer.weight False
|
534 |
+
base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.weight False
|
535 |
+
base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.weight False
|
536 |
+
base_model.model.model.layers.30.self_attn.k_proj.base_layer.weight False
|
537 |
+
base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.weight False
|
538 |
+
base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.weight False
|
539 |
+
base_model.model.model.layers.30.self_attn.v_proj.base_layer.weight False
|
540 |
+
base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.weight False
|
541 |
+
base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.weight False
|
542 |
+
base_model.model.model.layers.30.self_attn.o_proj.base_layer.weight False
|
543 |
+
base_model.model.model.layers.30.self_attn.o_proj.lora_A.default.weight False
|
544 |
+
base_model.model.model.layers.30.self_attn.o_proj.lora_B.default.weight False
|
545 |
+
base_model.model.model.layers.30.mlp.gate_proj.weight False
|
546 |
+
base_model.model.model.layers.30.mlp.up_proj.weight False
|
547 |
+
base_model.model.model.layers.30.mlp.down_proj.weight False
|
548 |
+
base_model.model.model.layers.30.input_layernorm.weight False
|
549 |
+
base_model.model.model.layers.30.post_attention_layernorm.weight False
|
550 |
+
base_model.model.model.layers.31.self_attn.q_proj.base_layer.weight False
|
551 |
+
base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.weight False
|
552 |
+
base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.weight False
|
553 |
+
base_model.model.model.layers.31.self_attn.k_proj.base_layer.weight False
|
554 |
+
base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.weight False
|
555 |
+
base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.weight False
|
556 |
+
base_model.model.model.layers.31.self_attn.v_proj.base_layer.weight False
|
557 |
+
base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.weight False
|
558 |
+
base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.weight False
|
559 |
+
base_model.model.model.layers.31.self_attn.o_proj.base_layer.weight False
|
560 |
+
base_model.model.model.layers.31.self_attn.o_proj.lora_A.default.weight False
|
561 |
+
base_model.model.model.layers.31.self_attn.o_proj.lora_B.default.weight False
|
562 |
+
base_model.model.model.layers.31.mlp.gate_proj.weight False
|
563 |
+
base_model.model.model.layers.31.mlp.up_proj.weight False
|
564 |
+
base_model.model.model.layers.31.mlp.down_proj.weight False
|
565 |
+
base_model.model.model.layers.31.input_layernorm.weight False
|
566 |
+
base_model.model.model.layers.31.post_attention_layernorm.weight False
|
567 |
+
base_model.model.model.norm.weight False
|
568 |
+
base_model.model.lm_head.weight False
|
569 |
+
/opt/conda/lib/python3.10/site-packages/peft/tuners/lora/bnb.py:325: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.
|
570 |
+
warnings.warn(
|
571 |
+
/opt/conda/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py:332: UserWarning: When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.
|
572 |
+
warnings.warn(
|
573 |
+
Token indices sequence length is longer than the specified maximum sequence length for this model (1126 > 1024). Running this sequence through the model will result in indexing errors
|
574 |
+
/opt/conda/lib/python3.10/site-packages/accelerate/accelerator.py:436: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead:
|
575 |
+
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
|
576 |
+
warnings.warn(
|
577 |
+
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
|
578 |
+
Using auto half precision backend
|
579 |
+
***** Running training *****
|
580 |
+
Num examples = 7,200
|
581 |
+
Num Epochs = 1
|
582 |
+
Instantaneous batch size per device = 1
|
583 |
+
Total train batch size (w. parallel, distributed & accumulation) = 4
|
584 |
+
Gradient Accumulation steps = 4
|
585 |
+
Total optimization steps = 1,800
|
586 |
+
Number of trainable parameters = 319,815,680
|
587 |
+
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
|
588 |
+
Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
589 |
+
***** Running Evaluation *****
|
590 |
+
Num examples = 1801
|
591 |
+
Batch size = 1
|
592 |
+
Saving model checkpoint to /kaggle/working/checkpoint-100
|
593 |
+
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/f5db02db724555f92da89c216ac04704f23d4590/config.json
|
594 |
+
Model config LlamaConfig {
|
595 |
+
"_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
|
596 |
+
"architectures": [
|
597 |
+
"LlamaForCausalLM"
|
598 |
+
],
|
599 |
+
"attention_bias": false,
|
600 |
+
"attention_dropout": 0.0,
|
601 |
+
"bos_token_id": 1,
|
602 |
+
"eos_token_id": 2,
|
603 |
+
"hidden_act": "silu",
|
604 |
+
"hidden_size": 4096,
|
605 |
+
"initializer_range": 0.02,
|
606 |
+
"intermediate_size": 11008,
|
607 |
+
"max_position_embeddings": 4096,
|
608 |
+
"model_type": "llama",
|
609 |
+
"num_attention_heads": 32,
|
610 |
+
"num_hidden_layers": 32,
|
611 |
+
"num_key_value_heads": 32,
|
612 |
+
"pretraining_tp": 1,
|
613 |
+
"rms_norm_eps": 1e-05,
|
614 |
+
"rope_scaling": null,
|
615 |
+
"rope_theta": 10000.0,
|
616 |
+
"tie_word_embeddings": false,
|
617 |
+
"torch_dtype": "float16",
|
618 |
+
"transformers_version": "4.39.3",
|
619 |
+
"use_cache": true,
|
620 |
+
"vocab_size": 32000
|
621 |
+
}
|
622 |
+
tokenizer config file saved in /kaggle/working/checkpoint-100/tokenizer_config.json
|
623 |
+
Special tokens file saved in /kaggle/working/checkpoint-100/special_tokens_map.json
|
624 |
+
tokenizer config file saved in /kaggle/working/tokenizer_config.json
|
wandb/run-20240522_054348-vgrzs6jq/files/requirements.txt
ADDED
@@ -0,0 +1,867 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Babel==2.14.0
|
2 |
+
Boruta==0.3
|
3 |
+
Brotli==1.0.9
|
4 |
+
CVXcanon==0.1.2
|
5 |
+
Cartopy==0.23.0
|
6 |
+
Cython==3.0.8
|
7 |
+
Deprecated==1.2.14
|
8 |
+
Farama-Notifications==0.0.4
|
9 |
+
Flask==3.0.3
|
10 |
+
Geohash==1.0
|
11 |
+
GitPython==3.1.41
|
12 |
+
ImageHash==4.3.1
|
13 |
+
Janome==0.5.0
|
14 |
+
Jinja2==3.1.2
|
15 |
+
LunarCalendar==0.0.9
|
16 |
+
Mako==1.3.3
|
17 |
+
Markdown==3.5.2
|
18 |
+
MarkupSafe==2.1.3
|
19 |
+
MarkupSafe==2.1.5
|
20 |
+
Pillow==9.5.0
|
21 |
+
PuLP==2.8.0
|
22 |
+
PyArabic==0.6.15
|
23 |
+
PyJWT==2.8.0
|
24 |
+
PyMeeus==0.5.12
|
25 |
+
PySocks==1.7.1
|
26 |
+
PyUpSet==0.1.1.post7
|
27 |
+
PyWavelets==1.5.0
|
28 |
+
PyYAML==6.0.1
|
29 |
+
Pygments==2.17.2
|
30 |
+
Pympler==1.0.1
|
31 |
+
QtPy==2.4.1
|
32 |
+
Rtree==1.2.0
|
33 |
+
SQLAlchemy==2.0.25
|
34 |
+
SecretStorage==3.3.3
|
35 |
+
Send2Trash==1.8.2
|
36 |
+
Shapely==1.8.5.post1
|
37 |
+
Shimmy==1.3.0
|
38 |
+
SimpleITK==2.3.1
|
39 |
+
TPOT==0.12.1
|
40 |
+
Theano-PyMC==1.1.2
|
41 |
+
Theano==1.0.5
|
42 |
+
Wand==0.6.13
|
43 |
+
Werkzeug==3.0.2
|
44 |
+
absl-py==1.4.0
|
45 |
+
accelerate==0.29.3
|
46 |
+
access==1.1.9
|
47 |
+
affine==2.4.0
|
48 |
+
aiobotocore==2.12.3
|
49 |
+
aiofiles==22.1.0
|
50 |
+
aiohttp-cors==0.7.0
|
51 |
+
aiohttp==3.9.1
|
52 |
+
aioitertools==0.11.0
|
53 |
+
aiorwlock==1.3.0
|
54 |
+
aiosignal==1.3.1
|
55 |
+
aiosqlite==0.19.0
|
56 |
+
albumentations==1.4.0
|
57 |
+
alembic==1.13.1
|
58 |
+
altair==5.3.0
|
59 |
+
annotated-types==0.6.0
|
60 |
+
annoy==1.17.3
|
61 |
+
anyio==4.2.0
|
62 |
+
apache-beam==2.46.0
|
63 |
+
aplus==0.11.0
|
64 |
+
appdirs==1.4.4
|
65 |
+
archspec==0.2.3
|
66 |
+
argon2-cffi-bindings==21.2.0
|
67 |
+
argon2-cffi==23.1.0
|
68 |
+
array-record==0.5.0
|
69 |
+
arrow==1.3.0
|
70 |
+
arviz==0.18.0
|
71 |
+
astroid==3.1.0
|
72 |
+
astropy-iers-data==0.2024.4.15.2.45.49
|
73 |
+
astropy==6.0.1
|
74 |
+
asttokens==2.4.1
|
75 |
+
astunparse==1.6.3
|
76 |
+
async-lru==2.0.4
|
77 |
+
async-timeout==4.0.3
|
78 |
+
attrs==23.2.0
|
79 |
+
audioread==3.0.1
|
80 |
+
autopep8==2.0.4
|
81 |
+
backoff==2.2.1
|
82 |
+
bayesian-optimization==1.4.3
|
83 |
+
beatrix_jupyterlab==2023.128.151533
|
84 |
+
beautifulsoup4==4.12.2
|
85 |
+
bitsandbytes==0.43.1
|
86 |
+
blake3==0.2.1
|
87 |
+
bleach==6.1.0
|
88 |
+
blessed==1.20.0
|
89 |
+
blinker==1.7.0
|
90 |
+
blis==0.7.10
|
91 |
+
blosc2==2.6.2
|
92 |
+
bokeh==3.4.1
|
93 |
+
boltons==23.1.1
|
94 |
+
boto3==1.26.100
|
95 |
+
botocore==1.34.69
|
96 |
+
bq_helper==0.4.1
|
97 |
+
bqplot==0.12.43
|
98 |
+
branca==0.7.1
|
99 |
+
brewer2mpl==1.4.1
|
100 |
+
brotlipy==0.7.0
|
101 |
+
cached-property==1.5.2
|
102 |
+
cachetools==4.2.4
|
103 |
+
cachetools==5.3.2
|
104 |
+
catalogue==2.0.10
|
105 |
+
catalyst==22.4
|
106 |
+
catboost==1.2.3
|
107 |
+
category-encoders==2.6.3
|
108 |
+
certifi==2024.2.2
|
109 |
+
cesium==0.12.1
|
110 |
+
cffi==1.16.0
|
111 |
+
charset-normalizer==3.3.2
|
112 |
+
chex==0.1.86
|
113 |
+
cleverhans==4.0.0
|
114 |
+
click-plugins==1.1.1
|
115 |
+
click==8.1.7
|
116 |
+
cligj==0.7.2
|
117 |
+
cloud-tpu-client==0.10
|
118 |
+
cloud-tpu-profiler==2.4.0
|
119 |
+
cloudpathlib==0.16.0
|
120 |
+
cloudpickle==2.2.1
|
121 |
+
cloudpickle==3.0.0
|
122 |
+
cmdstanpy==1.2.2
|
123 |
+
colorama==0.4.6
|
124 |
+
colorcet==3.1.0
|
125 |
+
colorful==0.5.6
|
126 |
+
colorlog==6.8.2
|
127 |
+
colorlover==0.3.0
|
128 |
+
comm==0.2.1
|
129 |
+
conda-libmamba-solver==23.7.0
|
130 |
+
conda-package-handling==2.2.0
|
131 |
+
conda==23.7.4
|
132 |
+
conda_package_streaming==0.9.0
|
133 |
+
confection==0.1.4
|
134 |
+
contextily==1.6.0
|
135 |
+
contourpy==1.2.0
|
136 |
+
contourpy==1.2.1
|
137 |
+
convertdate==2.4.0
|
138 |
+
crcmod==1.7
|
139 |
+
cryptography==41.0.7
|
140 |
+
cuda-python==12.4.0
|
141 |
+
cudf==23.8.0
|
142 |
+
cufflinks==0.17.3
|
143 |
+
cuml==23.8.0
|
144 |
+
cupy==13.0.0
|
145 |
+
cycler==0.12.1
|
146 |
+
cymem==2.0.8
|
147 |
+
cytoolz==0.12.3
|
148 |
+
daal4py==2024.3.0
|
149 |
+
daal==2024.3.0
|
150 |
+
dacite==1.8.1
|
151 |
+
dask-cuda==23.8.0
|
152 |
+
dask-cudf==23.8.0
|
153 |
+
dask-expr==1.0.11
|
154 |
+
dask==2024.4.1
|
155 |
+
dataclasses-json==0.6.4
|
156 |
+
dataproc_jupyter_plugin==0.1.66
|
157 |
+
datasets==2.18.0
|
158 |
+
datashader==0.16.0
|
159 |
+
datatile==1.0.3
|
160 |
+
db-dtypes==1.2.0
|
161 |
+
deap==1.4.1
|
162 |
+
debugpy==1.8.0
|
163 |
+
decorator==5.1.1
|
164 |
+
deepdiff==7.0.1
|
165 |
+
defusedxml==0.7.1
|
166 |
+
deprecation==2.1.0
|
167 |
+
descartes==1.1.0
|
168 |
+
dill==0.3.8
|
169 |
+
dipy==1.9.0
|
170 |
+
distlib==0.3.8
|
171 |
+
distributed==2023.7.1
|
172 |
+
distro==1.9.0
|
173 |
+
dm-tree==0.1.8
|
174 |
+
docker-pycreds==0.4.0
|
175 |
+
docker==7.0.0
|
176 |
+
docopt==0.6.2
|
177 |
+
docstring-parser==0.15
|
178 |
+
docstring-to-markdown==0.15
|
179 |
+
docutils==0.21.1
|
180 |
+
earthengine-api==0.1.399
|
181 |
+
easydict==1.13
|
182 |
+
easyocr==1.7.1
|
183 |
+
ecos==2.0.13
|
184 |
+
einops==0.8.0
|
185 |
+
eli5==0.13.0
|
186 |
+
emoji==2.11.0
|
187 |
+
en-core-web-lg==3.7.1
|
188 |
+
en-core-web-sm==3.7.1
|
189 |
+
entrypoints==0.4
|
190 |
+
ephem==4.1.5
|
191 |
+
esda==2.5.1
|
192 |
+
essentia==2.1b6.dev1110
|
193 |
+
et-xmlfile==1.1.0
|
194 |
+
etils==1.6.0
|
195 |
+
exceptiongroup==1.2.0
|
196 |
+
executing==2.0.1
|
197 |
+
explainable-ai-sdk==1.3.3
|
198 |
+
fastai==2.7.14
|
199 |
+
fastapi==0.108.0
|
200 |
+
fastavro==1.9.3
|
201 |
+
fastcore==1.5.29
|
202 |
+
fastdownload==0.0.7
|
203 |
+
fasteners==0.19
|
204 |
+
fastjsonschema==2.19.1
|
205 |
+
fastprogress==1.0.3
|
206 |
+
fastrlock==0.8.2
|
207 |
+
fasttext==0.9.2
|
208 |
+
feather-format==0.4.1
|
209 |
+
featuretools==1.30.0
|
210 |
+
filelock==3.13.1
|
211 |
+
fiona==1.9.6
|
212 |
+
fitter==1.7.0
|
213 |
+
flake8==7.0.0
|
214 |
+
flash-attn==2.5.8
|
215 |
+
flashtext==2.7
|
216 |
+
flatbuffers==23.5.26
|
217 |
+
flax==0.8.2
|
218 |
+
folium==0.16.0
|
219 |
+
fonttools==4.47.0
|
220 |
+
fonttools==4.51.0
|
221 |
+
fqdn==1.5.1
|
222 |
+
frozendict==2.4.2
|
223 |
+
frozenlist==1.4.1
|
224 |
+
fsspec==2024.2.0
|
225 |
+
fsspec==2024.3.1
|
226 |
+
funcy==2.0
|
227 |
+
fury==0.10.0
|
228 |
+
future==1.0.0
|
229 |
+
fuzzywuzzy==0.18.0
|
230 |
+
gast==0.5.4
|
231 |
+
gatspy==0.3
|
232 |
+
gcsfs==2024.2.0
|
233 |
+
gensim==4.3.2
|
234 |
+
geographiclib==2.0
|
235 |
+
geojson==3.1.0
|
236 |
+
geopandas==0.14.3
|
237 |
+
geoplot==0.5.1
|
238 |
+
geopy==2.4.1
|
239 |
+
geoviews==1.12.0
|
240 |
+
ggplot==0.11.5
|
241 |
+
giddy==2.3.5
|
242 |
+
gitdb==4.0.11
|
243 |
+
google-ai-generativelanguage==0.6.2
|
244 |
+
google-api-core==2.11.1
|
245 |
+
google-api-core==2.18.0
|
246 |
+
google-api-python-client==2.126.0
|
247 |
+
google-apitools==0.5.31
|
248 |
+
google-auth-httplib2==0.2.0
|
249 |
+
google-auth-oauthlib==1.2.0
|
250 |
+
google-auth==2.26.1
|
251 |
+
google-cloud-aiplatform==0.6.0a1
|
252 |
+
google-cloud-artifact-registry==1.10.0
|
253 |
+
google-cloud-automl==1.0.1
|
254 |
+
google-cloud-bigquery==2.34.4
|
255 |
+
google-cloud-bigtable==1.7.3
|
256 |
+
google-cloud-core==2.4.1
|
257 |
+
google-cloud-datastore==2.19.0
|
258 |
+
google-cloud-dlp==3.14.0
|
259 |
+
google-cloud-jupyter-config==0.0.5
|
260 |
+
google-cloud-language==2.13.3
|
261 |
+
google-cloud-monitoring==2.18.0
|
262 |
+
google-cloud-pubsub==2.19.0
|
263 |
+
google-cloud-pubsublite==1.9.0
|
264 |
+
google-cloud-recommendations-ai==0.7.1
|
265 |
+
google-cloud-resource-manager==1.11.0
|
266 |
+
google-cloud-spanner==3.40.1
|
267 |
+
google-cloud-storage==1.44.0
|
268 |
+
google-cloud-translate==3.12.1
|
269 |
+
google-cloud-videointelligence==2.13.3
|
270 |
+
google-cloud-vision==2.8.0
|
271 |
+
google-crc32c==1.5.0
|
272 |
+
google-generativeai==0.5.1
|
273 |
+
google-pasta==0.2.0
|
274 |
+
google-resumable-media==2.7.0
|
275 |
+
googleapis-common-protos==1.62.0
|
276 |
+
gplearn==0.4.2
|
277 |
+
gpustat==1.0.0
|
278 |
+
gpxpy==1.6.2
|
279 |
+
graphviz==0.20.3
|
280 |
+
greenlet==3.0.3
|
281 |
+
grpc-google-iam-v1==0.12.7
|
282 |
+
grpcio-status==1.48.1
|
283 |
+
grpcio-status==1.48.2
|
284 |
+
grpcio==1.51.1
|
285 |
+
grpcio==1.60.0
|
286 |
+
gviz-api==1.10.0
|
287 |
+
gym-notices==0.0.8
|
288 |
+
gym==0.26.2
|
289 |
+
gymnasium==0.29.0
|
290 |
+
h11==0.14.0
|
291 |
+
h2o==3.46.0.1
|
292 |
+
h5netcdf==1.3.0
|
293 |
+
h5py==3.10.0
|
294 |
+
haversine==2.8.1
|
295 |
+
hdfs==2.7.3
|
296 |
+
hep-ml==0.7.2
|
297 |
+
hijri-converter==2.3.1
|
298 |
+
hmmlearn==0.3.2
|
299 |
+
holidays==0.24
|
300 |
+
holoviews==1.18.3
|
301 |
+
hpsklearn==0.1.0
|
302 |
+
html5lib==1.1
|
303 |
+
htmlmin==0.1.12
|
304 |
+
httpcore==1.0.5
|
305 |
+
httplib2==0.21.0
|
306 |
+
httptools==0.6.1
|
307 |
+
httpx==0.27.0
|
308 |
+
huggingface-hub==0.22.2
|
309 |
+
hunspell==0.5.5
|
310 |
+
hydra-slayer==0.5.0
|
311 |
+
hyperopt==0.2.7
|
312 |
+
hypertools==0.8.0
|
313 |
+
idna==3.6
|
314 |
+
igraph==0.11.4
|
315 |
+
imagecodecs==2024.1.1
|
316 |
+
imageio==2.33.1
|
317 |
+
imbalanced-learn==0.12.2
|
318 |
+
imgaug==0.4.0
|
319 |
+
importlib-metadata==6.11.0
|
320 |
+
importlib-metadata==7.0.1
|
321 |
+
importlib-resources==6.1.1
|
322 |
+
inequality==1.0.1
|
323 |
+
iniconfig==2.0.0
|
324 |
+
ipydatawidgets==4.3.5
|
325 |
+
ipykernel==6.28.0
|
326 |
+
ipyleaflet==0.18.2
|
327 |
+
ipympl==0.7.0
|
328 |
+
ipython-genutils==0.2.0
|
329 |
+
ipython-genutils==0.2.0
|
330 |
+
ipython-sql==0.5.0
|
331 |
+
ipython==8.20.0
|
332 |
+
ipyvolume==0.6.3
|
333 |
+
ipyvue==1.11.0
|
334 |
+
ipyvuetify==1.9.4
|
335 |
+
ipywebrtc==0.6.0
|
336 |
+
ipywidgets==7.7.1
|
337 |
+
isoduration==20.11.0
|
338 |
+
isort==5.13.2
|
339 |
+
isoweek==1.3.3
|
340 |
+
itsdangerous==2.2.0
|
341 |
+
jaraco.classes==3.3.0
|
342 |
+
jax-jumpy==1.0.0
|
343 |
+
jax==0.4.23
|
344 |
+
jaxlib==0.4.23.dev20240116
|
345 |
+
jedi==0.19.1
|
346 |
+
jeepney==0.8.0
|
347 |
+
jieba==0.42.1
|
348 |
+
jmespath==1.0.1
|
349 |
+
joblib==1.4.0
|
350 |
+
json5==0.9.14
|
351 |
+
jsonpatch==1.33
|
352 |
+
jsonpointer==2.4
|
353 |
+
jsonschema-specifications==2023.12.1
|
354 |
+
jsonschema==4.20.0
|
355 |
+
jupyter-console==6.6.3
|
356 |
+
jupyter-events==0.9.0
|
357 |
+
jupyter-http-over-ws==0.0.8
|
358 |
+
jupyter-lsp==1.5.1
|
359 |
+
jupyter-server-mathjax==0.2.6
|
360 |
+
jupyter-ydoc==0.2.5
|
361 |
+
jupyter_client==7.4.9
|
362 |
+
jupyter_client==8.6.0
|
363 |
+
jupyter_core==5.7.1
|
364 |
+
jupyter_server==2.12.5
|
365 |
+
jupyter_server_fileid==0.9.1
|
366 |
+
jupyter_server_proxy==4.1.0
|
367 |
+
jupyter_server_terminals==0.5.1
|
368 |
+
jupyter_server_ydoc==0.8.0
|
369 |
+
jupyterlab-lsp==5.1.0
|
370 |
+
jupyterlab-widgets==3.0.9
|
371 |
+
jupyterlab==4.1.6
|
372 |
+
jupyterlab_git==0.44.0
|
373 |
+
jupyterlab_pygments==0.3.0
|
374 |
+
jupyterlab_server==2.25.2
|
375 |
+
jupytext==1.16.0
|
376 |
+
kaggle-environments==1.14.3
|
377 |
+
kaggle==1.6.12
|
378 |
+
kagglehub==0.2.3
|
379 |
+
keras-cv==0.8.2
|
380 |
+
keras-nlp==0.9.3
|
381 |
+
keras-tuner==1.4.6
|
382 |
+
keras==3.2.1
|
383 |
+
kernels-mixer==0.0.7
|
384 |
+
keyring==24.3.0
|
385 |
+
keyrings.google-artifactregistry-auth==1.1.2
|
386 |
+
kfp-pipeline-spec==0.2.2
|
387 |
+
kfp-server-api==2.0.5
|
388 |
+
kfp==2.5.0
|
389 |
+
kiwisolver==1.4.5
|
390 |
+
kmapper==2.0.1
|
391 |
+
kmodes==0.12.2
|
392 |
+
korean-lunar-calendar==0.3.1
|
393 |
+
kornia==0.7.2
|
394 |
+
kornia_rs==0.1.3
|
395 |
+
kt-legacy==1.0.5
|
396 |
+
kubernetes==26.1.0
|
397 |
+
langcodes==3.3.0
|
398 |
+
langid==1.1.6
|
399 |
+
lazy_loader==0.3
|
400 |
+
learntools==0.3.4
|
401 |
+
leven==1.0.4
|
402 |
+
libclang==16.0.6
|
403 |
+
libmambapy==1.5.0
|
404 |
+
libpysal==4.9.2
|
405 |
+
librosa==0.10.1
|
406 |
+
lightgbm==4.2.0
|
407 |
+
lightning-utilities==0.11.2
|
408 |
+
lime==0.2.0.1
|
409 |
+
line-profiler==4.1.2
|
410 |
+
linkify-it-py==2.0.3
|
411 |
+
llvmlite==0.41.1
|
412 |
+
llvmlite==0.42.0
|
413 |
+
lml==0.1.0
|
414 |
+
locket==1.0.0
|
415 |
+
loguru==0.7.2
|
416 |
+
lxml==5.2.1
|
417 |
+
lz4==4.3.3
|
418 |
+
mamba==1.5.0
|
419 |
+
mapclassify==2.6.1
|
420 |
+
markdown-it-py==3.0.0
|
421 |
+
marshmallow==3.21.1
|
422 |
+
matplotlib-inline==0.1.6
|
423 |
+
matplotlib-venn==0.11.10
|
424 |
+
matplotlib==3.7.5
|
425 |
+
matplotlib==3.8.4
|
426 |
+
mccabe==0.7.0
|
427 |
+
mdit-py-plugins==0.4.0
|
428 |
+
mdurl==0.1.2
|
429 |
+
memory-profiler==0.61.0
|
430 |
+
menuinst==2.0.1
|
431 |
+
mercantile==1.2.1
|
432 |
+
mgwr==2.2.1
|
433 |
+
missingno==0.5.2
|
434 |
+
mistune==0.8.4
|
435 |
+
mizani==0.11.1
|
436 |
+
ml-dtypes==0.2.0
|
437 |
+
mlcrate==0.2.0
|
438 |
+
mlens==0.2.3
|
439 |
+
mlxtend==0.23.1
|
440 |
+
mne==1.6.1
|
441 |
+
mnist==0.2.2
|
442 |
+
momepy==0.7.0
|
443 |
+
more-itertools==10.2.0
|
444 |
+
mpld3==0.5.10
|
445 |
+
mpmath==1.3.0
|
446 |
+
msgpack==1.0.7
|
447 |
+
multidict==6.0.4
|
448 |
+
multimethod==1.10
|
449 |
+
multipledispatch==1.0.0
|
450 |
+
multiprocess==0.70.16
|
451 |
+
munkres==1.1.4
|
452 |
+
murmurhash==1.0.10
|
453 |
+
mypy-extensions==1.0.0
|
454 |
+
namex==0.0.8
|
455 |
+
nb-conda-kernels==2.3.1
|
456 |
+
nb_conda==2.2.1
|
457 |
+
nbclassic==1.0.0
|
458 |
+
nbclient==0.5.13
|
459 |
+
nbconvert==6.4.5
|
460 |
+
nbdime==3.2.0
|
461 |
+
nbformat==5.9.2
|
462 |
+
ndindex==1.8
|
463 |
+
nest-asyncio==1.5.8
|
464 |
+
networkx==3.2.1
|
465 |
+
nibabel==5.2.1
|
466 |
+
nilearn==0.10.4
|
467 |
+
ninja==1.11.1.1
|
468 |
+
nltk==3.2.4
|
469 |
+
nose==1.3.7
|
470 |
+
notebook==6.5.4
|
471 |
+
notebook==6.5.6
|
472 |
+
notebook_executor==0.2
|
473 |
+
notebook_shim==0.2.3
|
474 |
+
numba==0.58.1
|
475 |
+
numba==0.59.1
|
476 |
+
numexpr==2.10.0
|
477 |
+
numpy==1.26.4
|
478 |
+
nvidia-ml-py==11.495.46
|
479 |
+
nvtx==0.2.10
|
480 |
+
oauth2client==4.1.3
|
481 |
+
oauthlib==3.2.2
|
482 |
+
objsize==0.6.1
|
483 |
+
odfpy==1.4.1
|
484 |
+
olefile==0.47
|
485 |
+
onnx==1.16.0
|
486 |
+
opencensus-context==0.1.3
|
487 |
+
opencensus==0.11.4
|
488 |
+
opencv-contrib-python==4.9.0.80
|
489 |
+
opencv-python-headless==4.9.0.80
|
490 |
+
opencv-python==4.9.0.80
|
491 |
+
openpyxl==3.1.2
|
492 |
+
openslide-python==1.3.1
|
493 |
+
opentelemetry-api==1.22.0
|
494 |
+
opentelemetry-exporter-otlp-proto-common==1.22.0
|
495 |
+
opentelemetry-exporter-otlp-proto-grpc==1.22.0
|
496 |
+
opentelemetry-exporter-otlp-proto-http==1.22.0
|
497 |
+
opentelemetry-exporter-otlp==1.22.0
|
498 |
+
opentelemetry-proto==1.22.0
|
499 |
+
opentelemetry-sdk==1.22.0
|
500 |
+
opentelemetry-semantic-conventions==0.43b0
|
501 |
+
opt-einsum==3.3.0
|
502 |
+
optax==0.2.2
|
503 |
+
optree==0.11.0
|
504 |
+
optuna==3.6.1
|
505 |
+
orbax-checkpoint==0.5.9
|
506 |
+
ordered-set==4.1.0
|
507 |
+
orjson==3.9.10
|
508 |
+
ortools==9.4.1874
|
509 |
+
osmnx==1.9.2
|
510 |
+
overrides==7.4.0
|
511 |
+
packaging==21.3
|
512 |
+
pandas-datareader==0.10.0
|
513 |
+
pandas-profiling==3.6.6
|
514 |
+
pandas-summary==0.2.0
|
515 |
+
pandas==2.1.4
|
516 |
+
pandas==2.2.2
|
517 |
+
pandasql==0.7.3
|
518 |
+
pandocfilters==1.5.0
|
519 |
+
panel==1.4.1
|
520 |
+
papermill==2.5.0
|
521 |
+
param==2.1.0
|
522 |
+
parso==0.8.3
|
523 |
+
partd==1.4.1
|
524 |
+
path.py==12.5.0
|
525 |
+
path==16.14.0
|
526 |
+
pathos==0.3.2
|
527 |
+
pathy==0.10.3
|
528 |
+
patsy==0.5.6
|
529 |
+
pdf2image==1.17.0
|
530 |
+
peft==0.11.1
|
531 |
+
pettingzoo==1.24.0
|
532 |
+
pexpect==4.8.0
|
533 |
+
pexpect==4.9.0
|
534 |
+
phik==0.12.4
|
535 |
+
pickleshare==0.7.5
|
536 |
+
pillow==10.3.0
|
537 |
+
pip==23.3.2
|
538 |
+
pkgutil_resolve_name==1.3.10
|
539 |
+
platformdirs==4.2.0
|
540 |
+
plotly-express==0.4.1
|
541 |
+
plotly==5.18.0
|
542 |
+
plotnine==0.13.4
|
543 |
+
pluggy==1.4.0
|
544 |
+
pointpats==2.4.0
|
545 |
+
polars==0.20.21
|
546 |
+
polyglot==16.7.4
|
547 |
+
pooch==1.8.1
|
548 |
+
pox==0.3.4
|
549 |
+
ppca==0.0.4
|
550 |
+
ppft==1.7.6.8
|
551 |
+
preprocessing==0.1.13
|
552 |
+
preshed==3.0.9
|
553 |
+
prettytable==3.9.0
|
554 |
+
progressbar2==4.4.2
|
555 |
+
prometheus-client==0.19.0
|
556 |
+
promise==2.3
|
557 |
+
prompt-toolkit==3.0.42
|
558 |
+
prompt-toolkit==3.0.43
|
559 |
+
prophet==1.1.1
|
560 |
+
proto-plus==1.23.0
|
561 |
+
protobuf==3.20.3
|
562 |
+
protobuf==4.21.12
|
563 |
+
psutil==5.9.3
|
564 |
+
psutil==5.9.7
|
565 |
+
ptyprocess==0.7.0
|
566 |
+
pudb==2024.1
|
567 |
+
pure-eval==0.2.2
|
568 |
+
py-cpuinfo==9.0.0
|
569 |
+
py-spy==0.3.14
|
570 |
+
py4j==0.10.9.7
|
571 |
+
pyLDAvis==3.4.1
|
572 |
+
pyOpenSSL==23.3.0
|
573 |
+
pyaml==23.12.0
|
574 |
+
pyarrow-hotfix==0.6
|
575 |
+
pyarrow==15.0.2
|
576 |
+
pyasn1-modules==0.3.0
|
577 |
+
pyasn1==0.5.1
|
578 |
+
pybind11==2.12.0
|
579 |
+
pyclipper==1.3.0.post5
|
580 |
+
pycodestyle==2.11.1
|
581 |
+
pycosat==0.6.6
|
582 |
+
pycparser==2.21
|
583 |
+
pycryptodome==3.20.0
|
584 |
+
pyct==0.5.0
|
585 |
+
pycuda==2024.1
|
586 |
+
pydantic==2.5.3
|
587 |
+
pydantic==2.7.0
|
588 |
+
pydantic_core==2.14.6
|
589 |
+
pydantic_core==2.18.1
|
590 |
+
pydegensac==0.1.2
|
591 |
+
pydicom==2.4.4
|
592 |
+
pydocstyle==6.3.0
|
593 |
+
pydot==1.4.2
|
594 |
+
pydub==0.25.1
|
595 |
+
pyemd==1.0.0
|
596 |
+
pyerfa==2.0.1.4
|
597 |
+
pyexcel-io==0.6.6
|
598 |
+
pyexcel-ods==0.6.0
|
599 |
+
pyflakes==3.2.0
|
600 |
+
pygltflib==1.16.2
|
601 |
+
pykalman==0.9.7
|
602 |
+
pylibraft==23.8.0
|
603 |
+
pylint==3.1.0
|
604 |
+
pymc3==3.11.4
|
605 |
+
pymongo==3.13.0
|
606 |
+
pynndescent==0.5.12
|
607 |
+
pynvml==11.4.1
|
608 |
+
pynvrtc==9.2
|
609 |
+
pyparsing==3.1.1
|
610 |
+
pyparsing==3.1.2
|
611 |
+
pypdf==4.2.0
|
612 |
+
pyproj==3.6.1
|
613 |
+
pysal==24.1
|
614 |
+
pyshp==2.3.1
|
615 |
+
pytesseract==0.3.10
|
616 |
+
pytest==8.1.1
|
617 |
+
python-bidi==0.4.2
|
618 |
+
python-dateutil==2.9.0.post0
|
619 |
+
python-dotenv==1.0.0
|
620 |
+
python-json-logger==2.0.7
|
621 |
+
python-louvain==0.16
|
622 |
+
python-lsp-jsonrpc==1.1.2
|
623 |
+
python-lsp-server==1.11.0
|
624 |
+
python-slugify==8.0.4
|
625 |
+
python-utils==3.8.2
|
626 |
+
pythreejs==2.4.2
|
627 |
+
pytoolconfig==1.3.1
|
628 |
+
pytools==2024.1.1
|
629 |
+
pytorch-ignite==0.5.0.post2
|
630 |
+
pytorch-lightning==2.2.2
|
631 |
+
pytz==2023.3.post1
|
632 |
+
pytz==2024.1
|
633 |
+
pyu2f==0.1.5
|
634 |
+
pyviz_comms==3.0.2
|
635 |
+
pyzmq==24.0.1
|
636 |
+
pyzmq==25.1.2
|
637 |
+
qgrid==1.3.1
|
638 |
+
qtconsole==5.5.1
|
639 |
+
quantecon==0.7.2
|
640 |
+
qudida==0.0.4
|
641 |
+
raft-dask==23.8.0
|
642 |
+
rasterio==1.3.10
|
643 |
+
rasterstats==0.19.0
|
644 |
+
ray-cpp==2.9.0
|
645 |
+
ray==2.9.0
|
646 |
+
referencing==0.32.1
|
647 |
+
regex==2023.12.25
|
648 |
+
requests-oauthlib==1.3.1
|
649 |
+
requests-toolbelt==0.10.1
|
650 |
+
requests==2.31.0
|
651 |
+
retrying==1.3.3
|
652 |
+
retrying==1.3.4
|
653 |
+
rfc3339-validator==0.1.4
|
654 |
+
rfc3986-validator==0.1.1
|
655 |
+
rgf-python==3.12.0
|
656 |
+
rich-click==1.7.4
|
657 |
+
rich==13.7.0
|
658 |
+
rich==13.7.1
|
659 |
+
rmm==23.8.0
|
660 |
+
rope==1.13.0
|
661 |
+
rpds-py==0.16.2
|
662 |
+
rsa==4.9
|
663 |
+
ruamel-yaml-conda==0.15.100
|
664 |
+
ruamel.yaml.clib==0.2.7
|
665 |
+
ruamel.yaml==0.17.40
|
666 |
+
s2sphere==0.2.5
|
667 |
+
s3fs==2024.2.0
|
668 |
+
s3transfer==0.6.2
|
669 |
+
safetensors==0.4.3
|
670 |
+
scattertext==0.1.19
|
671 |
+
scikit-image==0.22.0
|
672 |
+
scikit-learn-intelex==2024.3.0
|
673 |
+
scikit-learn==1.2.2
|
674 |
+
scikit-multilearn==0.2.0
|
675 |
+
scikit-optimize==0.10.1
|
676 |
+
scikit-plot==0.3.7
|
677 |
+
scikit-surprise==1.1.3
|
678 |
+
scipy==1.11.4
|
679 |
+
scipy==1.13.0
|
680 |
+
seaborn==0.12.2
|
681 |
+
segment_anything==1.0
|
682 |
+
segregation==2.5
|
683 |
+
semver==3.0.2
|
684 |
+
sentencepiece==0.2.0
|
685 |
+
sentry-sdk==1.45.0
|
686 |
+
setproctitle==1.3.3
|
687 |
+
setuptools-git==1.2
|
688 |
+
setuptools-scm==8.0.4
|
689 |
+
setuptools==69.0.3
|
690 |
+
shap==0.44.1
|
691 |
+
shapely==2.0.4
|
692 |
+
shellingham==1.5.4
|
693 |
+
shtab==1.7.1
|
694 |
+
simpervisor==1.0.0
|
695 |
+
simplejson==3.19.2
|
696 |
+
six==1.16.0
|
697 |
+
sklearn-pandas==2.2.0
|
698 |
+
slicer==0.0.7
|
699 |
+
smart-open==6.4.0
|
700 |
+
smmap==5.0.1
|
701 |
+
sniffio==1.3.0
|
702 |
+
snowballstemmer==2.2.0
|
703 |
+
snuggs==1.4.7
|
704 |
+
sortedcontainers==2.4.0
|
705 |
+
soundfile==0.12.1
|
706 |
+
soupsieve==2.5
|
707 |
+
soxr==0.3.7
|
708 |
+
spacy-legacy==3.0.12
|
709 |
+
spacy-loggers==1.0.5
|
710 |
+
spacy==3.7.3
|
711 |
+
spaghetti==1.7.5.post1
|
712 |
+
spectral==0.23.1
|
713 |
+
spglm==1.1.0
|
714 |
+
sphinx-rtd-theme==0.2.4
|
715 |
+
spint==1.0.7
|
716 |
+
splot==1.1.5.post1
|
717 |
+
spopt==0.6.0
|
718 |
+
spreg==1.4.2
|
719 |
+
spvcm==0.3.0
|
720 |
+
sqlparse==0.4.4
|
721 |
+
squarify==0.4.3
|
722 |
+
srsly==2.4.8
|
723 |
+
stable-baselines3==2.1.0
|
724 |
+
stack-data==0.6.2
|
725 |
+
stack-data==0.6.3
|
726 |
+
stanio==0.5.0
|
727 |
+
starlette==0.32.0.post1
|
728 |
+
statsmodels==0.14.1
|
729 |
+
stemming==1.0.1
|
730 |
+
stop-words==2018.7.23
|
731 |
+
stopit==1.1.2
|
732 |
+
stumpy==1.12.0
|
733 |
+
sympy==1.12
|
734 |
+
tables==3.9.2
|
735 |
+
tabulate==0.9.0
|
736 |
+
tangled-up-in-unicode==0.2.0
|
737 |
+
tbb==2021.12.0
|
738 |
+
tblib==3.0.0
|
739 |
+
tenacity==8.2.3
|
740 |
+
tensorboard-data-server==0.7.2
|
741 |
+
tensorboard-plugin-profile==2.15.0
|
742 |
+
tensorboard==2.15.1
|
743 |
+
tensorboardX==2.6.2.2
|
744 |
+
tensorflow-cloud==0.1.16
|
745 |
+
tensorflow-datasets==4.9.4
|
746 |
+
tensorflow-decision-forests==1.8.1
|
747 |
+
tensorflow-estimator==2.15.0
|
748 |
+
tensorflow-hub==0.16.1
|
749 |
+
tensorflow-io-gcs-filesystem==0.35.0
|
750 |
+
tensorflow-io==0.35.0
|
751 |
+
tensorflow-metadata==0.14.0
|
752 |
+
tensorflow-probability==0.23.0
|
753 |
+
tensorflow-serving-api==2.14.1
|
754 |
+
tensorflow-text==2.15.0
|
755 |
+
tensorflow-transform==0.14.0
|
756 |
+
tensorflow==2.15.0
|
757 |
+
tensorstore==0.1.56
|
758 |
+
termcolor==2.4.0
|
759 |
+
terminado==0.18.0
|
760 |
+
testpath==0.6.0
|
761 |
+
text-unidecode==1.3
|
762 |
+
textblob==0.18.0.post0
|
763 |
+
texttable==1.7.0
|
764 |
+
tf_keras==2.15.1
|
765 |
+
tfp-nightly==0.24.0.dev0
|
766 |
+
thinc==8.2.2
|
767 |
+
threadpoolctl==3.2.0
|
768 |
+
tifffile==2023.12.9
|
769 |
+
timm==0.9.16
|
770 |
+
tinycss2==1.2.1
|
771 |
+
tobler==0.11.2
|
772 |
+
tokenizers==0.15.2
|
773 |
+
toml==0.10.2
|
774 |
+
tomli==2.0.1
|
775 |
+
tomlkit==0.12.4
|
776 |
+
toolz==0.12.1
|
777 |
+
torch==2.1.2
|
778 |
+
torchaudio==2.1.2
|
779 |
+
torchdata==0.7.1
|
780 |
+
torchinfo==1.8.0
|
781 |
+
torchmetrics==1.3.2
|
782 |
+
torchtext==0.16.2
|
783 |
+
torchvision==0.16.2
|
784 |
+
tornado==6.3.3
|
785 |
+
tqdm==4.66.1
|
786 |
+
traceml==1.0.8
|
787 |
+
traitlets==5.9.0
|
788 |
+
traittypes==0.2.1
|
789 |
+
transformers==4.39.3
|
790 |
+
treelite-runtime==3.2.0
|
791 |
+
treelite==3.2.0
|
792 |
+
trl==0.8.6
|
793 |
+
truststore==0.8.0
|
794 |
+
trx-python==0.2.9
|
795 |
+
tsfresh==0.20.2
|
796 |
+
typeguard==4.1.5
|
797 |
+
typer==0.9.0
|
798 |
+
typer==0.9.4
|
799 |
+
types-python-dateutil==2.8.19.20240106
|
800 |
+
typing-inspect==0.9.0
|
801 |
+
typing-utils==0.1.0
|
802 |
+
typing_extensions==4.9.0
|
803 |
+
tyro==0.8.4
|
804 |
+
tzdata==2023.4
|
805 |
+
uc-micro-py==1.0.3
|
806 |
+
ucx-py==0.33.0
|
807 |
+
ujson==5.9.0
|
808 |
+
umap-learn==0.5.6
|
809 |
+
unicodedata2==15.1.0
|
810 |
+
update-checker==0.18.0
|
811 |
+
uri-template==1.3.0
|
812 |
+
uritemplate==3.0.1
|
813 |
+
urllib3==1.26.18
|
814 |
+
urllib3==2.1.0
|
815 |
+
urwid==2.6.10
|
816 |
+
urwid_readline==0.14
|
817 |
+
uvicorn==0.25.0
|
818 |
+
uvloop==0.19.0
|
819 |
+
vaex-astro==0.9.3
|
820 |
+
vaex-core==4.17.1
|
821 |
+
vaex-hdf5==0.14.1
|
822 |
+
vaex-jupyter==0.8.2
|
823 |
+
vaex-ml==0.18.3
|
824 |
+
vaex-server==0.9.0
|
825 |
+
vaex-viz==0.5.4
|
826 |
+
vaex==4.17.0
|
827 |
+
vec_noise==1.1.4
|
828 |
+
vecstack==0.4.0
|
829 |
+
virtualenv==20.21.0
|
830 |
+
visions==0.7.5
|
831 |
+
vowpalwabbit==9.9.0
|
832 |
+
vtk==9.3.0
|
833 |
+
wandb==0.16.6
|
834 |
+
wasabi==1.1.2
|
835 |
+
watchfiles==0.21.0
|
836 |
+
wavio==0.0.8
|
837 |
+
wcwidth==0.2.13
|
838 |
+
weasel==0.3.4
|
839 |
+
webcolors==1.13
|
840 |
+
webencodings==0.5.1
|
841 |
+
websocket-client==1.7.0
|
842 |
+
websockets==12.0
|
843 |
+
wfdb==4.1.2
|
844 |
+
whatthepatch==1.0.5
|
845 |
+
wheel==0.42.0
|
846 |
+
widgetsnbextension==3.6.6
|
847 |
+
witwidget==1.8.1
|
848 |
+
woodwork==0.30.0
|
849 |
+
wordcloud==1.9.3
|
850 |
+
wordsegment==1.3.1
|
851 |
+
wrapt==1.14.1
|
852 |
+
xarray-einstats==0.7.0
|
853 |
+
xarray==2024.3.0
|
854 |
+
xgboost==2.0.3
|
855 |
+
xvfbwrapper==0.2.9
|
856 |
+
xxhash==3.4.1
|
857 |
+
xyzservices==2024.4.0
|
858 |
+
y-py==0.6.2
|
859 |
+
yapf==0.40.2
|
860 |
+
yarl==1.9.3
|
861 |
+
yarl==1.9.4
|
862 |
+
ydata-profiling==4.6.4
|
863 |
+
yellowbrick==1.5
|
864 |
+
ypy-websocket==0.8.4
|
865 |
+
zict==3.0.0
|
866 |
+
zipp==3.17.0
|
867 |
+
zstandard==0.22.0
|
wandb/run-20240522_054348-vgrzs6jq/files/wandb-metadata.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.15.133+-x86_64-with-glibc2.31",
|
3 |
+
"python": "3.10.13",
|
4 |
+
"heartbeatAt": "2024-05-22T05:43:48.890724",
|
5 |
+
"startedAt": "2024-05-22T05:43:48.627495",
|
6 |
+
"docker": null,
|
7 |
+
"cuda": null,
|
8 |
+
"args": [
|
9 |
+
"-f",
|
10 |
+
"/tmp/tmpem7pu0hu.json",
|
11 |
+
"--HistoryManager.hist_file=:memory:"
|
12 |
+
],
|
13 |
+
"state": "running",
|
14 |
+
"program": "<python with no main file>",
|
15 |
+
"codePathLocal": null,
|
16 |
+
"host": "92dcc4555414",
|
17 |
+
"username": "root",
|
18 |
+
"executable": "/opt/conda/bin/python",
|
19 |
+
"cpu_count": 2,
|
20 |
+
"cpu_count_logical": 4,
|
21 |
+
"cpu_freq": {
|
22 |
+
"current": 2000.194,
|
23 |
+
"min": 0.0,
|
24 |
+
"max": 0.0
|
25 |
+
},
|
26 |
+
"cpu_freq_per_core": [
|
27 |
+
{
|
28 |
+
"current": 2000.194,
|
29 |
+
"min": 0.0,
|
30 |
+
"max": 0.0
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"current": 2000.194,
|
34 |
+
"min": 0.0,
|
35 |
+
"max": 0.0
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"current": 2000.194,
|
39 |
+
"min": 0.0,
|
40 |
+
"max": 0.0
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"current": 2000.194,
|
44 |
+
"min": 0.0,
|
45 |
+
"max": 0.0
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"disk": {
|
49 |
+
"/": {
|
50 |
+
"total": 8062.387607574463,
|
51 |
+
"used": 5598.47790145874
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"gpu": "Tesla T4",
|
55 |
+
"gpu_count": 2,
|
56 |
+
"gpu_devices": [
|
57 |
+
{
|
58 |
+
"name": "Tesla T4",
|
59 |
+
"memory_total": 16106127360
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"name": "Tesla T4",
|
63 |
+
"memory_total": 16106127360
|
64 |
+
}
|
65 |
+
],
|
66 |
+
"memory": {
|
67 |
+
"total": 31.357555389404297
|
68 |
+
}
|
69 |
+
}
|
wandb/run-20240522_054348-vgrzs6jq/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"train/loss": 0.5057, "train/grad_norm": 6.874316215515137, "train/learning_rate": 1.0444444444444445e-05, "train/rewards/chosen": -4.634873390197754, "train/rewards/rejected": -9.829792976379395, "train/rewards/accuracies": 0.875, "train/rewards/margins": 5.194918632507324, "train/logps/rejected": -3123.00830078125, "train/logps/chosen": -1652.506591796875, "train/logits/rejected": -4.661086082458496, "train/logits/chosen": -4.734623908996582, "train/epoch": 0.06, "train/global_step": 100, "_timestamp": 1716364238.0474954, "_runtime": 7609.412012338638, "_step": 10, "eval/loss": 0.4624544084072113, "eval/runtime": 5068.444, "eval/samples_per_second": 0.355, "eval/steps_per_second": 0.355, "eval/rewards/chosen": -5.165964603424072, "eval/rewards/rejected": -10.169805526733398, "eval/rewards/accuracies": 0.8722931742668152, "eval/rewards/margins": 5.003841876983643, "eval/logps/rejected": -3300.483154296875, "eval/logps/chosen": -2120.26904296875, "eval/logits/rejected": -4.54031229019165, "eval/logits/chosen": -4.554856777191162}
|
wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20240522_054348-vgrzs6jq/logs/debug.log
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-05-22 05:43:48,628 INFO MainThread:24 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
|
2 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Configure stats pid to 24
|
3 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
|
4 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
|
5 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
|
6 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
|
7 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
8 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
|
9 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {}
|
10 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():521] Logging user logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug.log
|
11 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():522] Logging internal logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log
|
12 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():561] calling init triggers
|
13 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
|
14 |
+
config: {}
|
15 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():611] starting backend
|
16 |
+
2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():615] setting up manager
|
17 |
+
2024-05-22 05:43:48,632 INFO MainThread:24 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
18 |
+
2024-05-22 05:43:48,635 INFO MainThread:24 [wandb_init.py:init():623] backend started and connected
|
19 |
+
2024-05-22 05:43:48,638 INFO MainThread:24 [wandb_init.py:init():715] updated telemetry
|
20 |
+
2024-05-22 05:43:48,641 INFO MainThread:24 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
|
21 |
+
2024-05-22 05:43:48,764 INFO MainThread:24 [wandb_run.py:_on_init():2357] communicating current version
|
22 |
+
2024-05-22 05:43:48,850 INFO MainThread:24 [wandb_run.py:_on_init():2366] got version response upgrade_message: "wandb version 0.17.0 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
|
23 |
+
|
24 |
+
2024-05-22 05:43:48,851 INFO MainThread:24 [wandb_init.py:init():799] starting run threads in backend
|
25 |
+
2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_console_start():2335] atexit reg
|
26 |
+
2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_redirect():2190] redirect: wrap_raw
|
27 |
+
2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2255] Wrapping output streams.
|
28 |
+
2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2280] Redirects installed.
|
29 |
+
2024-05-22 05:44:04,916 INFO MainThread:24 [wandb_init.py:init():842] run started, returning control to user process
|
30 |
+
2024-05-22 05:47:10,600 INFO MainThread:24 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'meta-llama/Llama-2-7b-chat-hf', 'transformers_version': '4.39.3', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May22_05-46-00_92dcc4555414', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'VanCan23/DPO_Vietnamese_chatbot_lessData', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None}
|
wandb/run-20240522_054348-vgrzs6jq/run-vgrzs6jq.wandb
ADDED
Binary file (464 kB). View file
|
|