VanCan23 committed on
Commit dbfbfc3
1 Parent(s): c722a13

Training in progress, step 100

adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 128,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "up_proj",
+     "v_proj",
+     "o_proj",
+     "down_proj",
+     "q_proj",
+     "k_proj",
+     "gate_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
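
For reference, a minimal sketch of how an equivalent adapter configuration could be built with the peft library; the hyperparameters are copied from the file above, while the surrounding training code is an assumption:

    from peft import LoraConfig

    # Mirrors adapter_config.json: rank-128 LoRA with alpha 128 and 5% dropout,
    # applied to every attention and MLP projection of the Llama-2 base model.
    lora_config = LoraConfig(
        r=128,
        lora_alpha=128,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
    )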
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bf876e785763ef5ddcd813701c0f1fa8169366ac31046d716c7357c64672cef
+ size 1279323952
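
This is a Git LFS pointer rather than the tensors themselves; the actual adapter weights (roughly 1.28 GB) live in LFS. A hedged sketch of attaching the adapter to the base model with peft, assuming the hub repo id recorded in the training arguments further down:

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    # Load the frozen base model, then attach the LoRA adapter pushed by this commit.
    base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
    model = PeftModel.from_pretrained(base, "VanCan23/DPO_Vietnamese_chatbot_lessData")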
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "</s>": 23596,
+   "<pad>": 23598,
+   "<s>": 23595,
+   "<unk>": 23597
+ }
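
A file like this is typically written when special tokens are registered on top of an existing vocabulary, which is why the ids above sit just past the base vocabulary and are consecutive. A hedged sketch (the source tokenizer path is a placeholder):

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("path/to/base-tokenizer")  # placeholder
    # Newly registered special tokens are appended after the existing vocabulary,
    # receiving consecutive ids such as 23595-23598 above.
    tokenizer.add_special_tokens(
        {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
    )
    tokenizer.save_pretrained("output_dir")  # writes added_tokens.json alongside the other tokenizer files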
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/May22_05-46-00_92dcc4555414/events.out.tfevents.1716356830.92dcc4555414.24.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:337ffc5a348d28a205517078012f86cb26e941d456bdc7fd516aadd77ef5a858
+ size 12681
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "23595": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "23596": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "23597": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "23598": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "model_max_length": 1024,
+   "pad_token": "<pad>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<unk>"
+ }
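
The chat_template above renders conversations in a <|system|>/<|user|>/<|assistant|> format, closing each turn with the EOS token. A minimal usage sketch; the repo id comes from the training arguments below, and the message contents are illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("VanCan23/DPO_Vietnamese_chatbot_lessData")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Xin chào!"},
    ]
    # add_generation_prompt=True appends the trailing '<|assistant|>' cue
    # produced by the template's `loop.last` branch.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    print(prompt)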
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f2347a47dcb65e4c27c7ecf02ead180ffd83b8034d525b3b2ca60a664d98dc6
+ size 4984
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log ADDED
@@ -0,0 +1,30 @@
+ 2024-05-22 05:43:48,628 INFO MainThread:24 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Configure stats pid to 24
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():521] Logging user logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug.log
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():522] Logging internal logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():561] calling init triggers
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
+ config: {}
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():611] starting backend
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():615] setting up manager
+ 2024-05-22 05:43:48,632 INFO MainThread:24 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2024-05-22 05:43:48,635 INFO MainThread:24 [wandb_init.py:init():623] backend started and connected
+ 2024-05-22 05:43:48,638 INFO MainThread:24 [wandb_init.py:init():715] updated telemetry
+ 2024-05-22 05:43:48,641 INFO MainThread:24 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
+ 2024-05-22 05:43:48,764 INFO MainThread:24 [wandb_run.py:_on_init():2357] communicating current version
+ 2024-05-22 05:43:48,850 INFO MainThread:24 [wandb_run.py:_on_init():2366] got version response upgrade_message: "wandb version 0.17.0 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
+
+ 2024-05-22 05:43:48,851 INFO MainThread:24 [wandb_init.py:init():799] starting run threads in backend
+ 2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_console_start():2335] atexit reg
+ 2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_redirect():2190] redirect: wrap_raw
+ 2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2255] Wrapping output streams.
+ 2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2280] Redirects installed.
+ 2024-05-22 05:44:04,916 INFO MainThread:24 [wandb_init.py:init():842] run started, returning control to user process
+ 2024-05-22 05:47:10,600 INFO MainThread:24 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'meta-llama/Llama-2-7b-chat-hf', 'transformers_version': '4.39.3', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May22_05-46-00_92dcc4555414', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'VanCan23/DPO_Vietnamese_chatbot_lessData', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None}
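
For orientation, the config dump above implies a 4-bit QLoRA DPO run. A hedged sketch of the corresponding setup; it mirrors the logged values, but the trl DPOTrainer wiring and the dataset are assumptions:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments

    # 4-bit NF4 quantization with fp16 compute, as in quantization_config above.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=False,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf", quantization_config=bnb_config
    )

    # Values copied from the logged training arguments.
    args = TrainingArguments(
        output_dir="/kaggle/working/",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        num_train_epochs=1,
        optim="paged_adamw_32bit",
        fp16=True,
        gradient_checkpointing=True,
        logging_steps=10,
        evaluation_strategy="steps",
        eval_steps=100,
        save_steps=100,
        save_total_limit=1,
        push_to_hub=True,
        hub_model_id="VanCan23/DPO_Vietnamese_chatbot_lessData",
        report_to=["tensorboard", "wandb"],
    )

    # The train/rewards/* and train/logps/* metrics below are those emitted by
    # trl's DPOTrainer; the exact trainer wiring is an assumption:
    # from trl import DPOTrainer
    # trainer = DPOTrainer(model=model, args=args, train_dataset=..., tokenizer=...)
    # trainer.train()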
wandb/run-20240522_054348-vgrzs6jq/files/conda-environment.yaml ADDED
File without changes
wandb/run-20240522_054348-vgrzs6jq/files/config.yaml ADDED
@@ -0,0 +1,753 @@
+ wandb_version: 1
+
+ _wandb:
+   desc: null
+   value:
+     python_version: 3.10.13
+     cli_version: 0.16.6
+     is_jupyter_run: false
+     is_kaggle_kernel: true
+     start_time: 1716356628.0
+     t:
+       1:
+       - 55
+       - 105
+       2:
+       - 1
+       - 2
+       - 3
+       - 5
+       - 11
+       - 12
+       - 49
+       - 51
+       - 53
+       - 55
+       - 71
+       - 84
+       - 98
+       - 105
+       3:
+       - 7
+       - 23
+       4: 3.10.13
+       5: 0.16.6
+       8:
+       - 2
+       - 5
+       - 13
+       9:
+         1: transformers_trainer
+         13: linux-x86_64
+     framework: huggingface
+     m:
+     - 1: train/global_step
+       6:
+       - 3
+     - 1: train/loss
+       5: 1
+       6:
+       - 1
+     - 1: train/grad_norm
+       5: 1
+       6:
+       - 1
+     - 1: train/learning_rate
+       5: 1
+       6:
+       - 1
+     - 1: train/rewards/chosen
+       5: 1
+       6:
+       - 1
+     - 1: train/rewards/rejected
+       5: 1
+       6:
+       - 1
+     - 1: train/rewards/accuracies
+       5: 1
+       6:
+       - 1
+     - 1: train/rewards/margins
+       5: 1
+       6:
+       - 1
+     - 1: train/logps/rejected
+       5: 1
+       6:
+       - 1
+     - 1: train/logps/chosen
+       5: 1
+       6:
+       - 1
+     - 1: train/logits/rejected
+       5: 1
+       6:
+       - 1
+     - 1: train/logits/chosen
+       5: 1
+       6:
+       - 1
+     - 1: train/epoch
+       5: 1
+       6:
+       - 1
+     - 1: eval/loss
+       5: 1
+       6:
+       - 1
+     - 1: eval/runtime
+       5: 1
+       6:
+       - 1
+     - 1: eval/samples_per_second
+       5: 1
+       6:
+       - 1
+     - 1: eval/steps_per_second
+       5: 1
+       6:
+       - 1
+     - 1: eval/rewards/chosen
+       5: 1
+       6:
+       - 1
+     - 1: eval/rewards/rejected
+       5: 1
+       6:
+       - 1
+     - 1: eval/rewards/accuracies
+       5: 1
+       6:
+       - 1
+     - 1: eval/rewards/margins
+       5: 1
+       6:
+       - 1
+     - 1: eval/logps/rejected
+       5: 1
+       6:
+       - 1
+     - 1: eval/logps/chosen
+       5: 1
+       6:
+       - 1
+     - 1: eval/logits/rejected
+       5: 1
+       6:
+       - 1
+     - 1: eval/logits/chosen
+       5: 1
+       6:
+       - 1
+ vocab_size:
+   desc: null
+   value: 32000
+ max_position_embeddings:
+   desc: null
+   value: 4096
+ hidden_size:
+   desc: null
+   value: 4096
+ intermediate_size:
+   desc: null
+   value: 11008
+ num_hidden_layers:
+   desc: null
+   value: 32
+ num_attention_heads:
+   desc: null
+   value: 32
+ num_key_value_heads:
+   desc: null
+   value: 32
+ hidden_act:
+   desc: null
+   value: silu
+ initializer_range:
+   desc: null
+   value: 0.02
+ rms_norm_eps:
+   desc: null
+   value: 1.0e-05
+ pretraining_tp:
+   desc: null
+   value: 1
+ use_cache:
+   desc: null
+   value: false
+ rope_theta:
+   desc: null
+   value: 10000.0
+ rope_scaling:
+   desc: null
+   value: null
+ attention_bias:
+   desc: null
+   value: false
+ attention_dropout:
+   desc: null
+   value: 0.0
+ return_dict:
+   desc: null
+   value: true
+ output_hidden_states:
+   desc: null
+   value: false
+ output_attentions:
+   desc: null
+   value: false
+ torchscript:
+   desc: null
+   value: false
+ torch_dtype:
+   desc: null
+   value: float16
+ use_bfloat16:
+   desc: null
+   value: false
+ tf_legacy_loss:
+   desc: null
+   value: false
+ pruned_heads:
+   desc: null
+   value: {}
+ tie_word_embeddings:
+   desc: null
+   value: false
+ chunk_size_feed_forward:
+   desc: null
+   value: 0
+ is_encoder_decoder:
+   desc: null
+   value: false
+ is_decoder:
+   desc: null
+   value: false
+ cross_attention_hidden_size:
+   desc: null
+   value: null
+ add_cross_attention:
+   desc: null
+   value: false
+ tie_encoder_decoder:
+   desc: null
+   value: false
+ max_length:
+   desc: null
+   value: 20
+ min_length:
+   desc: null
+   value: 0
+ do_sample:
+   desc: null
+   value: false
+ early_stopping:
+   desc: null
+   value: false
+ num_beams:
+   desc: null
+   value: 1
+ num_beam_groups:
+   desc: null
+   value: 1
+ diversity_penalty:
+   desc: null
+   value: 0.0
+ temperature:
+   desc: null
+   value: 1.0
+ top_k:
+   desc: null
+   value: 50
+ top_p:
+   desc: null
+   value: 1.0
+ typical_p:
+   desc: null
+   value: 1.0
+ repetition_penalty:
+   desc: null
+   value: 1.0
+ length_penalty:
+   desc: null
+   value: 1.0
+ no_repeat_ngram_size:
+   desc: null
+   value: 0
+ encoder_no_repeat_ngram_size:
+   desc: null
+   value: 0
+ bad_words_ids:
+   desc: null
+   value: null
+ num_return_sequences:
+   desc: null
+   value: 1
+ output_scores:
+   desc: null
+   value: false
+ return_dict_in_generate:
+   desc: null
+   value: false
+ forced_bos_token_id:
+   desc: null
+   value: null
+ forced_eos_token_id:
+   desc: null
+   value: null
+ remove_invalid_values:
+   desc: null
+   value: false
+ exponential_decay_length_penalty:
+   desc: null
+   value: null
+ suppress_tokens:
+   desc: null
+   value: null
+ begin_suppress_tokens:
+   desc: null
+   value: null
+ architectures:
+   desc: null
+   value:
+   - LlamaForCausalLM
+ finetuning_task:
+   desc: null
+   value: null
+ id2label:
+   desc: null
+   value:
+     '0': LABEL_0
+     '1': LABEL_1
+ label2id:
+   desc: null
+   value:
+     LABEL_0: 0
+     LABEL_1: 1
+ tokenizer_class:
+   desc: null
+   value: null
+ prefix:
+   desc: null
+   value: null
+ bos_token_id:
+   desc: null
+   value: 1
+ pad_token_id:
+   desc: null
+   value: null
+ eos_token_id:
+   desc: null
+   value: 2
+ sep_token_id:
+   desc: null
+   value: null
+ decoder_start_token_id:
+   desc: null
+   value: null
+ task_specific_params:
+   desc: null
+   value: null
+ problem_type:
+   desc: null
+   value: null
+ _name_or_path:
+   desc: null
+   value: meta-llama/Llama-2-7b-chat-hf
+ transformers_version:
+   desc: null
+   value: 4.39.3
+ model_type:
+   desc: null
+   value: llama
+ quantization_config:
+   desc: null
+   value:
+     quant_method: QuantizationMethod.BITS_AND_BYTES
+     _load_in_8bit: false
+     _load_in_4bit: true
+     llm_int8_threshold: 6.0
+     llm_int8_skip_modules: null
+     llm_int8_enable_fp32_cpu_offload: false
+     llm_int8_has_fp16_weight: false
+     bnb_4bit_quant_type: nf4
+     bnb_4bit_use_double_quant: false
+     bnb_4bit_compute_dtype: float16
+     bnb_4bit_quant_storage: uint8
+     load_in_4bit: true
+     load_in_8bit: false
+ output_dir:
+   desc: null
+   value: /kaggle/working/
+ overwrite_output_dir:
+   desc: null
+   value: false
+ do_train:
+   desc: null
+   value: false
+ do_eval:
+   desc: null
+   value: true
+ do_predict:
+   desc: null
+   value: false
+ evaluation_strategy:
+   desc: null
+   value: steps
+ prediction_loss_only:
+   desc: null
+   value: false
+ per_device_train_batch_size:
+   desc: null
+   value: 1
+ per_device_eval_batch_size:
+   desc: null
+   value: 1
+ per_gpu_train_batch_size:
+   desc: null
+   value: null
+ per_gpu_eval_batch_size:
+   desc: null
+   value: null
+ gradient_accumulation_steps:
+   desc: null
+   value: 4
+ eval_accumulation_steps:
+   desc: null
+   value: null
+ eval_delay:
+   desc: null
+   value: 0
+ learning_rate:
+   desc: null
+   value: 2.0e-05
+ weight_decay:
+   desc: null
+   value: 0.0
+ adam_beta1:
+   desc: null
+   value: 0.9
+ adam_beta2:
+   desc: null
+   value: 0.999
+ adam_epsilon:
+   desc: null
+   value: 1.0e-08
+ max_grad_norm:
+   desc: null
+   value: 1.0
+ num_train_epochs:
+   desc: null
+   value: 1
+ max_steps:
+   desc: null
+   value: -1
+ lr_scheduler_type:
+   desc: null
+   value: cosine
+ lr_scheduler_kwargs:
+   desc: null
+   value: {}
+ warmup_ratio:
+   desc: null
+   value: 0.1
+ warmup_steps:
+   desc: null
+   value: 0
+ log_level:
+   desc: null
+   value: info
+ log_level_replica:
+   desc: null
+   value: warning
+ log_on_each_node:
+   desc: null
+   value: true
+ logging_dir:
+   desc: null
+   value: /kaggle/working/runs/May22_05-46-00_92dcc4555414
+ logging_strategy:
+   desc: null
+   value: steps
+ logging_first_step:
+   desc: null
+   value: false
+ logging_steps:
+   desc: null
+   value: 10
+ logging_nan_inf_filter:
+   desc: null
+   value: true
+ save_strategy:
+   desc: null
+   value: steps
+ save_steps:
+   desc: null
+   value: 100
+ save_total_limit:
+   desc: null
+   value: 1
+ save_safetensors:
+   desc: null
+   value: true
+ save_on_each_node:
+   desc: null
+   value: false
+ save_only_model:
+   desc: null
+   value: false
+ no_cuda:
+   desc: null
+   value: false
+ use_cpu:
+   desc: null
+   value: false
+ use_mps_device:
+   desc: null
+   value: false
+ seed:
+   desc: null
+   value: 42
+ data_seed:
+   desc: null
+   value: null
+ jit_mode_eval:
+   desc: null
+   value: false
+ use_ipex:
+   desc: null
+   value: false
+ bf16:
+   desc: null
+   value: false
+ fp16:
+   desc: null
+   value: true
+ fp16_opt_level:
+   desc: null
+   value: O1
+ half_precision_backend:
+   desc: null
+   value: auto
+ bf16_full_eval:
+   desc: null
+   value: false
+ fp16_full_eval:
+   desc: null
+   value: false
+ tf32:
+   desc: null
+   value: null
+ local_rank:
+   desc: null
+   value: 0
+ ddp_backend:
+   desc: null
+   value: null
+ tpu_num_cores:
+   desc: null
+   value: null
+ tpu_metrics_debug:
+   desc: null
+   value: false
+ debug:
+   desc: null
+   value: []
+ dataloader_drop_last:
+   desc: null
+   value: false
+ eval_steps:
+   desc: null
+   value: 100
+ dataloader_num_workers:
+   desc: null
+   value: 0
+ dataloader_prefetch_factor:
+   desc: null
+   value: null
+ past_index:
+   desc: null
+   value: -1
+ run_name:
+   desc: null
+   value: /kaggle/working/
+ disable_tqdm:
+   desc: null
+   value: false
+ remove_unused_columns:
+   desc: null
+   value: false
+ label_names:
+   desc: null
+   value: null
+ load_best_model_at_end:
+   desc: null
+   value: false
+ metric_for_best_model:
+   desc: null
+   value: null
+ greater_is_better:
+   desc: null
+   value: null
+ ignore_data_skip:
+   desc: null
+   value: false
+ fsdp:
+   desc: null
+   value: []
+ fsdp_min_num_params:
+   desc: null
+   value: 0
+ fsdp_config:
+   desc: null
+   value:
+     min_num_params: 0
+     xla: false
+     xla_fsdp_v2: false
+     xla_fsdp_grad_ckpt: false
+ fsdp_transformer_layer_cls_to_wrap:
+   desc: null
+   value: null
+ accelerator_config:
+   desc: null
+   value:
+     split_batches: false
+     dispatch_batches: null
+     even_batches: true
+     use_seedable_sampler: true
+ deepspeed:
+   desc: null
+   value: null
+ label_smoothing_factor:
+   desc: null
+   value: 0.0
+ optim:
+   desc: null
+   value: paged_adamw_32bit
+ optim_args:
+   desc: null
+   value: null
+ adafactor:
+   desc: null
+   value: false
+ group_by_length:
+   desc: null
+   value: false
+ length_column_name:
+   desc: null
+   value: length
+ report_to:
+   desc: null
+   value:
+   - tensorboard
+   - wandb
+ ddp_find_unused_parameters:
+   desc: null
+   value: null
+ ddp_bucket_cap_mb:
+   desc: null
+   value: null
+ ddp_broadcast_buffers:
+   desc: null
+   value: null
+ dataloader_pin_memory:
+   desc: null
+   value: true
+ dataloader_persistent_workers:
+   desc: null
+   value: false
+ skip_memory_metrics:
+   desc: null
+   value: true
+ use_legacy_prediction_loop:
+   desc: null
+   value: false
+ push_to_hub:
+   desc: null
+   value: true
+ resume_from_checkpoint:
+   desc: null
+   value: null
+ hub_model_id:
+   desc: null
+   value: VanCan23/DPO_Vietnamese_chatbot_lessData
+ hub_strategy:
+   desc: null
+   value: every_save
+ hub_token:
+   desc: null
+   value: <HUB_TOKEN>
+ hub_private_repo:
+   desc: null
+   value: false
+ hub_always_push:
+   desc: null
+   value: false
+ gradient_checkpointing:
+   desc: null
+   value: true
+ gradient_checkpointing_kwargs:
+   desc: null
+   value:
+     use_reentrant: false
+ include_inputs_for_metrics:
+   desc: null
+   value: false
+ fp16_backend:
+   desc: null
+   value: auto
+ push_to_hub_model_id:
+   desc: null
+   value: null
+ push_to_hub_organization:
+   desc: null
+   value: null
+ push_to_hub_token:
+   desc: null
+   value: <PUSH_TO_HUB_TOKEN>
+ mp_parameters:
+   desc: null
+   value: ''
+ auto_find_batch_size:
+   desc: null
+   value: false
+ full_determinism:
+   desc: null
+   value: false
+ torchdynamo:
+   desc: null
+   value: null
+ ray_scope:
+   desc: null
+   value: last
+ ddp_timeout:
+   desc: null
+   value: 1800
+ torch_compile:
+   desc: null
+   value: false
+ torch_compile_backend:
+   desc: null
+   value: null
+ torch_compile_mode:
+   desc: null
+   value: null
+ dispatch_batches:
+   desc: null
+   value: null
+ split_batches:
+   desc: null
+   value: null
+ include_tokens_per_second:
+   desc: null
+   value: false
+ include_num_input_tokens_seen:
+   desc: null
+   value: false
+ neftune_noise_alpha:
+   desc: null
+   value: null
+ optim_target_modules:
+   desc: null
+   value: null
wandb/run-20240522_054348-vgrzs6jq/files/output.log ADDED
@@ -0,0 +1,624 @@
+ Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
+ Token is valid (permission: write).
+ Your token has been saved to /root/.cache/huggingface/token
+ Login successful
+ 2024-05-22 05:44:14.340486: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
+ 2024-05-22 05:44:14.340587: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
+ 2024-05-22 05:44:14.458170: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+ Downloading data: 100%|██████████| 84.7M/84.7M [00:00<00:00, 120MB/s]
+ chosen_en : Mexico, especially when they involve international travelers, time is of the essence as each passing day makes the search harder and leads grow cold," said Detective Sanchez with a heavy heart, as he closed the folder on Harry Devert's case and glanced out the window, the setting sun casting a somber glow over a landscape that still held its secrets tight and unyielding - a stark reminder that for some journeys, the road goes on forever and the destination remains unreachable.
+ system_en : You are an AI assistant. You will be given a task. You must generate a detailed and long answer.
+ question_vi : Viết câu cuối cùng trong câu chuyện này. (CNN) – Hành trình đi xe máy của Harry Devert từ Hoa Kỳ đến Châu Mỹ Latinh phần nào gợi nhớ đến “Nhật ký xe máy”, kể lại chuyến du hành Nam Mỹ của nhà cách mạng Ernesto “Che” Guevara trên một chiếc xe máy cũ để tìm kiếm tinh thần nổi dậy của mình. Nhưng Devert, 32 tuổi, người đã rời bỏ công việc kinh doanh tài chính để đi du lịch khắp thế giới, đã không liên lạc với mẹ hoặc bạn gái của mình ở New York kể từ ngày 25 tháng 1. Ngày hôm đó, anh gửi cho bạn gái Sarah Ashley Schiear một tin nhắn đáng ngại qua WhatsApp. ứng dụng nhắn tin. Tin nhắn cho biết: “Tôi vừa phải hộ tống một tiếng rưỡi dài ra khỏi một khu vực nào đó, điều đó quá nguy hiểm đối với tôi”. "Dừng ăn trưa và ... thì đấy Internet. ... Sẽ sớm quay lại đường thôi. Hình như có một đội hộ tống quân sự khác đang đợi tôi ở một thị trấn khác nào đó... Tôi đến muộn vì mấy chuyện quân sự điên rồ. ..hy vọng có cơ hội nói chuyện với bạn tối nay khi tôi (hy vọng) cuối cùng cũng đến nơi." “Đối với trường hợp người mất tích ở
+ rejected_en : ... the search for Harry Devert, who embarked on a motorcycle journey from the United States to Latin America, has taken a dire turn. Despite numerous attempts to contact him, his family and friends have received no word from him since the ominous text message on January 25th, in which he mentioned military escorts and delays. As the days go by, the concern for Harry's safety grows, and the authorities have launched a full-scale investigation into his disappearance. The last known location of Harry's motorcycle was tracked to a small town in a remote region of Latin America, where eyewitnesses reported seeing him being escorted by military personnel. The search for Harry continues, with hopes that he may still be alive and in need of urgent assistance. The family and friends of Harry Devert remain vigilant, holding onto the hope that he will soon be found and brought home safely.
+ system_vi : Bạn là một trợ lý AI. Bạn sẽ được giao một nhiệm vụ. Bạn phải tạo ra một câu trả lời chi tiết và dài.
+ question_en : Write the last sentence in this story.
+ (CNN) -- Harry Devert's motorcycle journey from the United States to Latin America is somewhat reminiscent of "The Motorcycle Diaries," which recount the South American travels of revolutionary Ernesto "Che" Guevara on an old motorbike in search of his insurgent spirit. But Devert, 32, who left a job as a trader in finance to travel the world, has not been in touch with his mother or girlfriend in New York since January 25. That day he sent girlfriend Sarah Ashley Schiear an ominous text via the WhatsApp messenger app. "Just got an hour and a half long escort out of some area it was too dangerous for me to be," the message said. "Stopping for lunch and ... voila Internet. ... Gonna get back on the road soon. Apparently there's another military escort waiting for me in some other town... I'm running way late because of the crazy military stuff...hopefully get a chance to talk to you tonight when I (hopefully) finally arrive."
+ "For missing person cases in
+ chosen_vi : Mexico, đặc biệt là khi chúng liên quan đến du khách quốc tế, thời gian là điều cốt yếu vì mỗi ngày trôi qua khiến việc tìm kiếm trở nên khó khăn hơn và các manh mối ngày càng trở nên mờ nhạt,” Thám tử Sanchez nói với trái tim trĩu nặng khi đóng tập hồ sơ về vụ án của Harry Devert và liếc ra ngoài cửa sổ. , mặt trời lặn tỏa ánh sáng ảm đạm lên một khung cảnh vẫn còn nắm giữ những bí mật chặt chẽ và kiên cường - một lời nhắc nhở rõ ràng rằng đối với một số hành trình, con đường sẽ kéo dài mãi mãi và đích đến vẫn không thể đến được.
+ rejected_vi : ... cuộc tìm kiếm Harry Devert, người bắt đầu cuộc hành trình bằng mô tô từ Hoa Kỳ đến Châu Mỹ Latinh, đã có một bước ngoặt thảm khốc. Bất chấp nhiều nỗ lực liên lạc với anh ấy, gia đình và bạn bè của anh ấy vẫn không nhận được tin tức gì từ anh ấy kể từ tin nhắn đáng lo ngại vào ngày 25 tháng 1, trong đó anh ấy đề cập đến việc hộ tống quân sự và sự chậm trễ. Ngày tháng trôi qua, mối lo ngại về sự an toàn của Harry ngày càng tăng và chính quyền đã mở một cuộc điều tra toàn diện về sự mất tích của anh. Vị trí cuối cùng được biết đến của chiếc xe máy của Harry được theo dõi đến một thị trấn nhỏ ở một vùng xa xôi của châu Mỹ Latinh, nơi các nhân chứng cho biết đã nhìn thấy anh ta được quân nhân hộ tống. Cuộc tìm kiếm Harry vẫn tiếp tục với hy vọng rằng cậu bé có thể vẫn còn sống và cần được hỗ trợ khẩn cấp. Gia đình và bạn bè của Harry Devert vẫn cảnh giác, nuôi hy vọng rằng anh sẽ sớm được tìm thấy và đưa về nhà an toàn.
+ Adapter weights model repo: VanCan23/SFTDPO_3epoch_adapter
+ Base model weights model repo: meta-llama/Llama-2-7b-chat-hf
+ base_model.model.model.embed_tokens.weight False
+ base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.0.mlp.gate_proj.weight False
+ base_model.model.model.layers.0.mlp.up_proj.weight False
+ base_model.model.model.layers.0.mlp.down_proj.weight False
+ base_model.model.model.layers.0.input_layernorm.weight False
+ base_model.model.model.layers.0.post_attention_layernorm.weight False
+ base_model.model.model.layers.1.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.1.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.1.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.1.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.1.mlp.gate_proj.weight False
+ base_model.model.model.layers.1.mlp.up_proj.weight False
+ base_model.model.model.layers.1.mlp.down_proj.weight False
+ base_model.model.model.layers.1.input_layernorm.weight False
+ base_model.model.model.layers.1.post_attention_layernorm.weight False
+ base_model.model.model.layers.2.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.2.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.2.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.2.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.2.mlp.gate_proj.weight False
+ base_model.model.model.layers.2.mlp.up_proj.weight False
+ base_model.model.model.layers.2.mlp.down_proj.weight False
+ base_model.model.model.layers.2.input_layernorm.weight False
+ base_model.model.model.layers.2.post_attention_layernorm.weight False
+ base_model.model.model.layers.3.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.3.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.3.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.3.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.3.mlp.gate_proj.weight False
+ base_model.model.model.layers.3.mlp.up_proj.weight False
+ base_model.model.model.layers.3.mlp.down_proj.weight False
+ base_model.model.model.layers.3.input_layernorm.weight False
+ base_model.model.model.layers.3.post_attention_layernorm.weight False
+ base_model.model.model.layers.4.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.4.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.4.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.4.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.4.mlp.gate_proj.weight False
+ base_model.model.model.layers.4.mlp.up_proj.weight False
+ base_model.model.model.layers.4.mlp.down_proj.weight False
+ base_model.model.model.layers.4.input_layernorm.weight False
+ base_model.model.model.layers.4.post_attention_layernorm.weight False
+ base_model.model.model.layers.5.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.5.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.5.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.5.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.5.mlp.gate_proj.weight False
+ base_model.model.model.layers.5.mlp.up_proj.weight False
+ base_model.model.model.layers.5.mlp.down_proj.weight False
+ base_model.model.model.layers.5.input_layernorm.weight False
+ base_model.model.model.layers.5.post_attention_layernorm.weight False
+ base_model.model.model.layers.6.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.6.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.6.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.6.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.6.mlp.gate_proj.weight False
+ base_model.model.model.layers.6.mlp.up_proj.weight False
+ base_model.model.model.layers.6.mlp.down_proj.weight False
+ base_model.model.model.layers.6.input_layernorm.weight False
+ base_model.model.model.layers.6.post_attention_layernorm.weight False
+ base_model.model.model.layers.7.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.7.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.7.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.7.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.7.mlp.gate_proj.weight False
+ base_model.model.model.layers.7.mlp.up_proj.weight False
+ base_model.model.model.layers.7.mlp.down_proj.weight False
+ base_model.model.model.layers.7.input_layernorm.weight False
+ base_model.model.model.layers.7.post_attention_layernorm.weight False
+ base_model.model.model.layers.8.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.8.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.8.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.8.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.8.mlp.gate_proj.weight False
+ base_model.model.model.layers.8.mlp.up_proj.weight False
+ base_model.model.model.layers.8.mlp.down_proj.weight False
+ base_model.model.model.layers.8.input_layernorm.weight False
+ base_model.model.model.layers.8.post_attention_layernorm.weight False
+ base_model.model.model.layers.9.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.9.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.9.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.9.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.9.mlp.gate_proj.weight False
+ base_model.model.model.layers.9.mlp.up_proj.weight False
+ base_model.model.model.layers.9.mlp.down_proj.weight False
+ base_model.model.model.layers.9.input_layernorm.weight False
+ base_model.model.model.layers.9.post_attention_layernorm.weight False
+ base_model.model.model.layers.10.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.10.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.10.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.10.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.10.mlp.gate_proj.weight False
+ base_model.model.model.layers.10.mlp.up_proj.weight False
+ base_model.model.model.layers.10.mlp.down_proj.weight False
+ base_model.model.model.layers.10.input_layernorm.weight False
+ base_model.model.model.layers.10.post_attention_layernorm.weight False
+ base_model.model.model.layers.11.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.11.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.11.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.11.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.11.mlp.gate_proj.weight False
+ base_model.model.model.layers.11.mlp.up_proj.weight False
+ base_model.model.model.layers.11.mlp.down_proj.weight False
+ base_model.model.model.layers.11.input_layernorm.weight False
+ base_model.model.model.layers.11.post_attention_layernorm.weight False
+ base_model.model.model.layers.12.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.12.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.12.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.12.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.12.mlp.gate_proj.weight False
+ base_model.model.model.layers.12.mlp.up_proj.weight False
+ base_model.model.model.layers.12.mlp.down_proj.weight False
+ base_model.model.model.layers.12.input_layernorm.weight False
+ base_model.model.model.layers.12.post_attention_layernorm.weight False
+ base_model.model.model.layers.13.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.13.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.13.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.13.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.13.mlp.gate_proj.weight False
+ base_model.model.model.layers.13.mlp.up_proj.weight False
+ base_model.model.model.layers.13.mlp.down_proj.weight False
+ base_model.model.model.layers.13.input_layernorm.weight False
+ base_model.model.model.layers.13.post_attention_layernorm.weight False
+ base_model.model.model.layers.14.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.14.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.14.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.14.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.14.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.14.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.14.mlp.gate_proj.weight False
+ base_model.model.model.layers.14.mlp.up_proj.weight False
+ base_model.model.model.layers.14.mlp.down_proj.weight False
+ base_model.model.model.layers.14.input_layernorm.weight False
+ base_model.model.model.layers.14.post_attention_layernorm.weight False
+ base_model.model.model.layers.15.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.15.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.15.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.15.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.15.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.15.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.15.mlp.gate_proj.weight False
+ base_model.model.model.layers.15.mlp.up_proj.weight False
+ base_model.model.model.layers.15.mlp.down_proj.weight False
+ base_model.model.model.layers.15.input_layernorm.weight False
+ base_model.model.model.layers.15.post_attention_layernorm.weight False
+ base_model.model.model.layers.16.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.16.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.weight False
+ base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.weight False
+ base_model.model.model.layers.16.self_attn.v_proj.base_layer.weight False
+ base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.weight False
+ base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.weight False
+ base_model.model.model.layers.16.self_attn.o_proj.base_layer.weight False
+ base_model.model.model.layers.16.self_attn.o_proj.lora_A.default.weight False
+ base_model.model.model.layers.16.self_attn.o_proj.lora_B.default.weight False
+ base_model.model.model.layers.16.mlp.gate_proj.weight False
+ base_model.model.model.layers.16.mlp.up_proj.weight False
+ base_model.model.model.layers.16.mlp.down_proj.weight False
+ base_model.model.model.layers.16.input_layernorm.weight False
+ base_model.model.model.layers.16.post_attention_layernorm.weight False
+ base_model.model.model.layers.17.self_attn.q_proj.base_layer.weight False
+ base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.weight False
+ base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.weight False
+ base_model.model.model.layers.17.self_attn.k_proj.base_layer.weight False
+ base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.weight False
317
+ base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.weight False
318
+ base_model.model.model.layers.17.self_attn.v_proj.base_layer.weight False
319
+ base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.weight False
320
+ base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.weight False
321
+ base_model.model.model.layers.17.self_attn.o_proj.base_layer.weight False
322
+ base_model.model.model.layers.17.self_attn.o_proj.lora_A.default.weight False
323
+ base_model.model.model.layers.17.self_attn.o_proj.lora_B.default.weight False
324
+ base_model.model.model.layers.17.mlp.gate_proj.weight False
325
+ base_model.model.model.layers.17.mlp.up_proj.weight False
326
+ base_model.model.model.layers.17.mlp.down_proj.weight False
327
+ base_model.model.model.layers.17.input_layernorm.weight False
328
+ base_model.model.model.layers.17.post_attention_layernorm.weight False
329
+ base_model.model.model.layers.18.self_attn.q_proj.base_layer.weight False
330
+ base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.weight False
331
+ base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.weight False
332
+ base_model.model.model.layers.18.self_attn.k_proj.base_layer.weight False
333
+ base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.weight False
334
+ base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.weight False
335
+ base_model.model.model.layers.18.self_attn.v_proj.base_layer.weight False
336
+ base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.weight False
337
+ base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.weight False
338
+ base_model.model.model.layers.18.self_attn.o_proj.base_layer.weight False
339
+ base_model.model.model.layers.18.self_attn.o_proj.lora_A.default.weight False
340
+ base_model.model.model.layers.18.self_attn.o_proj.lora_B.default.weight False
341
+ base_model.model.model.layers.18.mlp.gate_proj.weight False
342
+ base_model.model.model.layers.18.mlp.up_proj.weight False
343
+ base_model.model.model.layers.18.mlp.down_proj.weight False
344
+ base_model.model.model.layers.18.input_layernorm.weight False
345
+ base_model.model.model.layers.18.post_attention_layernorm.weight False
346
+ base_model.model.model.layers.19.self_attn.q_proj.base_layer.weight False
347
+ base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.weight False
348
+ base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.weight False
349
+ base_model.model.model.layers.19.self_attn.k_proj.base_layer.weight False
350
+ base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.weight False
351
+ base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.weight False
352
+ base_model.model.model.layers.19.self_attn.v_proj.base_layer.weight False
353
+ base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.weight False
354
+ base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.weight False
355
+ base_model.model.model.layers.19.self_attn.o_proj.base_layer.weight False
356
+ base_model.model.model.layers.19.self_attn.o_proj.lora_A.default.weight False
357
+ base_model.model.model.layers.19.self_attn.o_proj.lora_B.default.weight False
358
+ base_model.model.model.layers.19.mlp.gate_proj.weight False
359
+ base_model.model.model.layers.19.mlp.up_proj.weight False
360
+ base_model.model.model.layers.19.mlp.down_proj.weight False
361
+ base_model.model.model.layers.19.input_layernorm.weight False
362
+ base_model.model.model.layers.19.post_attention_layernorm.weight False
363
+ base_model.model.model.layers.20.self_attn.q_proj.base_layer.weight False
364
+ base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.weight False
365
+ base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.weight False
366
+ base_model.model.model.layers.20.self_attn.k_proj.base_layer.weight False
367
+ base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.weight False
368
+ base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.weight False
369
+ base_model.model.model.layers.20.self_attn.v_proj.base_layer.weight False
370
+ base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.weight False
371
+ base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.weight False
372
+ base_model.model.model.layers.20.self_attn.o_proj.base_layer.weight False
373
+ base_model.model.model.layers.20.self_attn.o_proj.lora_A.default.weight False
374
+ base_model.model.model.layers.20.self_attn.o_proj.lora_B.default.weight False
375
+ base_model.model.model.layers.20.mlp.gate_proj.weight False
376
+ base_model.model.model.layers.20.mlp.up_proj.weight False
377
+ base_model.model.model.layers.20.mlp.down_proj.weight False
378
+ base_model.model.model.layers.20.input_layernorm.weight False
379
+ base_model.model.model.layers.20.post_attention_layernorm.weight False
380
+ base_model.model.model.layers.21.self_attn.q_proj.base_layer.weight False
381
+ base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.weight False
382
+ base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.weight False
383
+ base_model.model.model.layers.21.self_attn.k_proj.base_layer.weight False
384
+ base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.weight False
385
+ base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.weight False
386
+ base_model.model.model.layers.21.self_attn.v_proj.base_layer.weight False
387
+ base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.weight False
388
+ base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.weight False
389
+ base_model.model.model.layers.21.self_attn.o_proj.base_layer.weight False
390
+ base_model.model.model.layers.21.self_attn.o_proj.lora_A.default.weight False
391
+ base_model.model.model.layers.21.self_attn.o_proj.lora_B.default.weight False
392
+ base_model.model.model.layers.21.mlp.gate_proj.weight False
393
+ base_model.model.model.layers.21.mlp.up_proj.weight False
394
+ base_model.model.model.layers.21.mlp.down_proj.weight False
395
+ base_model.model.model.layers.21.input_layernorm.weight False
396
+ base_model.model.model.layers.21.post_attention_layernorm.weight False
397
+ base_model.model.model.layers.22.self_attn.q_proj.base_layer.weight False
398
+ base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.weight False
399
+ base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.weight False
400
+ base_model.model.model.layers.22.self_attn.k_proj.base_layer.weight False
401
+ base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.weight False
402
+ base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.weight False
403
+ base_model.model.model.layers.22.self_attn.v_proj.base_layer.weight False
404
+ base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.weight False
405
+ base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.weight False
406
+ base_model.model.model.layers.22.self_attn.o_proj.base_layer.weight False
407
+ base_model.model.model.layers.22.self_attn.o_proj.lora_A.default.weight False
408
+ base_model.model.model.layers.22.self_attn.o_proj.lora_B.default.weight False
409
+ base_model.model.model.layers.22.mlp.gate_proj.weight False
410
+ base_model.model.model.layers.22.mlp.up_proj.weight False
411
+ base_model.model.model.layers.22.mlp.down_proj.weight False
412
+ base_model.model.model.layers.22.input_layernorm.weight False
413
+ base_model.model.model.layers.22.post_attention_layernorm.weight False
414
+ base_model.model.model.layers.23.self_attn.q_proj.base_layer.weight False
415
+ base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.weight False
416
+ base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.weight False
417
+ base_model.model.model.layers.23.self_attn.k_proj.base_layer.weight False
418
+ base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.weight False
419
+ base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.weight False
420
+ base_model.model.model.layers.23.self_attn.v_proj.base_layer.weight False
421
+ base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.weight False
422
+ base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.weight False
423
+ base_model.model.model.layers.23.self_attn.o_proj.base_layer.weight False
424
+ base_model.model.model.layers.23.self_attn.o_proj.lora_A.default.weight False
425
+ base_model.model.model.layers.23.self_attn.o_proj.lora_B.default.weight False
426
+ base_model.model.model.layers.23.mlp.gate_proj.weight False
427
+ base_model.model.model.layers.23.mlp.up_proj.weight False
428
+ base_model.model.model.layers.23.mlp.down_proj.weight False
429
+ base_model.model.model.layers.23.input_layernorm.weight False
430
+ base_model.model.model.layers.23.post_attention_layernorm.weight False
431
+ base_model.model.model.layers.24.self_attn.q_proj.base_layer.weight False
432
+ base_model.model.model.layers.24.self_attn.q_proj.lora_A.default.weight False
433
+ base_model.model.model.layers.24.self_attn.q_proj.lora_B.default.weight False
434
+ base_model.model.model.layers.24.self_attn.k_proj.base_layer.weight False
435
+ base_model.model.model.layers.24.self_attn.k_proj.lora_A.default.weight False
436
+ base_model.model.model.layers.24.self_attn.k_proj.lora_B.default.weight False
437
+ base_model.model.model.layers.24.self_attn.v_proj.base_layer.weight False
438
+ base_model.model.model.layers.24.self_attn.v_proj.lora_A.default.weight False
439
+ base_model.model.model.layers.24.self_attn.v_proj.lora_B.default.weight False
440
+ base_model.model.model.layers.24.self_attn.o_proj.base_layer.weight False
441
+ base_model.model.model.layers.24.self_attn.o_proj.lora_A.default.weight False
442
+ base_model.model.model.layers.24.self_attn.o_proj.lora_B.default.weight False
443
+ base_model.model.model.layers.24.mlp.gate_proj.weight False
444
+ base_model.model.model.layers.24.mlp.up_proj.weight False
445
+ base_model.model.model.layers.24.mlp.down_proj.weight False
446
+ base_model.model.model.layers.24.input_layernorm.weight False
447
+ base_model.model.model.layers.24.post_attention_layernorm.weight False
448
+ base_model.model.model.layers.25.self_attn.q_proj.base_layer.weight False
449
+ base_model.model.model.layers.25.self_attn.q_proj.lora_A.default.weight False
450
+ base_model.model.model.layers.25.self_attn.q_proj.lora_B.default.weight False
451
+ base_model.model.model.layers.25.self_attn.k_proj.base_layer.weight False
452
+ base_model.model.model.layers.25.self_attn.k_proj.lora_A.default.weight False
453
+ base_model.model.model.layers.25.self_attn.k_proj.lora_B.default.weight False
454
+ base_model.model.model.layers.25.self_attn.v_proj.base_layer.weight False
455
+ base_model.model.model.layers.25.self_attn.v_proj.lora_A.default.weight False
456
+ base_model.model.model.layers.25.self_attn.v_proj.lora_B.default.weight False
457
+ base_model.model.model.layers.25.self_attn.o_proj.base_layer.weight False
458
+ base_model.model.model.layers.25.self_attn.o_proj.lora_A.default.weight False
459
+ base_model.model.model.layers.25.self_attn.o_proj.lora_B.default.weight False
460
+ base_model.model.model.layers.25.mlp.gate_proj.weight False
461
+ base_model.model.model.layers.25.mlp.up_proj.weight False
462
+ base_model.model.model.layers.25.mlp.down_proj.weight False
463
+ base_model.model.model.layers.25.input_layernorm.weight False
464
+ base_model.model.model.layers.25.post_attention_layernorm.weight False
465
+ base_model.model.model.layers.26.self_attn.q_proj.base_layer.weight False
466
+ base_model.model.model.layers.26.self_attn.q_proj.lora_A.default.weight False
467
+ base_model.model.model.layers.26.self_attn.q_proj.lora_B.default.weight False
468
+ base_model.model.model.layers.26.self_attn.k_proj.base_layer.weight False
469
+ base_model.model.model.layers.26.self_attn.k_proj.lora_A.default.weight False
470
+ base_model.model.model.layers.26.self_attn.k_proj.lora_B.default.weight False
471
+ base_model.model.model.layers.26.self_attn.v_proj.base_layer.weight False
472
+ base_model.model.model.layers.26.self_attn.v_proj.lora_A.default.weight False
473
+ base_model.model.model.layers.26.self_attn.v_proj.lora_B.default.weight False
474
+ base_model.model.model.layers.26.self_attn.o_proj.base_layer.weight False
475
+ base_model.model.model.layers.26.self_attn.o_proj.lora_A.default.weight False
476
+ base_model.model.model.layers.26.self_attn.o_proj.lora_B.default.weight False
477
+ base_model.model.model.layers.26.mlp.gate_proj.weight False
478
+ base_model.model.model.layers.26.mlp.up_proj.weight False
479
+ base_model.model.model.layers.26.mlp.down_proj.weight False
480
+ base_model.model.model.layers.26.input_layernorm.weight False
481
+ base_model.model.model.layers.26.post_attention_layernorm.weight False
482
+ base_model.model.model.layers.27.self_attn.q_proj.base_layer.weight False
483
+ base_model.model.model.layers.27.self_attn.q_proj.lora_A.default.weight False
484
+ base_model.model.model.layers.27.self_attn.q_proj.lora_B.default.weight False
485
+ base_model.model.model.layers.27.self_attn.k_proj.base_layer.weight False
486
+ base_model.model.model.layers.27.self_attn.k_proj.lora_A.default.weight False
487
+ base_model.model.model.layers.27.self_attn.k_proj.lora_B.default.weight False
488
+ base_model.model.model.layers.27.self_attn.v_proj.base_layer.weight False
489
+ base_model.model.model.layers.27.self_attn.v_proj.lora_A.default.weight False
490
+ base_model.model.model.layers.27.self_attn.v_proj.lora_B.default.weight False
491
+ base_model.model.model.layers.27.self_attn.o_proj.base_layer.weight False
492
+ base_model.model.model.layers.27.self_attn.o_proj.lora_A.default.weight False
493
+ base_model.model.model.layers.27.self_attn.o_proj.lora_B.default.weight False
494
+ base_model.model.model.layers.27.mlp.gate_proj.weight False
495
+ base_model.model.model.layers.27.mlp.up_proj.weight False
496
+ base_model.model.model.layers.27.mlp.down_proj.weight False
497
+ base_model.model.model.layers.27.input_layernorm.weight False
498
+ base_model.model.model.layers.27.post_attention_layernorm.weight False
499
+ base_model.model.model.layers.28.self_attn.q_proj.base_layer.weight False
500
+ base_model.model.model.layers.28.self_attn.q_proj.lora_A.default.weight False
501
+ base_model.model.model.layers.28.self_attn.q_proj.lora_B.default.weight False
502
+ base_model.model.model.layers.28.self_attn.k_proj.base_layer.weight False
503
+ base_model.model.model.layers.28.self_attn.k_proj.lora_A.default.weight False
504
+ base_model.model.model.layers.28.self_attn.k_proj.lora_B.default.weight False
505
+ base_model.model.model.layers.28.self_attn.v_proj.base_layer.weight False
506
+ base_model.model.model.layers.28.self_attn.v_proj.lora_A.default.weight False
507
+ base_model.model.model.layers.28.self_attn.v_proj.lora_B.default.weight False
508
+ base_model.model.model.layers.28.self_attn.o_proj.base_layer.weight False
509
+ base_model.model.model.layers.28.self_attn.o_proj.lora_A.default.weight False
510
+ base_model.model.model.layers.28.self_attn.o_proj.lora_B.default.weight False
511
+ base_model.model.model.layers.28.mlp.gate_proj.weight False
512
+ base_model.model.model.layers.28.mlp.up_proj.weight False
513
+ base_model.model.model.layers.28.mlp.down_proj.weight False
514
+ base_model.model.model.layers.28.input_layernorm.weight False
515
+ base_model.model.model.layers.28.post_attention_layernorm.weight False
516
+ base_model.model.model.layers.29.self_attn.q_proj.base_layer.weight False
517
+ base_model.model.model.layers.29.self_attn.q_proj.lora_A.default.weight False
518
+ base_model.model.model.layers.29.self_attn.q_proj.lora_B.default.weight False
519
+ base_model.model.model.layers.29.self_attn.k_proj.base_layer.weight False
520
+ base_model.model.model.layers.29.self_attn.k_proj.lora_A.default.weight False
521
+ base_model.model.model.layers.29.self_attn.k_proj.lora_B.default.weight False
522
+ base_model.model.model.layers.29.self_attn.v_proj.base_layer.weight False
523
+ base_model.model.model.layers.29.self_attn.v_proj.lora_A.default.weight False
524
+ base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.weight False
525
+ base_model.model.model.layers.29.self_attn.o_proj.base_layer.weight False
526
+ base_model.model.model.layers.29.self_attn.o_proj.lora_A.default.weight False
527
+ base_model.model.model.layers.29.self_attn.o_proj.lora_B.default.weight False
528
+ base_model.model.model.layers.29.mlp.gate_proj.weight False
529
+ base_model.model.model.layers.29.mlp.up_proj.weight False
530
+ base_model.model.model.layers.29.mlp.down_proj.weight False
531
+ base_model.model.model.layers.29.input_layernorm.weight False
532
+ base_model.model.model.layers.29.post_attention_layernorm.weight False
533
+ base_model.model.model.layers.30.self_attn.q_proj.base_layer.weight False
534
+ base_model.model.model.layers.30.self_attn.q_proj.lora_A.default.weight False
535
+ base_model.model.model.layers.30.self_attn.q_proj.lora_B.default.weight False
536
+ base_model.model.model.layers.30.self_attn.k_proj.base_layer.weight False
537
+ base_model.model.model.layers.30.self_attn.k_proj.lora_A.default.weight False
538
+ base_model.model.model.layers.30.self_attn.k_proj.lora_B.default.weight False
539
+ base_model.model.model.layers.30.self_attn.v_proj.base_layer.weight False
540
+ base_model.model.model.layers.30.self_attn.v_proj.lora_A.default.weight False
541
+ base_model.model.model.layers.30.self_attn.v_proj.lora_B.default.weight False
542
+ base_model.model.model.layers.30.self_attn.o_proj.base_layer.weight False
543
+ base_model.model.model.layers.30.self_attn.o_proj.lora_A.default.weight False
544
+ base_model.model.model.layers.30.self_attn.o_proj.lora_B.default.weight False
545
+ base_model.model.model.layers.30.mlp.gate_proj.weight False
546
+ base_model.model.model.layers.30.mlp.up_proj.weight False
547
+ base_model.model.model.layers.30.mlp.down_proj.weight False
548
+ base_model.model.model.layers.30.input_layernorm.weight False
549
+ base_model.model.model.layers.30.post_attention_layernorm.weight False
550
+ base_model.model.model.layers.31.self_attn.q_proj.base_layer.weight False
551
+ base_model.model.model.layers.31.self_attn.q_proj.lora_A.default.weight False
552
+ base_model.model.model.layers.31.self_attn.q_proj.lora_B.default.weight False
553
+ base_model.model.model.layers.31.self_attn.k_proj.base_layer.weight False
554
+ base_model.model.model.layers.31.self_attn.k_proj.lora_A.default.weight False
555
+ base_model.model.model.layers.31.self_attn.k_proj.lora_B.default.weight False
556
+ base_model.model.model.layers.31.self_attn.v_proj.base_layer.weight False
557
+ base_model.model.model.layers.31.self_attn.v_proj.lora_A.default.weight False
558
+ base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.weight False
559
+ base_model.model.model.layers.31.self_attn.o_proj.base_layer.weight False
560
+ base_model.model.model.layers.31.self_attn.o_proj.lora_A.default.weight False
561
+ base_model.model.model.layers.31.self_attn.o_proj.lora_B.default.weight False
562
+ base_model.model.model.layers.31.mlp.gate_proj.weight False
563
+ base_model.model.model.layers.31.mlp.up_proj.weight False
564
+ base_model.model.model.layers.31.mlp.down_proj.weight False
565
+ base_model.model.model.layers.31.input_layernorm.weight False
566
+ base_model.model.model.layers.31.post_attention_layernorm.weight False
567
+ base_model.model.model.norm.weight False
568
+ base_model.model.lm_head.weight False
569
+ /opt/conda/lib/python3.10/site-packages/peft/tuners/lora/bnb.py:325: UserWarning: Merge lora module to 4-bit linear may get different generations due to rounding errors.
570
+ warnings.warn(
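This PEFT warning fires because merging a LoRA adapter into a 4-bit base layer is a lossy round trip: the quantized weight is dequantized to half precision, the low-rank update B @ A * (lora_alpha / r) is added, and the sum is re-quantized onto the NF4 grid, so a merged model can generate slightly differently from adapter-plus-base. A minimal sketch of the kind of call that triggers it (the adapter path is a placeholder):

    from peft import AutoPeftModelForCausalLM

    # Load the 4-bit base together with its LoRA adapter, then fold the
    # adapter into the base weights. The dequantize -> add -> requantize
    # round trip inside merge_and_unload() is the rounding the warning
    # refers to.
    model = AutoPeftModelForCausalLM.from_pretrained(
        "path/to/adapter",  # placeholder
        load_in_4bit=True,
    )
    merged = model.merge_and_unload()  # emits the UserWarning above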
571
+ /opt/conda/lib/python3.10/site-packages/trl/trainer/dpo_trainer.py:332: UserWarning: When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.
572
+ warnings.warn(
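TRL sets remove_unused_columns=False on your behalf here because DPODataCollatorWithPadding consumes raw dataset columns (prompt, chosen, rejected) that the default Trainer behavior would silently drop before collation. Making it explicit, as the warning asks, is one line in the arguments; all other arguments are omitted in this sketch:

    from transformers import TrainingArguments

    training_args = TrainingArguments(
        output_dir="/kaggle/working/",
        # Keep the prompt/chosen/rejected columns for the DPO collator
        # instead of dropping everything not in the model's forward signature.
        remove_unused_columns=False,
    )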
573
+ Token indices sequence length is longer than the specified maximum sequence length for this model (1126 > 1024). Running this sequence through the model will result in indexing errors
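The 1126 > 1024 message is the tokenizer's model_max_length check firing during dataset preprocessing; it is only fatal if over-long sequences actually reach the model untruncated. One way to bound pair lengths explicitly, assuming the max_length / max_prompt_length constructor arguments of trl 0.8.6 (the concrete values below are illustrative, and model, ref_model, training_args, train_dataset and tokenizer are assumed from the surrounding setup):

    from trl import DPOTrainer

    trainer = DPOTrainer(
        model,
        ref_model,
        args=training_args,
        train_dataset=train_dataset,
        tokenizer=tokenizer,
        max_length=1024,        # cap on prompt + completion tokens (illustrative)
        max_prompt_length=512,  # cap on the prompt part alone (illustrative)
    )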
574
+ /opt/conda/lib/python3.10/site-packages/accelerate/accelerator.py:436: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead:
575
+ dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
576
+ warnings.warn(
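This deprecation is raised from inside the Trainer's own Accelerator construction on these library versions, so there is nothing to change in user code here; the message is still a useful recipe for scripts that build an Accelerator directly. The non-deprecated form, using exactly the values from the warning:

    from accelerate import Accelerator
    from accelerate.utils import DataLoaderConfiguration

    # The same loader settings, grouped into the dataclass that
    # Accelerate 1.0 will require instead of loose keyword arguments.
    dataloader_config = DataLoaderConfiguration(
        dispatch_batches=None,
        split_batches=False,
        even_batches=True,
        use_seedable_sampler=True,
    )
    accelerator = Accelerator(dataloader_config=dataloader_config)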
577
+ You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
578
+ Using auto half precision backend
579
+ ***** Running training *****
580
+ Num examples = 7,200
581
+ Num Epochs = 1
582
+ Instantaneous batch size per device = 1
583
+ Total train batch size (w. parallel, distributed & accumulation) = 4
584
+ Gradient Accumulation steps = 4
585
+ Total optimization steps = 1,800
586
+ Number of trainable parameters = 319,815,680
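Both headline numbers above can be rederived from the configs in this commit. The step count is 7,200 examples divided by the effective batch of 4 (per-device batch 1 x gradient accumulation 4, one optimizer process with the model sharded across the GPUs), and the trainable-parameter count is exactly the LoRA tensors for r = 128 on all seven target projections of all 32 layers (hidden_size 4096 and intermediate_size 11008, per the model config printed below):

    # LoRA adapter size: each adapted Linear(in -> out) adds A (r, in)
    # and B (out, r).
    r, h, m, layers = 128, 4096, 11008, 32
    attn = 4 * (r * h + h * r)                    # q, k, v, o: 4096 -> 4096
    mlp = 2 * (r * h + m * r) + (r * m + h * r)   # gate, up: 4096 -> 11008; down: 11008 -> 4096
    print(layers * (attn + mlp))                  # 319815680

    # Optimization steps for one epoch.
    print(7200 // (1 * 4))                        # 1800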
587
+ Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
588
+ Could not estimate the number of tokens of the input, floating-point operations will not be computed
589
+ ***** Running Evaluation *****
590
+ Num examples = 1801
591
+ Batch size = 1
592
+ Saving model checkpoint to /kaggle/working/checkpoint-100
593
+ loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/f5db02db724555f92da89c216ac04704f23d4590/config.json
594
+ Model config LlamaConfig {
595
+ "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
596
+ "architectures": [
597
+ "LlamaForCausalLM"
598
+ ],
599
+ "attention_bias": false,
600
+ "attention_dropout": 0.0,
601
+ "bos_token_id": 1,
602
+ "eos_token_id": 2,
603
+ "hidden_act": "silu",
604
+ "hidden_size": 4096,
605
+ "initializer_range": 0.02,
606
+ "intermediate_size": 11008,
607
+ "max_position_embeddings": 4096,
608
+ "model_type": "llama",
609
+ "num_attention_heads": 32,
610
+ "num_hidden_layers": 32,
611
+ "num_key_value_heads": 32,
612
+ "pretraining_tp": 1,
613
+ "rms_norm_eps": 1e-05,
614
+ "rope_scaling": null,
615
+ "rope_theta": 10000.0,
616
+ "tie_word_embeddings": false,
617
+ "torch_dtype": "float16",
618
+ "transformers_version": "4.39.3",
619
+ "use_cache": true,
620
+ "vocab_size": 32000
621
+ }
622
+ tokenizer config file saved in /kaggle/working/checkpoint-100/tokenizer_config.json
623
+ Special tokens file saved in /kaggle/working/checkpoint-100/special_tokens_map.json
624
+ tokenizer config file saved in /kaggle/working/tokenizer_config.json
wandb/run-20240522_054348-vgrzs6jq/files/requirements.txt ADDED
@@ -0,0 +1,867 @@
1
+ Babel==2.14.0
2
+ Boruta==0.3
3
+ Brotli==1.0.9
4
+ CVXcanon==0.1.2
5
+ Cartopy==0.23.0
6
+ Cython==3.0.8
7
+ Deprecated==1.2.14
8
+ Farama-Notifications==0.0.4
9
+ Flask==3.0.3
10
+ Geohash==1.0
11
+ GitPython==3.1.41
12
+ ImageHash==4.3.1
13
+ Janome==0.5.0
14
+ Jinja2==3.1.2
15
+ LunarCalendar==0.0.9
16
+ Mako==1.3.3
17
+ Markdown==3.5.2
18
+ MarkupSafe==2.1.3
19
+ MarkupSafe==2.1.5
20
+ Pillow==9.5.0
21
+ PuLP==2.8.0
22
+ PyArabic==0.6.15
23
+ PyJWT==2.8.0
24
+ PyMeeus==0.5.12
25
+ PySocks==1.7.1
26
+ PyUpSet==0.1.1.post7
27
+ PyWavelets==1.5.0
28
+ PyYAML==6.0.1
29
+ Pygments==2.17.2
30
+ Pympler==1.0.1
31
+ QtPy==2.4.1
32
+ Rtree==1.2.0
33
+ SQLAlchemy==2.0.25
34
+ SecretStorage==3.3.3
35
+ Send2Trash==1.8.2
36
+ Shapely==1.8.5.post1
37
+ Shimmy==1.3.0
38
+ SimpleITK==2.3.1
39
+ TPOT==0.12.1
40
+ Theano-PyMC==1.1.2
41
+ Theano==1.0.5
42
+ Wand==0.6.13
43
+ Werkzeug==3.0.2
44
+ absl-py==1.4.0
45
+ accelerate==0.29.3
46
+ access==1.1.9
47
+ affine==2.4.0
48
+ aiobotocore==2.12.3
49
+ aiofiles==22.1.0
50
+ aiohttp-cors==0.7.0
51
+ aiohttp==3.9.1
52
+ aioitertools==0.11.0
53
+ aiorwlock==1.3.0
54
+ aiosignal==1.3.1
55
+ aiosqlite==0.19.0
56
+ albumentations==1.4.0
57
+ alembic==1.13.1
58
+ altair==5.3.0
59
+ annotated-types==0.6.0
60
+ annoy==1.17.3
61
+ anyio==4.2.0
62
+ apache-beam==2.46.0
63
+ aplus==0.11.0
64
+ appdirs==1.4.4
65
+ archspec==0.2.3
66
+ argon2-cffi-bindings==21.2.0
67
+ argon2-cffi==23.1.0
68
+ array-record==0.5.0
69
+ arrow==1.3.0
70
+ arviz==0.18.0
71
+ astroid==3.1.0
72
+ astropy-iers-data==0.2024.4.15.2.45.49
73
+ astropy==6.0.1
74
+ asttokens==2.4.1
75
+ astunparse==1.6.3
76
+ async-lru==2.0.4
77
+ async-timeout==4.0.3
78
+ attrs==23.2.0
79
+ audioread==3.0.1
80
+ autopep8==2.0.4
81
+ backoff==2.2.1
82
+ bayesian-optimization==1.4.3
83
+ beatrix_jupyterlab==2023.128.151533
84
+ beautifulsoup4==4.12.2
85
+ bitsandbytes==0.43.1
86
+ blake3==0.2.1
87
+ bleach==6.1.0
88
+ blessed==1.20.0
89
+ blinker==1.7.0
90
+ blis==0.7.10
91
+ blosc2==2.6.2
92
+ bokeh==3.4.1
93
+ boltons==23.1.1
94
+ boto3==1.26.100
95
+ botocore==1.34.69
96
+ bq_helper==0.4.1
97
+ bqplot==0.12.43
98
+ branca==0.7.1
99
+ brewer2mpl==1.4.1
100
+ brotlipy==0.7.0
101
+ cached-property==1.5.2
102
+ cachetools==4.2.4
103
+ cachetools==5.3.2
104
+ catalogue==2.0.10
105
+ catalyst==22.4
106
+ catboost==1.2.3
107
+ category-encoders==2.6.3
108
+ certifi==2024.2.2
109
+ cesium==0.12.1
110
+ cffi==1.16.0
111
+ charset-normalizer==3.3.2
112
+ chex==0.1.86
113
+ cleverhans==4.0.0
114
+ click-plugins==1.1.1
115
+ click==8.1.7
116
+ cligj==0.7.2
117
+ cloud-tpu-client==0.10
118
+ cloud-tpu-profiler==2.4.0
119
+ cloudpathlib==0.16.0
120
+ cloudpickle==2.2.1
121
+ cloudpickle==3.0.0
122
+ cmdstanpy==1.2.2
123
+ colorama==0.4.6
124
+ colorcet==3.1.0
125
+ colorful==0.5.6
126
+ colorlog==6.8.2
127
+ colorlover==0.3.0
128
+ comm==0.2.1
129
+ conda-libmamba-solver==23.7.0
130
+ conda-package-handling==2.2.0
131
+ conda==23.7.4
132
+ conda_package_streaming==0.9.0
133
+ confection==0.1.4
134
+ contextily==1.6.0
135
+ contourpy==1.2.0
136
+ contourpy==1.2.1
137
+ convertdate==2.4.0
138
+ crcmod==1.7
139
+ cryptography==41.0.7
140
+ cuda-python==12.4.0
141
+ cudf==23.8.0
142
+ cufflinks==0.17.3
143
+ cuml==23.8.0
144
+ cupy==13.0.0
145
+ cycler==0.12.1
146
+ cymem==2.0.8
147
+ cytoolz==0.12.3
148
+ daal4py==2024.3.0
149
+ daal==2024.3.0
150
+ dacite==1.8.1
151
+ dask-cuda==23.8.0
152
+ dask-cudf==23.8.0
153
+ dask-expr==1.0.11
154
+ dask==2024.4.1
155
+ dataclasses-json==0.6.4
156
+ dataproc_jupyter_plugin==0.1.66
157
+ datasets==2.18.0
158
+ datashader==0.16.0
159
+ datatile==1.0.3
160
+ db-dtypes==1.2.0
161
+ deap==1.4.1
162
+ debugpy==1.8.0
163
+ decorator==5.1.1
164
+ deepdiff==7.0.1
165
+ defusedxml==0.7.1
166
+ deprecation==2.1.0
167
+ descartes==1.1.0
168
+ dill==0.3.8
169
+ dipy==1.9.0
170
+ distlib==0.3.8
171
+ distributed==2023.7.1
172
+ distro==1.9.0
173
+ dm-tree==0.1.8
174
+ docker-pycreds==0.4.0
175
+ docker==7.0.0
176
+ docopt==0.6.2
177
+ docstring-parser==0.15
178
+ docstring-to-markdown==0.15
179
+ docutils==0.21.1
180
+ earthengine-api==0.1.399
181
+ easydict==1.13
182
+ easyocr==1.7.1
183
+ ecos==2.0.13
184
+ einops==0.8.0
185
+ eli5==0.13.0
186
+ emoji==2.11.0
187
+ en-core-web-lg==3.7.1
188
+ en-core-web-sm==3.7.1
189
+ entrypoints==0.4
190
+ ephem==4.1.5
191
+ esda==2.5.1
192
+ essentia==2.1b6.dev1110
193
+ et-xmlfile==1.1.0
194
+ etils==1.6.0
195
+ exceptiongroup==1.2.0
196
+ executing==2.0.1
197
+ explainable-ai-sdk==1.3.3
198
+ fastai==2.7.14
199
+ fastapi==0.108.0
200
+ fastavro==1.9.3
201
+ fastcore==1.5.29
202
+ fastdownload==0.0.7
203
+ fasteners==0.19
204
+ fastjsonschema==2.19.1
205
+ fastprogress==1.0.3
206
+ fastrlock==0.8.2
207
+ fasttext==0.9.2
208
+ feather-format==0.4.1
209
+ featuretools==1.30.0
210
+ filelock==3.13.1
211
+ fiona==1.9.6
212
+ fitter==1.7.0
213
+ flake8==7.0.0
214
+ flash-attn==2.5.8
215
+ flashtext==2.7
216
+ flatbuffers==23.5.26
217
+ flax==0.8.2
218
+ folium==0.16.0
219
+ fonttools==4.47.0
220
+ fonttools==4.51.0
221
+ fqdn==1.5.1
222
+ frozendict==2.4.2
223
+ frozenlist==1.4.1
224
+ fsspec==2024.2.0
225
+ fsspec==2024.3.1
226
+ funcy==2.0
227
+ fury==0.10.0
228
+ future==1.0.0
229
+ fuzzywuzzy==0.18.0
230
+ gast==0.5.4
231
+ gatspy==0.3
232
+ gcsfs==2024.2.0
233
+ gensim==4.3.2
234
+ geographiclib==2.0
235
+ geojson==3.1.0
236
+ geopandas==0.14.3
237
+ geoplot==0.5.1
238
+ geopy==2.4.1
239
+ geoviews==1.12.0
240
+ ggplot==0.11.5
241
+ giddy==2.3.5
242
+ gitdb==4.0.11
243
+ google-ai-generativelanguage==0.6.2
244
+ google-api-core==2.11.1
245
+ google-api-core==2.18.0
246
+ google-api-python-client==2.126.0
247
+ google-apitools==0.5.31
248
+ google-auth-httplib2==0.2.0
249
+ google-auth-oauthlib==1.2.0
250
+ google-auth==2.26.1
251
+ google-cloud-aiplatform==0.6.0a1
252
+ google-cloud-artifact-registry==1.10.0
253
+ google-cloud-automl==1.0.1
254
+ google-cloud-bigquery==2.34.4
255
+ google-cloud-bigtable==1.7.3
256
+ google-cloud-core==2.4.1
257
+ google-cloud-datastore==2.19.0
258
+ google-cloud-dlp==3.14.0
259
+ google-cloud-jupyter-config==0.0.5
260
+ google-cloud-language==2.13.3
261
+ google-cloud-monitoring==2.18.0
262
+ google-cloud-pubsub==2.19.0
263
+ google-cloud-pubsublite==1.9.0
264
+ google-cloud-recommendations-ai==0.7.1
265
+ google-cloud-resource-manager==1.11.0
266
+ google-cloud-spanner==3.40.1
267
+ google-cloud-storage==1.44.0
268
+ google-cloud-translate==3.12.1
269
+ google-cloud-videointelligence==2.13.3
270
+ google-cloud-vision==2.8.0
271
+ google-crc32c==1.5.0
272
+ google-generativeai==0.5.1
273
+ google-pasta==0.2.0
274
+ google-resumable-media==2.7.0
275
+ googleapis-common-protos==1.62.0
276
+ gplearn==0.4.2
277
+ gpustat==1.0.0
278
+ gpxpy==1.6.2
279
+ graphviz==0.20.3
280
+ greenlet==3.0.3
281
+ grpc-google-iam-v1==0.12.7
282
+ grpcio-status==1.48.1
283
+ grpcio-status==1.48.2
284
+ grpcio==1.51.1
285
+ grpcio==1.60.0
286
+ gviz-api==1.10.0
287
+ gym-notices==0.0.8
288
+ gym==0.26.2
289
+ gymnasium==0.29.0
290
+ h11==0.14.0
291
+ h2o==3.46.0.1
292
+ h5netcdf==1.3.0
293
+ h5py==3.10.0
294
+ haversine==2.8.1
295
+ hdfs==2.7.3
296
+ hep-ml==0.7.2
297
+ hijri-converter==2.3.1
298
+ hmmlearn==0.3.2
299
+ holidays==0.24
300
+ holoviews==1.18.3
301
+ hpsklearn==0.1.0
302
+ html5lib==1.1
303
+ htmlmin==0.1.12
304
+ httpcore==1.0.5
305
+ httplib2==0.21.0
306
+ httptools==0.6.1
307
+ httpx==0.27.0
308
+ huggingface-hub==0.22.2
309
+ hunspell==0.5.5
310
+ hydra-slayer==0.5.0
311
+ hyperopt==0.2.7
312
+ hypertools==0.8.0
313
+ idna==3.6
314
+ igraph==0.11.4
315
+ imagecodecs==2024.1.1
316
+ imageio==2.33.1
317
+ imbalanced-learn==0.12.2
318
+ imgaug==0.4.0
319
+ importlib-metadata==6.11.0
320
+ importlib-metadata==7.0.1
321
+ importlib-resources==6.1.1
322
+ inequality==1.0.1
323
+ iniconfig==2.0.0
324
+ ipydatawidgets==4.3.5
325
+ ipykernel==6.28.0
326
+ ipyleaflet==0.18.2
327
+ ipympl==0.7.0
328
+ ipython-genutils==0.2.0
329
+ ipython-genutils==0.2.0
330
+ ipython-sql==0.5.0
331
+ ipython==8.20.0
332
+ ipyvolume==0.6.3
333
+ ipyvue==1.11.0
334
+ ipyvuetify==1.9.4
335
+ ipywebrtc==0.6.0
336
+ ipywidgets==7.7.1
337
+ isoduration==20.11.0
338
+ isort==5.13.2
339
+ isoweek==1.3.3
340
+ itsdangerous==2.2.0
341
+ jaraco.classes==3.3.0
342
+ jax-jumpy==1.0.0
343
+ jax==0.4.23
344
+ jaxlib==0.4.23.dev20240116
345
+ jedi==0.19.1
346
+ jeepney==0.8.0
347
+ jieba==0.42.1
348
+ jmespath==1.0.1
349
+ joblib==1.4.0
350
+ json5==0.9.14
351
+ jsonpatch==1.33
352
+ jsonpointer==2.4
353
+ jsonschema-specifications==2023.12.1
354
+ jsonschema==4.20.0
355
+ jupyter-console==6.6.3
356
+ jupyter-events==0.9.0
357
+ jupyter-http-over-ws==0.0.8
358
+ jupyter-lsp==1.5.1
359
+ jupyter-server-mathjax==0.2.6
360
+ jupyter-ydoc==0.2.5
361
+ jupyter_client==7.4.9
362
+ jupyter_client==8.6.0
363
+ jupyter_core==5.7.1
364
+ jupyter_server==2.12.5
365
+ jupyter_server_fileid==0.9.1
366
+ jupyter_server_proxy==4.1.0
367
+ jupyter_server_terminals==0.5.1
368
+ jupyter_server_ydoc==0.8.0
369
+ jupyterlab-lsp==5.1.0
370
+ jupyterlab-widgets==3.0.9
371
+ jupyterlab==4.1.6
372
+ jupyterlab_git==0.44.0
373
+ jupyterlab_pygments==0.3.0
374
+ jupyterlab_server==2.25.2
375
+ jupytext==1.16.0
376
+ kaggle-environments==1.14.3
377
+ kaggle==1.6.12
378
+ kagglehub==0.2.3
379
+ keras-cv==0.8.2
380
+ keras-nlp==0.9.3
381
+ keras-tuner==1.4.6
382
+ keras==3.2.1
383
+ kernels-mixer==0.0.7
384
+ keyring==24.3.0
385
+ keyrings.google-artifactregistry-auth==1.1.2
386
+ kfp-pipeline-spec==0.2.2
387
+ kfp-server-api==2.0.5
388
+ kfp==2.5.0
389
+ kiwisolver==1.4.5
390
+ kmapper==2.0.1
391
+ kmodes==0.12.2
392
+ korean-lunar-calendar==0.3.1
393
+ kornia==0.7.2
394
+ kornia_rs==0.1.3
395
+ kt-legacy==1.0.5
396
+ kubernetes==26.1.0
397
+ langcodes==3.3.0
398
+ langid==1.1.6
399
+ lazy_loader==0.3
400
+ learntools==0.3.4
401
+ leven==1.0.4
402
+ libclang==16.0.6
403
+ libmambapy==1.5.0
404
+ libpysal==4.9.2
405
+ librosa==0.10.1
406
+ lightgbm==4.2.0
407
+ lightning-utilities==0.11.2
408
+ lime==0.2.0.1
409
+ line-profiler==4.1.2
410
+ linkify-it-py==2.0.3
411
+ llvmlite==0.41.1
412
+ llvmlite==0.42.0
413
+ lml==0.1.0
414
+ locket==1.0.0
415
+ loguru==0.7.2
416
+ lxml==5.2.1
417
+ lz4==4.3.3
418
+ mamba==1.5.0
419
+ mapclassify==2.6.1
420
+ markdown-it-py==3.0.0
421
+ marshmallow==3.21.1
422
+ matplotlib-inline==0.1.6
423
+ matplotlib-venn==0.11.10
424
+ matplotlib==3.7.5
425
+ matplotlib==3.8.4
426
+ mccabe==0.7.0
427
+ mdit-py-plugins==0.4.0
428
+ mdurl==0.1.2
429
+ memory-profiler==0.61.0
430
+ menuinst==2.0.1
431
+ mercantile==1.2.1
432
+ mgwr==2.2.1
433
+ missingno==0.5.2
434
+ mistune==0.8.4
435
+ mizani==0.11.1
436
+ ml-dtypes==0.2.0
437
+ mlcrate==0.2.0
438
+ mlens==0.2.3
439
+ mlxtend==0.23.1
440
+ mne==1.6.1
441
+ mnist==0.2.2
442
+ momepy==0.7.0
443
+ more-itertools==10.2.0
444
+ mpld3==0.5.10
445
+ mpmath==1.3.0
446
+ msgpack==1.0.7
447
+ multidict==6.0.4
448
+ multimethod==1.10
449
+ multipledispatch==1.0.0
450
+ multiprocess==0.70.16
451
+ munkres==1.1.4
452
+ murmurhash==1.0.10
453
+ mypy-extensions==1.0.0
454
+ namex==0.0.8
455
+ nb-conda-kernels==2.3.1
456
+ nb_conda==2.2.1
457
+ nbclassic==1.0.0
458
+ nbclient==0.5.13
459
+ nbconvert==6.4.5
460
+ nbdime==3.2.0
461
+ nbformat==5.9.2
462
+ ndindex==1.8
463
+ nest-asyncio==1.5.8
464
+ networkx==3.2.1
465
+ nibabel==5.2.1
466
+ nilearn==0.10.4
467
+ ninja==1.11.1.1
468
+ nltk==3.2.4
469
+ nose==1.3.7
470
+ notebook==6.5.4
471
+ notebook==6.5.6
472
+ notebook_executor==0.2
473
+ notebook_shim==0.2.3
474
+ numba==0.58.1
475
+ numba==0.59.1
476
+ numexpr==2.10.0
477
+ numpy==1.26.4
478
+ nvidia-ml-py==11.495.46
479
+ nvtx==0.2.10
480
+ oauth2client==4.1.3
481
+ oauthlib==3.2.2
482
+ objsize==0.6.1
483
+ odfpy==1.4.1
484
+ olefile==0.47
485
+ onnx==1.16.0
486
+ opencensus-context==0.1.3
487
+ opencensus==0.11.4
488
+ opencv-contrib-python==4.9.0.80
489
+ opencv-python-headless==4.9.0.80
490
+ opencv-python==4.9.0.80
491
+ openpyxl==3.1.2
492
+ openslide-python==1.3.1
493
+ opentelemetry-api==1.22.0
494
+ opentelemetry-exporter-otlp-proto-common==1.22.0
495
+ opentelemetry-exporter-otlp-proto-grpc==1.22.0
496
+ opentelemetry-exporter-otlp-proto-http==1.22.0
497
+ opentelemetry-exporter-otlp==1.22.0
498
+ opentelemetry-proto==1.22.0
499
+ opentelemetry-sdk==1.22.0
500
+ opentelemetry-semantic-conventions==0.43b0
501
+ opt-einsum==3.3.0
502
+ optax==0.2.2
503
+ optree==0.11.0
504
+ optuna==3.6.1
505
+ orbax-checkpoint==0.5.9
506
+ ordered-set==4.1.0
507
+ orjson==3.9.10
508
+ ortools==9.4.1874
509
+ osmnx==1.9.2
510
+ overrides==7.4.0
511
+ packaging==21.3
512
+ pandas-datareader==0.10.0
513
+ pandas-profiling==3.6.6
514
+ pandas-summary==0.2.0
515
+ pandas==2.1.4
516
+ pandas==2.2.2
517
+ pandasql==0.7.3
518
+ pandocfilters==1.5.0
519
+ panel==1.4.1
520
+ papermill==2.5.0
521
+ param==2.1.0
522
+ parso==0.8.3
523
+ partd==1.4.1
524
+ path.py==12.5.0
525
+ path==16.14.0
526
+ pathos==0.3.2
527
+ pathy==0.10.3
528
+ patsy==0.5.6
529
+ pdf2image==1.17.0
530
+ peft==0.11.1
531
+ pettingzoo==1.24.0
532
+ pexpect==4.8.0
533
+ pexpect==4.9.0
534
+ phik==0.12.4
535
+ pickleshare==0.7.5
536
+ pillow==10.3.0
537
+ pip==23.3.2
538
+ pkgutil_resolve_name==1.3.10
539
+ platformdirs==4.2.0
540
+ plotly-express==0.4.1
541
+ plotly==5.18.0
542
+ plotnine==0.13.4
543
+ pluggy==1.4.0
544
+ pointpats==2.4.0
545
+ polars==0.20.21
546
+ polyglot==16.7.4
547
+ pooch==1.8.1
548
+ pox==0.3.4
549
+ ppca==0.0.4
550
+ ppft==1.7.6.8
551
+ preprocessing==0.1.13
552
+ preshed==3.0.9
553
+ prettytable==3.9.0
554
+ progressbar2==4.4.2
555
+ prometheus-client==0.19.0
556
+ promise==2.3
557
+ prompt-toolkit==3.0.42
558
+ prompt-toolkit==3.0.43
559
+ prophet==1.1.1
560
+ proto-plus==1.23.0
561
+ protobuf==3.20.3
562
+ protobuf==4.21.12
563
+ psutil==5.9.3
564
+ psutil==5.9.7
565
+ ptyprocess==0.7.0
566
+ pudb==2024.1
567
+ pure-eval==0.2.2
568
+ py-cpuinfo==9.0.0
569
+ py-spy==0.3.14
570
+ py4j==0.10.9.7
571
+ pyLDAvis==3.4.1
572
+ pyOpenSSL==23.3.0
573
+ pyaml==23.12.0
574
+ pyarrow-hotfix==0.6
575
+ pyarrow==15.0.2
576
+ pyasn1-modules==0.3.0
577
+ pyasn1==0.5.1
578
+ pybind11==2.12.0
579
+ pyclipper==1.3.0.post5
580
+ pycodestyle==2.11.1
581
+ pycosat==0.6.6
582
+ pycparser==2.21
583
+ pycryptodome==3.20.0
584
+ pyct==0.5.0
585
+ pycuda==2024.1
586
+ pydantic==2.5.3
587
+ pydantic==2.7.0
588
+ pydantic_core==2.14.6
589
+ pydantic_core==2.18.1
590
+ pydegensac==0.1.2
591
+ pydicom==2.4.4
592
+ pydocstyle==6.3.0
593
+ pydot==1.4.2
594
+ pydub==0.25.1
595
+ pyemd==1.0.0
596
+ pyerfa==2.0.1.4
597
+ pyexcel-io==0.6.6
598
+ pyexcel-ods==0.6.0
599
+ pyflakes==3.2.0
600
+ pygltflib==1.16.2
601
+ pykalman==0.9.7
602
+ pylibraft==23.8.0
603
+ pylint==3.1.0
604
+ pymc3==3.11.4
605
+ pymongo==3.13.0
606
+ pynndescent==0.5.12
607
+ pynvml==11.4.1
608
+ pynvrtc==9.2
609
+ pyparsing==3.1.1
610
+ pyparsing==3.1.2
611
+ pypdf==4.2.0
612
+ pyproj==3.6.1
613
+ pysal==24.1
614
+ pyshp==2.3.1
615
+ pytesseract==0.3.10
616
+ pytest==8.1.1
617
+ python-bidi==0.4.2
618
+ python-dateutil==2.9.0.post0
619
+ python-dotenv==1.0.0
620
+ python-json-logger==2.0.7
621
+ python-louvain==0.16
622
+ python-lsp-jsonrpc==1.1.2
623
+ python-lsp-server==1.11.0
624
+ python-slugify==8.0.4
625
+ python-utils==3.8.2
626
+ pythreejs==2.4.2
627
+ pytoolconfig==1.3.1
628
+ pytools==2024.1.1
629
+ pytorch-ignite==0.5.0.post2
630
+ pytorch-lightning==2.2.2
631
+ pytz==2023.3.post1
632
+ pytz==2024.1
633
+ pyu2f==0.1.5
634
+ pyviz_comms==3.0.2
635
+ pyzmq==24.0.1
636
+ pyzmq==25.1.2
637
+ qgrid==1.3.1
638
+ qtconsole==5.5.1
639
+ quantecon==0.7.2
640
+ qudida==0.0.4
641
+ raft-dask==23.8.0
642
+ rasterio==1.3.10
643
+ rasterstats==0.19.0
644
+ ray-cpp==2.9.0
645
+ ray==2.9.0
646
+ referencing==0.32.1
647
+ regex==2023.12.25
648
+ requests-oauthlib==1.3.1
649
+ requests-toolbelt==0.10.1
650
+ requests==2.31.0
651
+ retrying==1.3.3
652
+ retrying==1.3.4
653
+ rfc3339-validator==0.1.4
654
+ rfc3986-validator==0.1.1
655
+ rgf-python==3.12.0
656
+ rich-click==1.7.4
657
+ rich==13.7.0
658
+ rich==13.7.1
659
+ rmm==23.8.0
660
+ rope==1.13.0
661
+ rpds-py==0.16.2
662
+ rsa==4.9
663
+ ruamel-yaml-conda==0.15.100
664
+ ruamel.yaml.clib==0.2.7
665
+ ruamel.yaml==0.17.40
666
+ s2sphere==0.2.5
667
+ s3fs==2024.2.0
668
+ s3transfer==0.6.2
669
+ safetensors==0.4.3
670
+ scattertext==0.1.19
671
+ scikit-image==0.22.0
672
+ scikit-learn-intelex==2024.3.0
673
+ scikit-learn==1.2.2
674
+ scikit-multilearn==0.2.0
675
+ scikit-optimize==0.10.1
676
+ scikit-plot==0.3.7
677
+ scikit-surprise==1.1.3
678
+ scipy==1.11.4
679
+ scipy==1.13.0
680
+ seaborn==0.12.2
681
+ segment_anything==1.0
682
+ segregation==2.5
683
+ semver==3.0.2
684
+ sentencepiece==0.2.0
685
+ sentry-sdk==1.45.0
686
+ setproctitle==1.3.3
687
+ setuptools-git==1.2
688
+ setuptools-scm==8.0.4
689
+ setuptools==69.0.3
690
+ shap==0.44.1
691
+ shapely==2.0.4
692
+ shellingham==1.5.4
693
+ shtab==1.7.1
694
+ simpervisor==1.0.0
695
+ simplejson==3.19.2
696
+ six==1.16.0
697
+ sklearn-pandas==2.2.0
698
+ slicer==0.0.7
699
+ smart-open==6.4.0
700
+ smmap==5.0.1
701
+ sniffio==1.3.0
702
+ snowballstemmer==2.2.0
703
+ snuggs==1.4.7
704
+ sortedcontainers==2.4.0
705
+ soundfile==0.12.1
706
+ soupsieve==2.5
707
+ soxr==0.3.7
708
+ spacy-legacy==3.0.12
709
+ spacy-loggers==1.0.5
710
+ spacy==3.7.3
711
+ spaghetti==1.7.5.post1
712
+ spectral==0.23.1
713
+ spglm==1.1.0
714
+ sphinx-rtd-theme==0.2.4
715
+ spint==1.0.7
716
+ splot==1.1.5.post1
717
+ spopt==0.6.0
718
+ spreg==1.4.2
719
+ spvcm==0.3.0
720
+ sqlparse==0.4.4
721
+ squarify==0.4.3
722
+ srsly==2.4.8
723
+ stable-baselines3==2.1.0
724
+ stack-data==0.6.2
725
+ stack-data==0.6.3
726
+ stanio==0.5.0
727
+ starlette==0.32.0.post1
728
+ statsmodels==0.14.1
729
+ stemming==1.0.1
730
+ stop-words==2018.7.23
731
+ stopit==1.1.2
732
+ stumpy==1.12.0
733
+ sympy==1.12
734
+ tables==3.9.2
735
+ tabulate==0.9.0
736
+ tangled-up-in-unicode==0.2.0
737
+ tbb==2021.12.0
738
+ tblib==3.0.0
739
+ tenacity==8.2.3
740
+ tensorboard-data-server==0.7.2
741
+ tensorboard-plugin-profile==2.15.0
742
+ tensorboard==2.15.1
743
+ tensorboardX==2.6.2.2
744
+ tensorflow-cloud==0.1.16
745
+ tensorflow-datasets==4.9.4
746
+ tensorflow-decision-forests==1.8.1
747
+ tensorflow-estimator==2.15.0
748
+ tensorflow-hub==0.16.1
749
+ tensorflow-io-gcs-filesystem==0.35.0
750
+ tensorflow-io==0.35.0
751
+ tensorflow-metadata==0.14.0
752
+ tensorflow-probability==0.23.0
753
+ tensorflow-serving-api==2.14.1
754
+ tensorflow-text==2.15.0
755
+ tensorflow-transform==0.14.0
756
+ tensorflow==2.15.0
757
+ tensorstore==0.1.56
758
+ termcolor==2.4.0
759
+ terminado==0.18.0
760
+ testpath==0.6.0
761
+ text-unidecode==1.3
762
+ textblob==0.18.0.post0
763
+ texttable==1.7.0
764
+ tf_keras==2.15.1
765
+ tfp-nightly==0.24.0.dev0
766
+ thinc==8.2.2
767
+ threadpoolctl==3.2.0
768
+ tifffile==2023.12.9
769
+ timm==0.9.16
770
+ tinycss2==1.2.1
771
+ tobler==0.11.2
772
+ tokenizers==0.15.2
773
+ toml==0.10.2
774
+ tomli==2.0.1
775
+ tomlkit==0.12.4
776
+ toolz==0.12.1
777
+ torch==2.1.2
778
+ torchaudio==2.1.2
779
+ torchdata==0.7.1
780
+ torchinfo==1.8.0
781
+ torchmetrics==1.3.2
782
+ torchtext==0.16.2
783
+ torchvision==0.16.2
784
+ tornado==6.3.3
785
+ tqdm==4.66.1
786
+ traceml==1.0.8
787
+ traitlets==5.9.0
788
+ traittypes==0.2.1
789
+ transformers==4.39.3
790
+ treelite-runtime==3.2.0
791
+ treelite==3.2.0
792
+ trl==0.8.6
793
+ truststore==0.8.0
794
+ trx-python==0.2.9
795
+ tsfresh==0.20.2
796
+ typeguard==4.1.5
797
+ typer==0.9.0
798
+ typer==0.9.4
799
+ types-python-dateutil==2.8.19.20240106
800
+ typing-inspect==0.9.0
801
+ typing-utils==0.1.0
802
+ typing_extensions==4.9.0
803
+ tyro==0.8.4
804
+ tzdata==2023.4
805
+ uc-micro-py==1.0.3
806
+ ucx-py==0.33.0
807
+ ujson==5.9.0
808
+ umap-learn==0.5.6
809
+ unicodedata2==15.1.0
810
+ update-checker==0.18.0
811
+ uri-template==1.3.0
812
+ uritemplate==3.0.1
813
+ urllib3==1.26.18
814
+ urllib3==2.1.0
815
+ urwid==2.6.10
816
+ urwid_readline==0.14
817
+ uvicorn==0.25.0
818
+ uvloop==0.19.0
819
+ vaex-astro==0.9.3
820
+ vaex-core==4.17.1
821
+ vaex-hdf5==0.14.1
822
+ vaex-jupyter==0.8.2
823
+ vaex-ml==0.18.3
824
+ vaex-server==0.9.0
825
+ vaex-viz==0.5.4
826
+ vaex==4.17.0
827
+ vec_noise==1.1.4
828
+ vecstack==0.4.0
829
+ virtualenv==20.21.0
830
+ visions==0.7.5
831
+ vowpalwabbit==9.9.0
832
+ vtk==9.3.0
833
+ wandb==0.16.6
834
+ wasabi==1.1.2
835
+ watchfiles==0.21.0
836
+ wavio==0.0.8
837
+ wcwidth==0.2.13
838
+ weasel==0.3.4
839
+ webcolors==1.13
840
+ webencodings==0.5.1
841
+ websocket-client==1.7.0
842
+ websockets==12.0
843
+ wfdb==4.1.2
844
+ whatthepatch==1.0.5
845
+ wheel==0.42.0
846
+ widgetsnbextension==3.6.6
847
+ witwidget==1.8.1
848
+ woodwork==0.30.0
849
+ wordcloud==1.9.3
850
+ wordsegment==1.3.1
851
+ wrapt==1.14.1
852
+ xarray-einstats==0.7.0
853
+ xarray==2024.3.0
854
+ xgboost==2.0.3
855
+ xvfbwrapper==0.2.9
856
+ xxhash==3.4.1
857
+ xyzservices==2024.4.0
858
+ y-py==0.6.2
859
+ yapf==0.40.2
860
+ yarl==1.9.3
861
+ yarl==1.9.4
862
+ ydata-profiling==4.6.4
863
+ yellowbrick==1.5
864
+ ypy-websocket==0.8.4
865
+ zict==3.0.0
866
+ zipp==3.17.0
867
+ zstandard==0.22.0
wandb/run-20240522_054348-vgrzs6jq/files/wandb-metadata.json ADDED
@@ -0,0 +1,69 @@
1
+ {
2
+ "os": "Linux-5.15.133+-x86_64-with-glibc2.31",
3
+ "python": "3.10.13",
4
+ "heartbeatAt": "2024-05-22T05:43:48.890724",
5
+ "startedAt": "2024-05-22T05:43:48.627495",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "-f",
10
+ "/tmp/tmpem7pu0hu.json",
11
+ "--HistoryManager.hist_file=:memory:"
12
+ ],
13
+ "state": "running",
14
+ "program": "<python with no main file>",
15
+ "codePathLocal": null,
16
+ "host": "92dcc4555414",
17
+ "username": "root",
18
+ "executable": "/opt/conda/bin/python",
19
+ "cpu_count": 2,
20
+ "cpu_count_logical": 4,
21
+ "cpu_freq": {
22
+ "current": 2000.194,
23
+ "min": 0.0,
24
+ "max": 0.0
25
+ },
26
+ "cpu_freq_per_core": [
27
+ {
28
+ "current": 2000.194,
29
+ "min": 0.0,
30
+ "max": 0.0
31
+ },
32
+ {
33
+ "current": 2000.194,
34
+ "min": 0.0,
35
+ "max": 0.0
36
+ },
37
+ {
38
+ "current": 2000.194,
39
+ "min": 0.0,
40
+ "max": 0.0
41
+ },
42
+ {
43
+ "current": 2000.194,
44
+ "min": 0.0,
45
+ "max": 0.0
46
+ }
47
+ ],
48
+ "disk": {
49
+ "/": {
50
+ "total": 8062.387607574463,
51
+ "used": 5598.47790145874
52
+ }
53
+ },
54
+ "gpu": "Tesla T4",
55
+ "gpu_count": 2,
56
+ "gpu_devices": [
57
+ {
58
+ "name": "Tesla T4",
59
+ "memory_total": 16106127360
60
+ },
61
+ {
62
+ "name": "Tesla T4",
63
+ "memory_total": 16106127360
64
+ }
65
+ ],
66
+ "memory": {
67
+ "total": 31.357555389404297
68
+ }
69
+ }
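The metadata fixes the hardware budget: two 16 GB Tesla T4s and about 31 GB of system RAM, which is why the 7B base is loaded in 4-bit and ends up split across both GPUs (the earlier notice that is_model_parallel is force-set to True is the symptom of that sharding). A minimal load with the quantization settings recorded in this run; device_map="auto" is an assumption, since the log only records the multi-GPU outcome:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-chat-hf",
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        ),
        device_map="auto",  # assumption: shard the layers over both T4s
    )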
wandb/run-20240522_054348-vgrzs6jq/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/loss": 0.5057, "train/grad_norm": 6.874316215515137, "train/learning_rate": 1.0444444444444445e-05, "train/rewards/chosen": -4.634873390197754, "train/rewards/rejected": -9.829792976379395, "train/rewards/accuracies": 0.875, "train/rewards/margins": 5.194918632507324, "train/logps/rejected": -3123.00830078125, "train/logps/chosen": -1652.506591796875, "train/logits/rejected": -4.661086082458496, "train/logits/chosen": -4.734623908996582, "train/epoch": 0.06, "train/global_step": 100, "_timestamp": 1716364238.0474954, "_runtime": 7609.412012338638, "_step": 10, "eval/loss": 0.4624544084072113, "eval/runtime": 5068.444, "eval/samples_per_second": 0.355, "eval/steps_per_second": 0.355, "eval/rewards/chosen": -5.165964603424072, "eval/rewards/rejected": -10.169805526733398, "eval/rewards/accuracies": 0.8722931742668152, "eval/rewards/margins": 5.003841876983643, "eval/logps/rejected": -3300.483154296875, "eval/logps/chosen": -2120.26904296875, "eval/logits/rejected": -4.54031229019165, "eval/logits/chosen": -4.554856777191162}
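The reward fields follow TRL's DPO definitions: each reward is beta times the policy/reference log-probability ratio on that completion, and rewards/margins is chosen minus rejected, which the logged means reproduce up to batch-averaging error. A quick check of the numbers above:

    # margins ~ chosen - rejected (means are taken batch-wise first, so
    # the match is only approximate in floating point)
    print(-4.634873390197754 - (-9.829792976379395))    # 5.19492..., logged 5.194918...
    print(-5.165964603424072 - (-10.169805526733398))   # 5.00384..., logged 5.003841...

    # eval throughput is consistent with the wall clock:
    print(1801 / 0.355)   # ~5073 s, logged eval/runtime 5068.4 s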
wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
wandb/run-20240522_054348-vgrzs6jq/logs/debug.log ADDED
@@ -0,0 +1,30 @@
1
+ 2024-05-22 05:43:48,628 INFO MainThread:24 [wandb_setup.py:_flush():76] Current SDK version is 0.16.6
2
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Configure stats pid to 24
3
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /root/.config/wandb/settings
4
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from /kaggle/working/wandb/settings
5
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
8
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {'api_key': '***REDACTED***'}
9
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_setup.py:_flush():76] Applying login settings: {}
10
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():521] Logging user logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug.log
11
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:_log_setup():522] Logging internal logs to /kaggle/working/wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log
12
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():561] calling init triggers
13
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():568] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():611] starting backend
16
+ 2024-05-22 05:43:48,629 INFO MainThread:24 [wandb_init.py:init():615] setting up manager
17
+ 2024-05-22 05:43:48,632 INFO MainThread:24 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-05-22 05:43:48,635 INFO MainThread:24 [wandb_init.py:init():623] backend started and connected
19
+ 2024-05-22 05:43:48,638 INFO MainThread:24 [wandb_init.py:init():715] updated telemetry
20
+ 2024-05-22 05:43:48,641 INFO MainThread:24 [wandb_init.py:init():748] communicating run to backend with 90.0 second timeout
21
+ 2024-05-22 05:43:48,764 INFO MainThread:24 [wandb_run.py:_on_init():2357] communicating current version
22
+ 2024-05-22 05:43:48,850 INFO MainThread:24 [wandb_run.py:_on_init():2366] got version response upgrade_message: "wandb version 0.17.0 is available! To upgrade, please run:\n $ pip install wandb --upgrade"
23
+
24
+ 2024-05-22 05:43:48,851 INFO MainThread:24 [wandb_init.py:init():799] starting run threads in backend
25
+ 2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_console_start():2335] atexit reg
26
+ 2024-05-22 05:44:04,914 INFO MainThread:24 [wandb_run.py:_redirect():2190] redirect: wrap_raw
27
+ 2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2255] Wrapping output streams.
28
+ 2024-05-22 05:44:04,915 INFO MainThread:24 [wandb_run.py:_redirect():2280] Redirects installed.
29
+ 2024-05-22 05:44:04,916 INFO MainThread:24 [wandb_init.py:init():842] run started, returning control to user process
30
+ 2024-05-22 05:47:10,600 INFO MainThread:24 [wandb_run.py:_config_callback():1347] config_cb None None {'vocab_size': 32000, 'max_position_embeddings': 4096, 'hidden_size': 4096, 'intermediate_size': 11008, 'num_hidden_layers': 32, 'num_attention_heads': 32, 'num_key_value_heads': 32, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': False, 'rope_theta': 10000.0, 'rope_scaling': None, 'attention_bias': False, 'attention_dropout': 0.0, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': False, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 1, 'pad_token_id': None, 'eos_token_id': 2, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'meta-llama/Llama-2-7b-chat-hf', 'transformers_version': '4.39.3', 'model_type': 'llama', 'quantization_config': {'quant_method': 'QuantizationMethod.BITS_AND_BYTES', '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': False, 'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_storage': 'uint8', 'load_in_4bit': True, 'load_in_8bit': False}, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'cosine', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'info', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May22_05-46-00_92dcc4555414', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 10, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 100, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': 
False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 100, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': False, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'paged_adamw_32bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'VanCan23/DPO_Vietnamese_chatbot_lessData', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': {'use_reentrant': False}, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None}
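For reproduction, the flattened config dump above collapses into two small objects. A hedged reconstruction, setting only the values visible in the log and leaving everything else at the library defaults:

    import torch
    from transformers import BitsAndBytesConfig, TrainingArguments

    # Quantization, as recorded under quantization_config.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=False,
        bnb_4bit_compute_dtype=torch.float16,
    )

    # Optimizer, schedule and checkpointing, as recorded above.
    training_args = TrainingArguments(
        output_dir="/kaggle/working/",
        per_device_train_batch_size=1,
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=2e-5,
        num_train_epochs=1,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
        fp16=True,
        optim="paged_adamw_32bit",
        logging_steps=10,
        evaluation_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        save_total_limit=1,
        gradient_checkpointing=True,
        report_to=["tensorboard", "wandb"],
        push_to_hub=True,
        hub_model_id="VanCan23/DPO_Vietnamese_chatbot_lessData",
    )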
wandb/run-20240522_054348-vgrzs6jq/run-vgrzs6jq.wandb ADDED
Binary file (464 kB). View file