Yhyu13 committed
Commit d0574d4
1 Parent(s): 4891550
Predict_50/all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "predict_bleu-4": 90.129256,
+ "predict_rouge-1": 92.062058,
+ "predict_rouge-2": 86.382876,
+ "predict_rouge-l": 89.961892,
+ "predict_runtime": 218.7949,
+ "predict_samples_per_second": 0.229,
+ "predict_steps_per_second": 0.229
+ }
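The throughput fields above follow directly from the run size: 50 samples at batch size 1 over the reported runtime. A minimal sanity-check sketch in Python (the file path is the one added in this commit; nothing else is assumed):

```python
import json

# Load the prediction metrics written by the Seq2SeqTrainer run.
with open("Predict_50/all_results.json") as f:
    metrics = json.load(f)

# 50 samples at batch size 1 => samples/s and steps/s should both equal
# num_samples / predict_runtime, which matches the reported 0.229.
num_samples = 50
throughput = num_samples / metrics["predict_runtime"]  # 50 / 218.7949 ≈ 0.229
print(f"derived samples/s: {throughput:.3f}, reported: {metrics['predict_samples_per_second']}")
```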
Predict_50/generated_predictions.jsonl ADDED
@@ -0,0 +1,50 @@
1
+ {"label": "I'm sorry, but I don't have the capability to book flights. My current function allows me to get the exchange rate between two currencies. If you need help with that, feel free to ask!", "predict": "I'm sorry, but I don't have the capability to book flights. My current function allows me to get the exchange rate between two currencies. If you need that information, feel free to ask."}
2
+ {"label": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"United States\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"United States\"}'} </functioncall>"}
3
+ {"label": "Here are the latest news headlines for the United States:\n1. Biden announces new vaccine mandates\n2. Hurricane Ida devastates Louisiana\n3. Apple unveils new iPhone\n4. NASA's Perseverance rover collects first Mars rock sample", "predict": "Here are the latest news headlines for the United States:\n1. Biden announces new vaccine mandates\n2. Hurricane Ida devastates Louisiana\n3. Apple unveils new iPhone\n4. NASA's Perseverance rover collects first Mars rock sample"}
4
+ {"label": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"France\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"France\"}'} </functioncall>"}
5
+ {"label": "Here are the latest news headlines for France:\n1. France recalls ambassadors to US and Australia\n2. French election: Macron's party braces for tough fight\n3. Louvre Museum to undergo major overhaul\n4. France to offer free birth control to all women under 25", "predict": "Here are the latest news headlines for France:\n1. France recalls ambassadors to US and Australia\n2. French election: Macron's party braces for tough fight\n3. Louvre Museum to undergo major overhaul\n4. France to offer free birth control to all women under 25"}
6
+ {"label": "Of course. How long would you like your password to be? And would you like it to include symbols?", "predict": "Of course, I can help with that. How long would you like your password to be? And would you like it to include symbols?"}
7
+ {"label": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_symbols\": true}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_symbols\": true}'} </functioncall>"}
8
+ {"label": "Here is your new password: 4&7j#9@1Q6*. Please make sure to save it in a secure location.", "predict": "Here is your new password: 4&7j#9@1Q6*. Please make sure to save it in a secure place."}
9
+ {"label": "Of course! How long would you like your password to be? And do you want it to include numbers and special characters?", "predict": "Of course! How long would you like your password to be? And would you like it to include numbers and/or special characters?"}
10
+ {"label": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_numbers\": true, \"include_special_characters\": true}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_numbers\": true, \"include_special_characters\": true}'} </functioncall>"}
11
+ {"label": "Here is your new password: 4#7gB6&9L1!0. Please make sure to save it in a secure place.", "predict": "Here is your new password: 4#7gB6&9L1!0. Please make sure to save it in a secure place."}
12
+ {"label": "Of course, I can help you with that. Please provide me with the list of numbers.", "predict": "Of course, I can help you with that. Please provide me with the list of numbers."}
13
+ {"label": "<functioncall> {\"name\": \"calculate_median\", \"arguments\": '{\"numbers\": [5, 2, 9, 1, 7, 4, 6, 3, 8]}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_median\", \"arguments\": '{\"numbers\": [5, 2, 9, 1, 7, 4, 6, 3, 8]}'} </functioncall>"}
14
+ {"label": "The median of the provided list of numbers is 5.", "predict": "The median of the numbers you provided is 5."}
15
+ {"label": "Of course, I can help you with that. Let me calculate it for you.\n<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"principal\": 50000, \"interest_rate\": 5, \"loan_term\": 10}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"principal\": 50000, \"interest_rate\": 5, \"loan_term\": 10}'} </functioncall>"}
16
+ {"label": "Based on the information provided, your monthly loan payment would be $530.33.", "predict": "The monthly payment for your loan would be approximately $530.33."}
17
+ {"label": "I'm sorry, but as an AI, I don't have the capability to book flights or perform external tasks. My current function is to calculate loan payments. I recommend using a travel booking website or app for your flight booking needs.", "predict": "I'm sorry, but as an AI, I don't have the capability to book flights or perform external tasks. My current functions are limited to the ones provided to me. In this case, I can only calculate loan payments."}
18
+ {"label": "<functioncall> {\"name\": \"convert_temperature\", \"arguments\": '{\"temperature\": 30, \"from_unit\": \"Celsius\", \"to_unit\": \"Fahrenheit\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"convert_temperature\", \"arguments\": '{\"temperature\": 30, \"from_unit\": \"Celsius\", \"to_unit\": \"Fahrenheit\"}'} </functioncall>"}
19
+ {"label": "The converted temperature from 30 degrees Celsius to Fahrenheit is 86 degrees Fahrenheit.", "predict": "The converted temperature from 30 degrees Celsius to Fahrenheit is 86 degrees Fahrenheit."}
20
+ {"label": "I'm sorry, but I can't assist with that. My current capabilities allow me to provide information about movies based on their titles. I don't have the ability to place orders or perform external tasks.", "predict": "I'm sorry, but I don't have the capability to perform external tasks such as ordering a pizza. My current function allows me to retrieve movie details based on the title provided. If you need more information about a movie, feel free to ask!"}
21
+ {"label": "Sure, let me calculate that for you.\n<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 20}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 20}'} </functioncall>"}
22
+ {"label": "Based on the bill amount and the tip percentage you provided, the tip you should leave is $10.", "predict": "Based on the bill amount and the tip percentage you provided, the tip you should leave is $10."}
23
+ {"label": "<functioncall> {\"name\": \"create_contact\", \"arguments\": '{\"name\": \"John Doe\", \"email\": \"johndoe@example.com\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"create_contact\", \"arguments\": '{\"name\": \"John Doe\", \"email\": \"johndoe@example.com\"}'} </functioncall>"}
24
+ {"label": "I have successfully created a new contact for your friend John Doe with the email johndoe@example.com.", "predict": "The contact for John Doe with email johndoe@example.com has been created successfully."}
25
+ {"label": "Of course, I can help with that. Could you please provide me with the title of the event, and the start and end times?", "predict": "Of course! I can help with that. Please provide me with the title of the event, the start time, and the end time in the format YYYY-MM-DD HH:MM."}
26
+ {"label": "<functioncall> {\"name\": \"create_calendar_event\", \"arguments\": '{\n \"title\": \"Team Meeting\",\n \"start_time\": \"2022-03-15 10:00\",\n \"end_time\": \"2022-03-15 11:00\"\n}'} </functioncall>", "predict": "<functioncall> {\"name\": \"create_calendar_event\", \"arguments\": '{\"title\": \"Team Meeting\", \"start_time\": \"2022-09-23 10:00\", \"end_time\": \"2022-09-23 11:00\"}'} </functioncall>"}
27
+ {"label": "The event 'Team Meeting' has been successfully created for the time slot 10:00 AM to 11:00 AM on March 15, 2022.", "predict": "The event 'Team Meeting' has been successfully created for the time slot 10:00 AM to 11:00 AM on 2022-03-15."}
28
+ {"label": "I'm sorry, but I'm unable to send emails as it's an external task and I don't have a function provided for that task. However, I can assist you in creating calendar events.", "predict": "I'm sorry, but as an AI, I don't have the capability to send emails. My current functions allow me to create calendar events, but I can't perform external tasks like sending emails. You might want to use your email client for that."}
29
+ {"label": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"Inception\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"Inception\"}'} </functioncall>"}
30
+ {"label": "The movie \"Inception\" was directed by Christopher Nolan and was released in the year 2010. It falls under the genre of Science fiction and has a rating of 8.8/10.", "predict": "The movie \"Inception\" is a science fiction film directed by Christopher Nolan. It was released in 2010 and has a rating of 8.8 out of 10."}
31
+ {"label": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"The Godfather\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"The Godfather\"}'} </functioncall>"}
32
+ {"label": "The movie \"The Godfather\" was directed by Francis Ford Coppola and was released in the year 1972. It falls under the genre of Crime and Drama and has a rating of 9.2/10.", "predict": "The movie \"The Godfather\" was directed by Francis Ford Coppola and was released in the year 1972. It falls under the genre of Crime, Drama and has a rating of 9.2/10."}
33
+ {"label": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"AAPL\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"AAPL\"}'} </functioncall>"}
34
+ {"label": "The current price of Apple's stock (AAPL) is $150.75.", "predict": "The current price of Apple's stock (AAPL) is $150.75."}
35
+ {"label": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"MSFT\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"MSFT\"}'} </functioncall>"}
36
+ {"label": "The current price of Microsoft's stock (MSFT) is $210.22.", "predict": "The current price of Microsoft's stock (MSFT) is $210.22."}
37
+ {"label": "You're welcome! If you have any other questions, feel free to ask.", "predict": "You're welcome! If you have any other questions, feel free to ask."}
38
+ {"label": "Of course, I'd be happy to help. Could you please provide me with the total loan amount, the annual interest rate, and the duration of the loan in months?", "predict": "Of course, I'd be happy to help. Could you please provide me with the total loan amount, the annual interest rate, and the duration of the loan in months?"}
39
+ {"label": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"loan_amount\": 200000, \"interest_rate\": 3.5, \"loan_term\": 360}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"loan_amount\": 200000, \"interest_rate\": 3.5, \"loan_term\": 30*12}'} </functioncall>"}
40
+ {"label": "Based on the information you provided, your estimated monthly payment for this loan would be approximately $898.09.", "predict": "Based on the information you provided, your monthly payment for this loan would be approximately $898.09."}
41
+ {"label": "I'm sorry, but I'm unable to perform external tasks such as ordering a pizza. My current function allows me to fetch the latest news headlines for you. If you need any information regarding that, feel free to ask.", "predict": "I'm sorry, but I don't have the capability to order a pizza as it's an external task and I don't have a function provided for that task. I can assist you with the latest news headlines if you'd like."}
42
+ {"label": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 15}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 15}'} </functioncall>"}
43
+ {"label": "Based on the bill amount and the tip percentage you provided, the tip amount you should give is $7.5.", "predict": "Based on your bill amount and the tip percentage you want to give, the tip amount you should give is $7.5."}
44
+ {"label": "You're welcome! If you have any other questions, feel free to ask.", "predict": "You're welcome! If you have any other questions, feel free to ask."}
45
+ {"label": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 1, \"max\": 100}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 1, \"max\": 100}'} </functioncall>"}
46
+ {"label": "The random number generated between 1 and 100 is 42.", "predict": "The random number generated between 1 and 100 is 42."}
47
+ {"label": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 200, \"max\": 300}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 200, \"max\": 300}'} </functioncall>"}
48
+ {"label": "The random number generated between 200 and 300 is 278.", "predict": "The random number generated between 200 and 300 is 278."}
49
+ {"label": "You're welcome! If you need anything else, feel free to ask.", "predict": "You're welcome! If you need anything else, feel free to ask."}
50
+ {"label": "<functioncall> {\"name\": \"search_books\", \"arguments\": '{\"author\": \"George Orwell\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_books\", \"arguments\": '{\"author\": \"George Orwell\"}'} </functioncall>"}
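Most of the pairs above either reproduce the reference `<functioncall>` string verbatim or paraphrase the natural-language replies. A quick way to quantify that from the file is an exact-match count; the sketch below uses only the standard library, and bucketing rows by the `<functioncall>` prefix is just one convenient way to separate tool calls from chat replies:

```python
import json

exact, fc_total, fc_exact, total = 0, 0, 0, 0
with open("Predict_50/generated_predictions.jsonl") as f:
    for line in f:
        row = json.loads(line)
        label, predict = row["label"], row["predict"]
        total += 1
        exact += int(label == predict)
        # Rows whose reference is a tool call: check the call is reproduced verbatim.
        if label.startswith("<functioncall>"):
            fc_total += 1
            fc_exact += int(label.strip() == predict.strip())

print(f"exact match: {exact}/{total}")
print(f"function-call exact match: {fc_exact}/{fc_total}")
```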
Predict_50/predict_log.txt ADDED
@@ -0,0 +1,312 @@
1
+ 01/21/2024 14:10:45 - WARNING - llmtuner.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
2
+ [INFO|training_args.py:1838] 2024-01-21 14:10:45,405 >> PyTorch: setting up devices
3
+ /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/training_args.py:1751: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.
4
+ warnings.warn(
5
+ 01/21/2024 14:10:45 - INFO - llmtuner.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1
6
+ distributed training: True, compute dtype: None
7
+ 01/21/2024 14:10:45 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments(
8
+ _n_gpu=1,
9
+ adafactor=False,
10
+ adam_beta1=0.9,
11
+ adam_beta2=0.999,
12
+ adam_epsilon=1e-08,
13
+ auto_find_batch_size=False,
14
+ bf16=False,
15
+ bf16_full_eval=False,
16
+ data_seed=None,
17
+ dataloader_drop_last=False,
18
+ dataloader_num_workers=0,
19
+ dataloader_persistent_workers=False,
20
+ dataloader_pin_memory=True,
21
+ ddp_backend=None,
22
+ ddp_broadcast_buffers=None,
23
+ ddp_bucket_cap_mb=None,
24
+ ddp_find_unused_parameters=False,
25
+ ddp_timeout=1800,
26
+ debug=[],
27
+ deepspeed=None,
28
+ disable_tqdm=False,
29
+ dispatch_batches=None,
30
+ do_eval=False,
31
+ do_predict=True,
32
+ do_train=False,
33
+ eval_accumulation_steps=None,
34
+ eval_delay=0,
35
+ eval_steps=None,
36
+ evaluation_strategy=IntervalStrategy.NO,
37
+ fp16=False,
38
+ fp16_backend=auto,
39
+ fp16_full_eval=False,
40
+ fp16_opt_level=O1,
41
+ fsdp=[],
42
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
43
+ fsdp_min_num_params=0,
44
+ fsdp_transformer_layer_cls_to_wrap=None,
45
+ full_determinism=False,
46
+ generation_config=None,
47
+ generation_max_length=None,
48
+ generation_num_beams=None,
49
+ gradient_accumulation_steps=1,
50
+ gradient_checkpointing=False,
51
+ gradient_checkpointing_kwargs=None,
52
+ greater_is_better=None,
53
+ group_by_length=False,
54
+ half_precision_backend=auto,
55
+ hub_always_push=False,
56
+ hub_model_id=None,
57
+ hub_private_repo=False,
58
+ hub_strategy=HubStrategy.EVERY_SAVE,
59
+ hub_token=<HUB_TOKEN>,
60
+ ignore_data_skip=False,
61
+ include_inputs_for_metrics=False,
62
+ include_num_input_tokens_seen=False,
63
+ include_tokens_per_second=False,
64
+ jit_mode_eval=False,
65
+ label_names=None,
66
+ label_smoothing_factor=0.0,
67
+ learning_rate=5e-05,
68
+ length_column_name=length,
69
+ load_best_model_at_end=False,
70
+ local_rank=0,
71
+ log_level=passive,
72
+ log_level_replica=warning,
73
+ log_on_each_node=True,
74
+ logging_dir=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50/runs/Jan21_14-10-45_yhyu13fuwuqi,
75
+ logging_first_step=False,
76
+ logging_nan_inf_filter=True,
77
+ logging_steps=500,
78
+ logging_strategy=IntervalStrategy.STEPS,
79
+ lr_scheduler_kwargs={},
80
+ lr_scheduler_type=SchedulerType.LINEAR,
81
+ max_grad_norm=1.0,
82
+ max_steps=-1,
83
+ metric_for_best_model=None,
84
+ mp_parameters=,
85
+ neftune_noise_alpha=None,
86
+ no_cuda=False,
87
+ num_train_epochs=3.0,
88
+ optim=OptimizerNames.ADAMW_TORCH,
89
+ optim_args=None,
90
+ output_dir=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50,
91
+ overwrite_output_dir=False,
92
+ past_index=-1,
93
+ per_device_eval_batch_size=1,
94
+ per_device_train_batch_size=8,
95
+ predict_with_generate=True,
96
+ prediction_loss_only=False,
97
+ push_to_hub=False,
98
+ push_to_hub_model_id=None,
99
+ push_to_hub_organization=None,
100
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
101
+ ray_scope=last,
102
+ remove_unused_columns=True,
103
+ report_to=['tensorboard'],
104
+ resume_from_checkpoint=None,
105
+ run_name=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50,
106
+ save_on_each_node=False,
107
+ save_only_model=False,
108
+ save_safetensors=True,
109
+ save_steps=500,
110
+ save_strategy=IntervalStrategy.STEPS,
111
+ save_total_limit=None,
112
+ seed=42,
113
+ skip_memory_metrics=True,
114
+ sortish_sampler=False,
115
+ split_batches=False,
116
+ tf32=None,
117
+ torch_compile=False,
118
+ torch_compile_backend=None,
119
+ torch_compile_mode=None,
120
+ torchdynamo=None,
121
+ tpu_metrics_debug=False,
122
+ tpu_num_cores=None,
123
+ use_cpu=False,
124
+ use_ipex=False,
125
+ use_legacy_prediction_loop=False,
126
+ use_mps_device=False,
127
+ warmup_ratio=0.0,
128
+ warmup_steps=0,
129
+ weight_decay=0.0,
130
+ )
131
+ [INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer.model
132
+ [INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file added_tokens.json
133
+ [INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file special_tokens_map.json
134
+ [INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer_config.json
135
+ [INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer.json
136
+ [INFO|configuration_utils.py:737] 2024-01-21 14:10:45,521 >> loading configuration file cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser/config.json
137
+ [INFO|configuration_utils.py:802] 2024-01-21 14:10:45,522 >> Model config MistralConfig {
138
+ "_name_or_path": "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser",
139
+ "architectures": [
140
+ "MistralForCausalLM"
141
+ ],
142
+ "attention_dropout": 0.0,
143
+ "bos_token_id": 1,
144
+ "eos_token_id": 2,
145
+ "hidden_act": "silu",
146
+ "hidden_size": 4096,
147
+ "initializer_range": 0.02,
148
+ "intermediate_size": 14336,
149
+ "max_position_embeddings": 32768,
150
+ "model_type": "mistral",
151
+ "num_attention_heads": 32,
152
+ "num_hidden_layers": 32,
153
+ "num_key_value_heads": 8,
154
+ "rms_norm_eps": 1e-05,
155
+ "rope_theta": 10000.0,
156
+ "sliding_window": null,
157
+ "tie_word_embeddings": false,
158
+ "torch_dtype": "bfloat16",
159
+ "transformers_version": "4.36.2",
160
+ "use_cache": false,
161
+ "vocab_size": 32001
162
+ }
163
+
164
+ [INFO|modeling_utils.py:3341] 2024-01-21 14:10:45,538 >> loading weights file cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser/model.safetensors.index.json
165
+ [INFO|modeling_utils.py:1341] 2024-01-21 14:10:45,538 >> Instantiating MistralForCausalLM model under default dtype torch.bfloat16.
166
+ [INFO|configuration_utils.py:826] 2024-01-21 14:10:45,539 >> Generate config GenerationConfig {
167
+ "bos_token_id": 1,
168
+ "eos_token_id": 2,
169
+ "use_cache": false
170
+ }
171
+
172
+
173
+ Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]
174
+ Loading checkpoint shards: 33%|███▎ | 1/3 [00:00<00:00, 5.99it/s]
175
+ Loading checkpoint shards: 67%|██████▋ | 2/3 [00:00<00:00, 5.87it/s]cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
176
+ Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 5.91it/s]
177
+ Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 5.91it/s]cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
178
+ [INFO|modeling_utils.py:4185] 2024-01-21 14:10:46,324 >> All model checkpoint weights were used when initializing MistralForCausalLM.
179
+
180
+ [INFO|modeling_utils.py:4193] 2024-01-21 14:10:46,324 >> All the weights of MistralForCausalLM were initialized from the model checkpoint at ./models/dolphin-2.6-mistral-7b-dpo-laser.
181
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use MistralForCausalLM for predictions without further training.
182
+ [INFO|configuration_utils.py:779] 2024-01-21 14:10:46,327 >> loading configuration file ./models/dolphin-2.6-mistral-7b-dpo-laser/generation_config.json
183
+ [INFO|configuration_utils.py:826] 2024-01-21 14:10:46,327 >> Generate config GenerationConfig {
184
+ "bos_token_id": 1,
185
+ "eos_token_id": 2
186
+ }
187
+
188
+ 01/21/2024 14:10:46 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
189
+ 01/21/2024 14:10:54 - INFO - llmtuner.model.adapter - Merged 1 adapter(s).
190
+ 01/21/2024 14:10:54 - INFO - llmtuner.model.adapter - Loaded adapter(s): ./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
191
+ 01/21/2024 14:10:54 - INFO - llmtuner.model.loader - trainable params: 0 || all params: 7241740288 || trainable%: 0.0000
192
+ 01/21/2024 14:10:54 - INFO - llmtuner.model.loader - This IS expected that the trainable params is 0 if you are using model for inference only.
193
+ 01/21/2024 14:10:54 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json.
194
+ Using custom data configuration default-f901fb7e685ba757
195
+ Loading Dataset Infos from /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/datasets/packaged_modules/json
196
+ Overwrite dataset info from restored data version if exists.
197
+ Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
198
+ Found cached dataset json (/home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
199
+ Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
200
+
201
+ Converting format of dataset: 0%| | 0/50 [00:00<?, ? examples/s]Caching processed dataset at /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-27f6909b28d1bff6.arrow
202
+
203
+ Converting format of dataset: 100%|██████████| 50/50 [00:00<00:00, 7038.13 examples/s]
204
+
205
+ Running tokenizer on dataset: 0%| | 0/50 [00:00<?, ? examples/s]Caching processed dataset at /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-33f31fb80585e7d9.arrow
206
+
207
+ Running tokenizer on dataset: 100%|██████████| 50/50 [00:00<00:00, 556.47 examples/s]
208
+ [INFO|training_args.py:1838] 2024-01-21 14:10:55,484 >> PyTorch: setting up devices
209
+ Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
210
+ [INFO|trainer.py:3166] 2024-01-21 14:10:57,294 >> ***** Running Prediction *****
211
+ [INFO|trainer.py:3168] 2024-01-21 14:10:57,294 >> Num examples = 50
212
+ [INFO|trainer.py:3171] 2024-01-21 14:10:57,294 >> Batch size = 1
213
+ [INFO|configuration_utils.py:826] 2024-01-21 14:10:57,306 >> Generate config GenerationConfig {
214
+ "bos_token_id": 1,
215
+ "eos_token_id": 2,
216
+ "use_cache": false
217
+ }
218
+
219
+ /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )
220
+ warnings.warn(
221
+ input_ids:
222
+ [1, 733, 16289, 28793, 17121, 22526, 28747, 995, 460, 264, 10865, 13892, 395, 2735, 298, 272, 2296, 5572, 28723, 5938, 706, 513, 3030, 387, 13, 28751, 13, 2287, 345, 861, 1264, 345, 527, 28730, 720, 4078, 28730, 6036, 548, 13, 2287, 345, 6518, 1264, 345, 1458, 272, 8877, 4338, 1444, 989, 1191, 951, 20023, 548, 13, 2287, 345, 11438, 1264, 371, 13, 5390, 345, 1123, 1264, 345, 2814, 548, 13, 5390, 345, 10723, 1264, 371, 13, 17422, 345, 2893, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 477, 28739, 13, 17422, 1630, 13, 17422, 345, 3731, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 298, 28739, 13, 17422, 443, 13, 5390, 1630, 13, 5390, 345, 10893, 1264, 733, 13, 17422, 345, 2893, 28730, 16714, 548, 13, 17422, 345, 3731, 28730, 16714, 28739, 13, 5390, 4709, 13, 2287, 443, 13, 28752, 13, 13, 6325, 368, 1820, 264, 9314, 354, 528, 477, 1450, 2726, 298, 4222, 28804, 733, 28748, 16289, 28793]
223
+ inputs:
224
+ <s>[INST] SYSTEM: You are a helpful assistant with access to the following functions. Use them if required -
225
+ {
226
+ "name": "get_exchange_rate",
227
+ "description": "Get the exchange rate between two currencies",
228
+ "parameters": {
229
+ "type": "object",
230
+ "properties": {
231
+ "base_currency": {
232
+ "type": "string",
233
+ "description": "The currency to convert from"
234
+ },
235
+ "target_currency": {
236
+ "type": "string",
237
+ "description": "The currency to convert to"
238
+ }
239
+ },
240
+ "required": [
241
+ "base_currency",
242
+ "target_currency"
243
+ ]
244
+ }
245
+ }
246
+
247
+ Can you book a flight for me from New York to London? [/INST]
248
+
249
+ 0%| | 0/50 [00:00<?, ?it/s]
250
+ 4%|▍ | 2/50 [00:02<00:48, 1.00s/it]
251
+ 6%|▌ | 3/50 [00:08<02:40, 3.42s/it]
252
+ 8%|▊ | 4/50 [00:12<02:44, 3.57s/it]
253
+ 10%|█ | 5/50 [00:22<04:14, 5.65s/it]
254
+ 12%|█▏ | 6/50 [00:26<03:42, 5.07s/it]
255
+ 14%|█▍ | 7/50 [00:31<03:37, 5.07s/it]
256
+ 16%|█▌ | 8/50 [00:36<03:30, 5.00s/it]
257
+ 18%|█▊ | 9/50 [00:37<02:46, 4.07s/it]
258
+ 20%|██ | 10/50 [00:43<03:04, 4.61s/it]
259
+ 22%|██▏ | 11/50 [00:47<02:50, 4.36s/it]
260
+ 24%|██▍ | 12/50 [00:48<02:10, 3.42s/it]
261
+ 26%|██▌ | 13/50 [00:54<02:29, 4.05s/it]
262
+ 28%|██▊ | 14/50 [00:55<01:57, 3.28s/it]
263
+ 30%|███ | 15/50 [01:02<02:30, 4.31s/it]
264
+ 32%|███▏ | 16/50 [01:04<02:05, 3.70s/it]
265
+ 34%|███▍ | 17/50 [01:11<02:31, 4.59s/it]
266
+ 36%|███▌ | 18/50 [01:17<02:43, 5.12s/it]
267
+ 38%|███▊ | 19/50 [01:20<02:19, 4.49s/it]
268
+ 40%|████ | 20/50 [01:24<02:03, 4.10s/it]
269
+ 42%|████▏ | 21/50 [01:29<02:14, 4.62s/it]
270
+ 44%|████▍ | 22/50 [01:33<01:58, 4.23s/it]
271
+ 46%|████▌ | 23/50 [01:36<01:44, 3.87s/it]
272
+ 48%|████▊ | 24/50 [01:39<01:32, 3.55s/it]
273
+ 50%|█████ | 25/50 [01:44<01:39, 3.98s/it]
274
+ 52%|█████▏ | 26/50 [01:53<02:13, 5.55s/it]
275
+ 54%|█████▍ | 27/50 [01:59<02:11, 5.71s/it]
276
+ 56%|█████▌ | 28/50 [02:08<02:28, 6.77s/it]
277
+ 58%|█████▊ | 29/50 [02:10<01:51, 5.31s/it]
278
+ 60%|██████ | 30/50 [02:14<01:38, 4.92s/it]
279
+ 62%|██████▏ | 31/50 [02:18<01:27, 4.60s/it]
280
+ 64%|██████▍ | 32/50 [02:25<01:35, 5.31s/it]
281
+ 66%|██████▌ | 33/50 [02:29<01:25, 5.04s/it]
282
+ 68%|██████▊ | 34/50 [02:32<01:09, 4.35s/it]
283
+ 70%|███████ | 35/50 [02:36<01:05, 4.40s/it]
284
+ 72%|███████▏ | 36/50 [02:40<00:56, 4.01s/it]
285
+ 74%|███████▍ | 37/50 [02:42<00:46, 3.60s/it]
286
+ 76%|███████▌ | 38/50 [02:47<00:48, 4.04s/it]
287
+ 78%|███████▊ | 39/50 [02:57<01:02, 5.72s/it]
288
+ 80%|████████ | 40/50 [03:01<00:53, 5.31s/it]
289
+ 82%|████████▏ | 41/50 [03:04<00:40, 4.51s/it]
290
+ 84%|████████▍ | 42/50 [03:07<00:32, 4.08s/it]
291
+ 86%|████████▌ | 43/50 [03:10<00:27, 3.89s/it]
292
+ 88%|████████▊ | 44/50 [03:13<00:20, 3.37s/it]
293
+ 90%|█████████ | 45/50 [03:17<00:18, 3.71s/it]
294
+ 92%|█████████▏| 46/50 [03:20<00:13, 3.35s/it]
295
+ 94%|█████████▍| 47/50 [03:25<00:12, 4.01s/it]
296
+ 96%|█████████▌| 48/50 [03:28<00:07, 3.76s/it]
297
+ 98%|█████████▊| 49/50 [03:31<00:03, 3.52s/it]
298
+ 100%|██████████| 50/50 [03:34<00:00, 3.15s/it]Building prefix dict from the default dictionary ...
299
+ Dumping model to file cache /tmp/jieba.cache
300
+ Loading model cost 0.537 seconds.
301
+ Prefix dict has been built successfully.
302
+
303
+ 100%|██████████| 50/50 [03:35<00:00, 4.30s/it]
304
+ ***** predict metrics *****
305
+ predict_bleu-4 = 90.1293
306
+ predict_rouge-1 = 92.0621
307
+ predict_rouge-2 = 86.3829
308
+ predict_rouge-l = 89.9619
309
+ predict_runtime = 0:03:38.79
310
+ predict_samples_per_second = 0.229
311
+ predict_steps_per_second = 0.229
312
+ 01/21/2024 14:14:36 - INFO - llmtuner.train.sft.trainer - Saving prediction results to ./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50/generated_predictions.jsonl
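The jieba lines near the end of the log come from the metric step that turns the label/predict pairs into the BLEU-4 and ROUGE scores reported above. The repository's own metric code is not part of this commit; the sketch below only approximates that style of scoring and assumes the `jieba`, `nltk`, and `rouge-chinese` packages are available:

```python
import json
import jieba
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_chinese import Rouge

bleu_scores, rouge_l = [], []
rouge = Rouge()
with open("Predict_50/generated_predictions.jsonl") as f:
    for line in f:
        row = json.loads(line)
        hyp = list(jieba.cut(row["predict"]))
        ref = list(jieba.cut(row["label"]))
        # Sentence-level BLEU-4 with smoothing, scaled to 0-100.
        bleu_scores.append(
            sentence_bleu([ref], hyp, smoothing_function=SmoothingFunction().method3) * 100
        )
        # ROUGE over space-joined tokens, F1 of the longest common subsequence.
        scores = rouge.get_scores(" ".join(hyp), " ".join(ref))
        rouge_l.append(scores[0]["rouge-l"]["f"] * 100)

print(f"BLEU-4 ≈ {sum(bleu_scores) / len(bleu_scores):.2f}")
print(f"ROUGE-L ≈ {sum(rouge_l) / len(rouge_l):.2f}")
```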
Predict_50/predict_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "predict_bleu-4": 90.129256,
+ "predict_rouge-1": 92.062058,
+ "predict_rouge-2": 86.382876,
+ "predict_rouge-l": 89.961892,
+ "predict_runtime": 218.7949,
+ "predict_samples_per_second": 0.229,
+ "predict_steps_per_second": 0.229
+ }
README.md CHANGED
@@ -1,3 +1,76 @@
 ---
 license: apache-2.0
+ library_name: peft
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ base_model: cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
+ model-index:
+ - name: dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
+ results: []
 ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
+
+ This model is a fine-tuned version of [cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser](https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser) on the https://huggingface.co/datasets/Yhyu13/glaive-function-calling-v2-llama-factory-convert/blob/main/simple-function-calling-v2_converted_5000_with_function_call_only.json dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0605
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 1
+ - eval_batch_size: 1
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 2
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 8
+ - total_eval_batch_size: 2
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - num_epochs: 1.0
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 0.2548 | 0.09 | 100 | 0.1148 |
+ | 0.1149 | 0.18 | 200 | 0.0914 |
+ | 0.0871 | 0.27 | 300 | 0.0831 |
+ | 0.0865 | 0.35 | 400 | 0.0760 |
+ | 0.0802 | 0.44 | 500 | 0.0718 |
+ | 0.0689 | 0.53 | 600 | 0.0702 |
+ | 0.0649 | 0.62 | 700 | 0.0649 |
+ | 0.0637 | 0.71 | 800 | 0.0632 |
+ | 0.0698 | 0.8 | 900 | 0.0619 |
+ | 0.0648 | 0.88 | 1000 | 0.0608 |
+ | 0.0654 | 0.97 | 1100 | 0.0605 |
+
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.36.2
+ - Pytorch 2.1.2+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
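As a quick check on the hyperparameters listed in the model card above, the effective batch size is the per-device batch size times the number of devices times the gradient-accumulation steps:

```python
# Effective batch size from the hyperparameters listed above:
# per-device batch 1 x 2 GPUs x gradient accumulation 4 = 8 sequences per optimizer step,
# which matches the reported total_train_batch_size.
train_batch_size = 1
num_devices = 2
gradient_accumulation_steps = 4
total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
assert total_train_batch_size == 8
```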
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
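The adapter only wraps `q_proj` and `v_proj` with rank 8, which is why the safetensors file below is only ~13 MB. A minimal loading sketch with 🤗 Transformers and PEFT; merging mirrors the "Merged 1 adapter(s)" line in the prediction log, and the adapter repo id is an assumption:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser"
adapter_id = "Yhyu13/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora"  # assumed repo id

# The tokenizer files (tokenizer.model, special_tokens_map.json, added_tokens.json) ship with this commit.
tokenizer = AutoTokenizer.from_pretrained(adapter_id)
model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")

# Attach the LoRA weights described by adapter_config.json, then fold them
# into the base weights for inference, as the predict log does before generating.
model = PeftModel.from_pretrained(model, adapter_id)
model = model.merge_and_unload()
```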
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26a7e657ce6e3f08056d34d3e0ac164fbd7ff505cfb751800cb9fdb081e54491
+ size 13648432
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+ "<|im_start|>": 32000
+ }
all_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.06051653251051903,
+ "eval_runtime": 109.2969,
+ "eval_samples_per_second": 9.204,
+ "eval_steps_per_second": 4.602,
+ "train_loss": 0.09174010732532603,
+ "train_runtime": 4073.2029,
+ "train_samples_per_second": 2.221,
+ "train_steps_per_second": 0.277
+ }
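A rough consistency check on the training metrics above: steps per second times runtime should land near the 1130 optimizer steps recorded in trainer_state.json further down in this commit:

```python
# Values taken from all_results.json above.
train_runtime = 4073.2029
train_steps_per_second = 0.277
print(train_steps_per_second * train_runtime)  # ≈ 1128, i.e. roughly the 1130 steps of one epoch
```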
eval_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.06051653251051903,
+ "eval_runtime": 109.2969,
+ "eval_samples_per_second": 9.204,
+ "eval_steps_per_second": 4.602
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
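Together with added_tokens.json, this wires up the ChatML markers: `<|im_start|>` is appended at id 32000 (hence the 32001-token vocabulary in the config printed in the predict log), and both the EOS and padding tokens are set to `<|im_end|>`. A small check sketch; the repo id is an assumption:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "Yhyu13/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora"  # assumed repo id
)

print(tokenizer.eos_token, tokenizer.pad_token)         # "<|im_end|>" for both, per special_tokens_map.json
print(tokenizer.convert_tokens_to_ids("<|im_start|>"))  # 32000, per added_tokens.json
print(len(tokenizer))                                   # 32001, matching vocab_size in the model config
```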
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
train_eval_log.txt ADDED
The diff for this file is too large to render. See raw diff
 
train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.09174010732532603,
+ "train_runtime": 4073.2029,
+ "train_samples_per_second": 2.221,
+ "train_steps_per_second": 0.277
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,24 @@
1
+ {"current_steps": 100, "total_steps": 1130, "loss": 0.2548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.904003870460323e-05, "epoch": 0.09, "percentage": 8.85, "elapsed_time": "0:04:11", "remaining_time": "0:43:07"}
2
+ {"current_steps": 100, "total_steps": 1130, "loss": null, "eval_loss": 0.11478812247514725, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.09, "percentage": 8.85, "elapsed_time": "0:04:11", "remaining_time": "0:43:07"}
3
+ {"current_steps": 200, "total_steps": 1130, "loss": 0.1149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6233876873505694e-05, "epoch": 0.18, "percentage": 17.7, "elapsed_time": "0:10:15", "remaining_time": "0:47:44"}
4
+ {"current_steps": 200, "total_steps": 1130, "loss": null, "eval_loss": 0.0914301872253418, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.18, "percentage": 17.7, "elapsed_time": "0:10:15", "remaining_time": "0:47:44"}
5
+ {"current_steps": 300, "total_steps": 1130, "loss": 0.0871, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1797019046425264e-05, "epoch": 0.27, "percentage": 26.55, "elapsed_time": "0:16:20", "remaining_time": "0:45:13"}
6
+ {"current_steps": 300, "total_steps": 1130, "loss": null, "eval_loss": 0.08308113366365433, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.27, "percentage": 26.55, "elapsed_time": "0:16:20", "remaining_time": "0:45:13"}
7
+ {"current_steps": 400, "total_steps": 1130, "loss": 0.0865, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.607020216633599e-05, "epoch": 0.35, "percentage": 35.4, "elapsed_time": "0:22:25", "remaining_time": "0:40:55"}
8
+ {"current_steps": 400, "total_steps": 1130, "loss": null, "eval_loss": 0.07597807794809341, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.35, "percentage": 35.4, "elapsed_time": "0:22:25", "remaining_time": "0:40:55"}
9
+ {"current_steps": 500, "total_steps": 1130, "loss": 0.0802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9493228037294702e-05, "epoch": 0.44, "percentage": 44.25, "elapsed_time": "0:28:31", "remaining_time": "0:35:56"}
10
+ {"current_steps": 500, "total_steps": 1130, "loss": null, "eval_loss": 0.07179657369852066, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.44, "percentage": 44.25, "elapsed_time": "0:28:31", "remaining_time": "0:35:56"}
11
+ {"current_steps": 600, "total_steps": 1130, "loss": 0.0689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2571187907677853e-05, "epoch": 0.53, "percentage": 53.1, "elapsed_time": "0:34:34", "remaining_time": "0:30:32"}
12
+ {"current_steps": 600, "total_steps": 1130, "loss": null, "eval_loss": 0.0701972097158432, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.53, "percentage": 53.1, "elapsed_time": "0:34:34", "remaining_time": "0:30:32"}
13
+ {"current_steps": 700, "total_steps": 1130, "loss": 0.0649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.583567302625469e-05, "epoch": 0.62, "percentage": 61.95, "elapsed_time": "0:40:34", "remaining_time": "0:24:55"}
14
+ {"current_steps": 700, "total_steps": 1130, "loss": null, "eval_loss": 0.0648578405380249, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.62, "percentage": 61.95, "elapsed_time": "0:40:34", "remaining_time": "0:24:55"}
15
+ {"current_steps": 800, "total_steps": 1130, "loss": 0.0637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.803950080284005e-06, "epoch": 0.71, "percentage": 70.8, "elapsed_time": "0:46:39", "remaining_time": "0:19:14"}
16
+ {"current_steps": 800, "total_steps": 1130, "loss": null, "eval_loss": 0.06319624185562134, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.71, "percentage": 70.8, "elapsed_time": "0:46:39", "remaining_time": "0:19:14"}
17
+ {"current_steps": 900, "total_steps": 1130, "loss": 0.0698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.939236715580884e-06, "epoch": 0.8, "percentage": 79.65, "elapsed_time": "0:52:42", "remaining_time": "0:13:28"}
18
+ {"current_steps": 900, "total_steps": 1130, "loss": null, "eval_loss": 0.061882246285676956, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.8, "percentage": 79.65, "elapsed_time": "0:52:42", "remaining_time": "0:13:28"}
19
+ {"current_steps": 1000, "total_steps": 1130, "loss": 0.0648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.615127855610496e-06, "epoch": 0.88, "percentage": 88.5, "elapsed_time": "0:58:44", "remaining_time": "0:07:38"}
20
+ {"current_steps": 1000, "total_steps": 1130, "loss": null, "eval_loss": 0.060829270631074905, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.88, "percentage": 88.5, "elapsed_time": "0:58:44", "remaining_time": "0:07:38"}
21
+ {"current_steps": 1100, "total_steps": 1130, "loss": 0.0654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.690476815339244e-08, "epoch": 0.97, "percentage": 97.35, "elapsed_time": "1:04:46", "remaining_time": "0:01:45"}
22
+ {"current_steps": 1100, "total_steps": 1130, "loss": null, "eval_loss": 0.06053200736641884, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.97, "percentage": 97.35, "elapsed_time": "1:04:46", "remaining_time": "0:01:45"}
23
+ {"current_steps": 1130, "total_steps": 1130, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:07:51", "remaining_time": "0:00:00"}
24
+ {"current_steps": 503, "total_steps": 503, "loss": null, "eval_loss": 0.06051653251051903, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:09:44", "remaining_time": "0:00:00"}
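Each pair of records above logs a training loss and an eval loss every 100 steps, which is presumably what the training_loss.png and training_eval_loss.png files at the bottom of this commit plot. A plotting sketch (matplotlib assumed; the output filename is hypothetical, not one of the committed PNGs):

```python
import json
import matplotlib.pyplot as plt

train_pts, eval_pts = [], []
with open("trainer_log.jsonl") as f:
    for line in f:
        rec = json.loads(line)
        if rec.get("loss") is not None:
            train_pts.append((rec["current_steps"], rec["loss"]))
        if rec.get("eval_loss") is not None:
            eval_pts.append((rec["current_steps"], rec["eval_loss"]))

plt.plot(*zip(*train_pts), label="training loss")
plt.plot(*zip(*eval_pts), label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("training_eval_loss_repro.png")
```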
trainer_state.json ADDED
@@ -0,0 +1,184 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9993367234136635,
5
+ "eval_steps": 100,
6
+ "global_step": 1130,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09,
13
+ "learning_rate": 4.904003870460323e-05,
14
+ "loss": 0.2548,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 0.09,
19
+ "eval_loss": 0.11478812247514725,
20
+ "eval_runtime": 109.7793,
21
+ "eval_samples_per_second": 9.164,
22
+ "eval_steps_per_second": 4.582,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.18,
27
+ "learning_rate": 4.6233876873505694e-05,
28
+ "loss": 0.1149,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 0.18,
33
+ "eval_loss": 0.0914301872253418,
34
+ "eval_runtime": 109.8254,
35
+ "eval_samples_per_second": 9.16,
36
+ "eval_steps_per_second": 4.58,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.27,
41
+ "learning_rate": 4.1797019046425264e-05,
42
+ "loss": 0.0871,
43
+ "step": 300
44
+ },
45
+ {
46
+ "epoch": 0.27,
47
+ "eval_loss": 0.08308113366365433,
48
+ "eval_runtime": 110.073,
49
+ "eval_samples_per_second": 9.139,
50
+ "eval_steps_per_second": 4.57,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.35,
55
+ "learning_rate": 3.607020216633599e-05,
56
+ "loss": 0.0865,
57
+ "step": 400
58
+ },
59
+ {
60
+ "epoch": 0.35,
61
+ "eval_loss": 0.07597807794809341,
62
+ "eval_runtime": 109.949,
63
+ "eval_samples_per_second": 9.15,
64
+ "eval_steps_per_second": 4.575,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.44,
69
+ "learning_rate": 2.9493228037294702e-05,
70
+ "loss": 0.0802,
71
+ "step": 500
72
+ },
73
+ {
74
+ "epoch": 0.44,
75
+ "eval_loss": 0.07179657369852066,
76
+ "eval_runtime": 110.0004,
77
+ "eval_samples_per_second": 9.145,
78
+ "eval_steps_per_second": 4.573,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 0.53,
83
+ "learning_rate": 2.2571187907677853e-05,
84
+ "loss": 0.0689,
85
+ "step": 600
86
+ },
87
+ {
88
+ "epoch": 0.53,
89
+ "eval_loss": 0.0701972097158432,
90
+ "eval_runtime": 109.5978,
91
+ "eval_samples_per_second": 9.179,
92
+ "eval_steps_per_second": 4.59,
93
+ "step": 600
94
+ },
95
+ {
96
+ "epoch": 0.62,
97
+ "learning_rate": 1.583567302625469e-05,
98
+ "loss": 0.0649,
99
+ "step": 700
100
+ },
101
+ {
102
+ "epoch": 0.62,
103
+ "eval_loss": 0.0648578405380249,
104
+ "eval_runtime": 109.8826,
105
+ "eval_samples_per_second": 9.155,
106
+ "eval_steps_per_second": 4.578,
107
+ "step": 700
108
+ },
109
+ {
110
+ "epoch": 0.71,
111
+ "learning_rate": 9.803950080284005e-06,
112
+ "loss": 0.0637,
113
+ "step": 800
114
+ },
115
+ {
116
+ "epoch": 0.71,
117
+ "eval_loss": 0.06319624185562134,
118
+ "eval_runtime": 109.8291,
119
+ "eval_samples_per_second": 9.16,
120
+ "eval_steps_per_second": 4.58,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.8,
125
+ "learning_rate": 4.939236715580884e-06,
126
+ "loss": 0.0698,
127
+ "step": 900
128
+ },
129
+ {
130
+ "epoch": 0.8,
131
+ "eval_loss": 0.061882246285676956,
132
+ "eval_runtime": 109.8313,
133
+ "eval_samples_per_second": 9.16,
134
+ "eval_steps_per_second": 4.58,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.88,
139
+ "learning_rate": 1.615127855610496e-06,
140
+ "loss": 0.0648,
141
+ "step": 1000
142
+ },
143
+ {
144
+ "epoch": 0.88,
145
+ "eval_loss": 0.060829270631074905,
146
+ "eval_runtime": 109.3936,
147
+ "eval_samples_per_second": 9.196,
148
+ "eval_steps_per_second": 4.598,
149
+ "step": 1000
150
+ },
151
+ {
152
+ "epoch": 0.97,
153
+ "learning_rate": 8.690476815339244e-08,
154
+ "loss": 0.0654,
155
+ "step": 1100
156
+ },
157
+ {
158
+ "epoch": 0.97,
159
+ "eval_loss": 0.06053200736641884,
160
+ "eval_runtime": 109.4208,
161
+ "eval_samples_per_second": 9.194,
162
+ "eval_steps_per_second": 4.597,
163
+ "step": 1100
164
+ },
165
+ {
166
+ "epoch": 1.0,
167
+ "step": 1130,
168
+ "total_flos": 1.4019616490441933e+17,
169
+ "train_loss": 0.09174010732532603,
170
+ "train_runtime": 4073.2029,
171
+ "train_samples_per_second": 2.221,
172
+ "train_steps_per_second": 0.277
173
+ }
174
+ ],
175
+ "logging_steps": 100,
176
+ "max_steps": 1130,
177
+ "num_input_tokens_seen": 0,
178
+ "num_train_epochs": 1,
179
+ "save_steps": 100,
180
+ "total_flos": 1.4019616490441933e+17,
181
+ "train_batch_size": 1,
182
+ "trial_name": null,
183
+ "trial_params": null
184
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64729ffa529a85cd7112b11d8207aa70ac9cdece464413d61f307c245695dbb0
+ size 4984
training_eval_loss.png ADDED
training_loss.png ADDED