Upload
Browse files- Predict_50/all_results.json +9 -0
- Predict_50/generated_predictions.jsonl +50 -0
- Predict_50/predict_log.txt +312 -0
- Predict_50/predict_results.json +9 -0
- README.md +73 -0
- adapter_config.json +26 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +3 -0
- all_results.json +11 -0
- eval_results.json +7 -0
- special_tokens_map.json +30 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- train_eval_log.txt +0 -0
- train_results.json +7 -0
- trainer_log.jsonl +24 -0
- trainer_state.json +184 -0
- training_args.bin +3 -0
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
Predict_50/all_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_bleu-4": 90.129256,
|
3 |
+
"predict_rouge-1": 92.062058,
|
4 |
+
"predict_rouge-2": 86.382876,
|
5 |
+
"predict_rouge-l": 89.961892,
|
6 |
+
"predict_runtime": 218.7949,
|
7 |
+
"predict_samples_per_second": 0.229,
|
8 |
+
"predict_steps_per_second": 0.229
|
9 |
+
}
|
Predict_50/generated_predictions.jsonl
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"label": "I'm sorry, but I don't have the capability to book flights. My current function allows me to get the exchange rate between two currencies. If you need help with that, feel free to ask!", "predict": "I'm sorry, but I don't have the capability to book flights. My current function allows me to get the exchange rate between two currencies. If you need that information, feel free to ask."}
|
2 |
+
{"label": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"United States\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"United States\"}'} </functioncall>"}
|
3 |
+
{"label": "Here are the latest news headlines for the United States:\n1. Biden announces new vaccine mandates\n2. Hurricane Ida devastates Louisiana\n3. Apple unveils new iPhone\n4. NASA's Perseverance rover collects first Mars rock sample", "predict": "Here are the latest news headlines for the United States:\n1. Biden announces new vaccine mandates\n2. Hurricane Ida devastates Louisiana\n3. Apple unveils new iPhone\n4. NASA's Perseverance rover collects first Mars rock sample"}
|
4 |
+
{"label": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"France\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_news_headlines\", \"arguments\": '{\"country\": \"France\"}'} </functioncall>"}
|
5 |
+
{"label": "Here are the latest news headlines for France:\n1. France recalls ambassadors to US and Australia\n2. French election: Macron's party braces for tough fight\n3. Louvre Museum to undergo major overhaul\n4. France to offer free birth control to all women under 25", "predict": "Here are the latest news headlines for France:\n1. France recalls ambassadors to US and Australia\n2. French election: Macron's party braces for tough fight\n3. Louvre Museum to undergo major overhaul\n4. France to offer free birth control to all women under 25"}
|
6 |
+
{"label": "Of course. How long would you like your password to be? And would you like it to include symbols?", "predict": "Of course, I can help with that. How long would you like your password to be? And would you like it to include symbols?"}
|
7 |
+
{"label": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_symbols\": true}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_symbols\": true}'} </functioncall>"}
|
8 |
+
{"label": "Here is your new password: 4&7j#9@1Q6*. Please make sure to save it in a secure location.", "predict": "Here is your new password: 4&7j#9@1Q6*. Please make sure to save it in a secure place."}
|
9 |
+
{"label": "Of course! How long would you like your password to be? And do you want it to include numbers and special characters?", "predict": "Of course! How long would you like your password to be? And would you like it to include numbers and/or special characters?"}
|
10 |
+
{"label": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_numbers\": true, \"include_special_characters\": true}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_password\", \"arguments\": '{\"length\": 12, \"include_numbers\": true, \"include_special_characters\": true}'} </functioncall>"}
|
11 |
+
{"label": "Here is your new password: 4#7gB6&9L1!0. Please make sure to save it in a secure place.", "predict": "Here is your new password: 4#7gB6&9L1!0. Please make sure to save it in a secure place."}
|
12 |
+
{"label": "Of course, I can help you with that. Please provide me with the list of numbers.", "predict": "Of course, I can help you with that. Please provide me with the list of numbers."}
|
13 |
+
{"label": "<functioncall> {\"name\": \"calculate_median\", \"arguments\": '{\"numbers\": [5, 2, 9, 1, 7, 4, 6, 3, 8]}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_median\", \"arguments\": '{\"numbers\": [5, 2, 9, 1, 7, 4, 6, 3, 8]}'} </functioncall>"}
|
14 |
+
{"label": "The median of the provided list of numbers is 5.", "predict": "The median of the numbers you provided is 5."}
|
15 |
+
{"label": "Of course, I can help you with that. Let me calculate it for you.\n<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"principal\": 50000, \"interest_rate\": 5, \"loan_term\": 10}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"principal\": 50000, \"interest_rate\": 5, \"loan_term\": 10}'} </functioncall>"}
|
16 |
+
{"label": "Based on the information provided, your monthly loan payment would be $530.33.", "predict": "The monthly payment for your loan would be approximately $530.33."}
|
17 |
+
{"label": "I'm sorry, but as an AI, I don't have the capability to book flights or perform external tasks. My current function is to calculate loan payments. I recommend using a travel booking website or app for your flight booking needs.", "predict": "I'm sorry, but as an AI, I don't have the capability to book flights or perform external tasks. My current functions are limited to the ones provided to me. In this case, I can only calculate loan payments."}
|
18 |
+
{"label": "<functioncall> {\"name\": \"convert_temperature\", \"arguments\": '{\"temperature\": 30, \"from_unit\": \"Celsius\", \"to_unit\": \"Fahrenheit\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"convert_temperature\", \"arguments\": '{\"temperature\": 30, \"from_unit\": \"Celsius\", \"to_unit\": \"Fahrenheit\"}'} </functioncall>"}
|
19 |
+
{"label": "The converted temperature from 30 degrees Celsius to Fahrenheit is 86 degrees Fahrenheit.", "predict": "The converted temperature from 30 degrees Celsius to Fahrenheit is 86 degrees Fahrenheit."}
|
20 |
+
{"label": "I'm sorry, but I can't assist with that. My current capabilities allow me to provide information about movies based on their titles. I don't have the ability to place orders or perform external tasks.", "predict": "I'm sorry, but I don't have the capability to perform external tasks such as ordering a pizza. My current function allows me to retrieve movie details based on the title provided. If you need more information about a movie, feel free to ask!"}
|
21 |
+
{"label": "Sure, let me calculate that for you.\n<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 20}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 20}'} </functioncall>"}
|
22 |
+
{"label": "Based on the bill amount and the tip percentage you provided, the tip you should leave is $10.", "predict": "Based on the bill amount and the tip percentage you provided, the tip you should leave is $10."}
|
23 |
+
{"label": "<functioncall> {\"name\": \"create_contact\", \"arguments\": '{\"name\": \"John Doe\", \"email\": \"johndoe@example.com\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"create_contact\", \"arguments\": '{\"name\": \"John Doe\", \"email\": \"johndoe@example.com\"}'} </functioncall>"}
|
24 |
+
{"label": "I have successfully created a new contact for your friend John Doe with the email johndoe@example.com.", "predict": "The contact for John Doe with email johndoe@example.com has been created successfully."}
|
25 |
+
{"label": "Of course, I can help with that. Could you please provide me with the title of the event, and the start and end times?", "predict": "Of course! I can help with that. Please provide me with the title of the event, the start time, and the end time in the format YYYY-MM-DD HH:MM."}
|
26 |
+
{"label": "<functioncall> {\"name\": \"create_calendar_event\", \"arguments\": '{\n \"title\": \"Team Meeting\",\n \"start_time\": \"2022-03-15 10:00\",\n \"end_time\": \"2022-03-15 11:00\"\n}'} </functioncall>", "predict": "<functioncall> {\"name\": \"create_calendar_event\", \"arguments\": '{\"title\": \"Team Meeting\", \"start_time\": \"2022-09-23 10:00\", \"end_time\": \"2022-09-23 11:00\"}'} </functioncall>"}
|
27 |
+
{"label": "The event 'Team Meeting' has been successfully created for the time slot 10:00 AM to 11:00 AM on March 15, 2022.", "predict": "The event 'Team Meeting' has been successfully created for the time slot 10:00 AM to 11:00 AM on 2022-03-15."}
|
28 |
+
{"label": "I'm sorry, but I'm unable to send emails as it's an external task and I don't have a function provided for that task. However, I can assist you in creating calendar events.", "predict": "I'm sorry, but as an AI, I don't have the capability to send emails. My current functions allow me to create calendar events, but I can't perform external tasks like sending emails. You might want to use your email client for that."}
|
29 |
+
{"label": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"Inception\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"Inception\"}'} </functioncall>"}
|
30 |
+
{"label": "The movie \"Inception\" was directed by Christopher Nolan and was released in the year 2010. It falls under the genre of Science fiction and has a rating of 8.8/10.", "predict": "The movie \"Inception\" is a science fiction film directed by Christopher Nolan. It was released in 2010 and has a rating of 8.8 out of 10."}
|
31 |
+
{"label": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"The Godfather\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_movie\", \"arguments\": '{\"title\": \"The Godfather\"}'} </functioncall>"}
|
32 |
+
{"label": "The movie \"The Godfather\" was directed by Francis Ford Coppola and was released in the year 1972. It falls under the genre of Crime and Drama and has a rating of 9.2/10.", "predict": "The movie \"The Godfather\" was directed by Francis Ford Coppola and was released in the year 1972. It falls under the genre of Crime, Drama and has a rating of 9.2/10."}
|
33 |
+
{"label": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"AAPL\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"AAPL\"}'} </functioncall>"}
|
34 |
+
{"label": "The current price of Apple's stock (AAPL) is $150.75.", "predict": "The current price of Apple's stock (AAPL) is $150.75."}
|
35 |
+
{"label": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"MSFT\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"get_stock_price\", \"arguments\": '{\"stock_symbol\": \"MSFT\"}'} </functioncall>"}
|
36 |
+
{"label": "The current price of Microsoft's stock (MSFT) is $210.22.", "predict": "The current price of Microsoft's stock (MSFT) is $210.22."}
|
37 |
+
{"label": "You're welcome! If you have any other questions, feel free to ask.", "predict": "You're welcome! If you have any other questions, feel free to ask."}
|
38 |
+
{"label": "Of course, I'd be happy to help. Could you please provide me with the total loan amount, the annual interest rate, and the duration of the loan in months?", "predict": "Of course, I'd be happy to help. Could you please provide me with the total loan amount, the annual interest rate, and the duration of the loan in months?"}
|
39 |
+
{"label": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"loan_amount\": 200000, \"interest_rate\": 3.5, \"loan_term\": 360}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_loan_payment\", \"arguments\": '{\"loan_amount\": 200000, \"interest_rate\": 3.5, \"loan_term\": 30*12}'} </functioncall>"}
|
40 |
+
{"label": "Based on the information you provided, your estimated monthly payment for this loan would be approximately $898.09.", "predict": "Based on the information you provided, your monthly payment for this loan would be approximately $898.09."}
|
41 |
+
{"label": "I'm sorry, but I'm unable to perform external tasks such as ordering a pizza. My current function allows me to fetch the latest news headlines for you. If you need any information regarding that, feel free to ask.", "predict": "I'm sorry, but I don't have the capability to order a pizza as it's an external task and I don't have a function provided for that task. I can assist you with the latest news headlines if you'd like."}
|
42 |
+
{"label": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 15}'} </functioncall>", "predict": "<functioncall> {\"name\": \"calculate_tip\", \"arguments\": '{\"bill_amount\": 50, \"tip_percentage\": 15}'} </functioncall>"}
|
43 |
+
{"label": "Based on the bill amount and the tip percentage you provided, the tip amount you should give is $7.5.", "predict": "Based on your bill amount and the tip percentage you want to give, the tip amount you should give is $7.5."}
|
44 |
+
{"label": "You're welcome! If you have any other questions, feel free to ask.", "predict": "You're welcome! If you have any other questions, feel free to ask."}
|
45 |
+
{"label": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 1, \"max\": 100}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 1, \"max\": 100}'} </functioncall>"}
|
46 |
+
{"label": "The random number generated between 1 and 100 is 42.", "predict": "The random number generated between 1 and 100 is 42."}
|
47 |
+
{"label": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 200, \"max\": 300}'} </functioncall>", "predict": "<functioncall> {\"name\": \"generate_random_number\", \"arguments\": '{\"min\": 200, \"max\": 300}'} </functioncall>"}
|
48 |
+
{"label": "The random number generated between 200 and 300 is 278.", "predict": "The random number generated between 200 and 300 is 278."}
|
49 |
+
{"label": "You're welcome! If you need anything else, feel free to ask.", "predict": "You're welcome! If you need anything else, feel free to ask."}
|
50 |
+
{"label": "<functioncall> {\"name\": \"search_books\", \"arguments\": '{\"author\": \"George Orwell\"}'} </functioncall>", "predict": "<functioncall> {\"name\": \"search_books\", \"arguments\": '{\"author\": \"George Orwell\"}'} </functioncall>"}
|
Predict_50/predict_log.txt
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
01/21/2024 14:10:45 - WARNING - llmtuner.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
|
2 |
+
[INFO|training_args.py:1838] 2024-01-21 14:10:45,405 >> PyTorch: setting up devices
|
3 |
+
/home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/training_args.py:1751: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.
|
4 |
+
warnings.warn(
|
5 |
+
01/21/2024 14:10:45 - INFO - llmtuner.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1
|
6 |
+
distributed training: True, compute dtype: None
|
7 |
+
01/21/2024 14:10:45 - INFO - llmtuner.hparams.parser - Training/evaluation parameters Seq2SeqTrainingArguments(
|
8 |
+
_n_gpu=1,
|
9 |
+
adafactor=False,
|
10 |
+
adam_beta1=0.9,
|
11 |
+
adam_beta2=0.999,
|
12 |
+
adam_epsilon=1e-08,
|
13 |
+
auto_find_batch_size=False,
|
14 |
+
bf16=False,
|
15 |
+
bf16_full_eval=False,
|
16 |
+
data_seed=None,
|
17 |
+
dataloader_drop_last=False,
|
18 |
+
dataloader_num_workers=0,
|
19 |
+
dataloader_persistent_workers=False,
|
20 |
+
dataloader_pin_memory=True,
|
21 |
+
ddp_backend=None,
|
22 |
+
ddp_broadcast_buffers=None,
|
23 |
+
ddp_bucket_cap_mb=None,
|
24 |
+
ddp_find_unused_parameters=False,
|
25 |
+
ddp_timeout=1800,
|
26 |
+
debug=[],
|
27 |
+
deepspeed=None,
|
28 |
+
disable_tqdm=False,
|
29 |
+
dispatch_batches=None,
|
30 |
+
do_eval=False,
|
31 |
+
do_predict=True,
|
32 |
+
do_train=False,
|
33 |
+
eval_accumulation_steps=None,
|
34 |
+
eval_delay=0,
|
35 |
+
eval_steps=None,
|
36 |
+
evaluation_strategy=IntervalStrategy.NO,
|
37 |
+
fp16=False,
|
38 |
+
fp16_backend=auto,
|
39 |
+
fp16_full_eval=False,
|
40 |
+
fp16_opt_level=O1,
|
41 |
+
fsdp=[],
|
42 |
+
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
|
43 |
+
fsdp_min_num_params=0,
|
44 |
+
fsdp_transformer_layer_cls_to_wrap=None,
|
45 |
+
full_determinism=False,
|
46 |
+
generation_config=None,
|
47 |
+
generation_max_length=None,
|
48 |
+
generation_num_beams=None,
|
49 |
+
gradient_accumulation_steps=1,
|
50 |
+
gradient_checkpointing=False,
|
51 |
+
gradient_checkpointing_kwargs=None,
|
52 |
+
greater_is_better=None,
|
53 |
+
group_by_length=False,
|
54 |
+
half_precision_backend=auto,
|
55 |
+
hub_always_push=False,
|
56 |
+
hub_model_id=None,
|
57 |
+
hub_private_repo=False,
|
58 |
+
hub_strategy=HubStrategy.EVERY_SAVE,
|
59 |
+
hub_token=<HUB_TOKEN>,
|
60 |
+
ignore_data_skip=False,
|
61 |
+
include_inputs_for_metrics=False,
|
62 |
+
include_num_input_tokens_seen=False,
|
63 |
+
include_tokens_per_second=False,
|
64 |
+
jit_mode_eval=False,
|
65 |
+
label_names=None,
|
66 |
+
label_smoothing_factor=0.0,
|
67 |
+
learning_rate=5e-05,
|
68 |
+
length_column_name=length,
|
69 |
+
load_best_model_at_end=False,
|
70 |
+
local_rank=0,
|
71 |
+
log_level=passive,
|
72 |
+
log_level_replica=warning,
|
73 |
+
log_on_each_node=True,
|
74 |
+
logging_dir=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50/runs/Jan21_14-10-45_yhyu13fuwuqi,
|
75 |
+
logging_first_step=False,
|
76 |
+
logging_nan_inf_filter=True,
|
77 |
+
logging_steps=500,
|
78 |
+
logging_strategy=IntervalStrategy.STEPS,
|
79 |
+
lr_scheduler_kwargs={},
|
80 |
+
lr_scheduler_type=SchedulerType.LINEAR,
|
81 |
+
max_grad_norm=1.0,
|
82 |
+
max_steps=-1,
|
83 |
+
metric_for_best_model=None,
|
84 |
+
mp_parameters=,
|
85 |
+
neftune_noise_alpha=None,
|
86 |
+
no_cuda=False,
|
87 |
+
num_train_epochs=3.0,
|
88 |
+
optim=OptimizerNames.ADAMW_TORCH,
|
89 |
+
optim_args=None,
|
90 |
+
output_dir=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50,
|
91 |
+
overwrite_output_dir=False,
|
92 |
+
past_index=-1,
|
93 |
+
per_device_eval_batch_size=1,
|
94 |
+
per_device_train_batch_size=8,
|
95 |
+
predict_with_generate=True,
|
96 |
+
prediction_loss_only=False,
|
97 |
+
push_to_hub=False,
|
98 |
+
push_to_hub_model_id=None,
|
99 |
+
push_to_hub_organization=None,
|
100 |
+
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
101 |
+
ray_scope=last,
|
102 |
+
remove_unused_columns=True,
|
103 |
+
report_to=['tensorboard'],
|
104 |
+
resume_from_checkpoint=None,
|
105 |
+
run_name=./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50,
|
106 |
+
save_on_each_node=False,
|
107 |
+
save_only_model=False,
|
108 |
+
save_safetensors=True,
|
109 |
+
save_steps=500,
|
110 |
+
save_strategy=IntervalStrategy.STEPS,
|
111 |
+
save_total_limit=None,
|
112 |
+
seed=42,
|
113 |
+
skip_memory_metrics=True,
|
114 |
+
sortish_sampler=False,
|
115 |
+
split_batches=False,
|
116 |
+
tf32=None,
|
117 |
+
torch_compile=False,
|
118 |
+
torch_compile_backend=None,
|
119 |
+
torch_compile_mode=None,
|
120 |
+
torchdynamo=None,
|
121 |
+
tpu_metrics_debug=False,
|
122 |
+
tpu_num_cores=None,
|
123 |
+
use_cpu=False,
|
124 |
+
use_ipex=False,
|
125 |
+
use_legacy_prediction_loop=False,
|
126 |
+
use_mps_device=False,
|
127 |
+
warmup_ratio=0.0,
|
128 |
+
warmup_steps=0,
|
129 |
+
weight_decay=0.0,
|
130 |
+
)
|
131 |
+
[INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer.model
|
132 |
+
[INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file added_tokens.json
|
133 |
+
[INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file special_tokens_map.json
|
134 |
+
[INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer_config.json
|
135 |
+
[INFO|tokenization_utils_base.py:2024] 2024-01-21 14:10:45,433 >> loading file tokenizer.json
|
136 |
+
[INFO|configuration_utils.py:737] 2024-01-21 14:10:45,521 >> loading configuration file cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser/config.json
|
137 |
+
[INFO|configuration_utils.py:802] 2024-01-21 14:10:45,522 >> Model config MistralConfig {
|
138 |
+
"_name_or_path": "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser",
|
139 |
+
"architectures": [
|
140 |
+
"MistralForCausalLM"
|
141 |
+
],
|
142 |
+
"attention_dropout": 0.0,
|
143 |
+
"bos_token_id": 1,
|
144 |
+
"eos_token_id": 2,
|
145 |
+
"hidden_act": "silu",
|
146 |
+
"hidden_size": 4096,
|
147 |
+
"initializer_range": 0.02,
|
148 |
+
"intermediate_size": 14336,
|
149 |
+
"max_position_embeddings": 32768,
|
150 |
+
"model_type": "mistral",
|
151 |
+
"num_attention_heads": 32,
|
152 |
+
"num_hidden_layers": 32,
|
153 |
+
"num_key_value_heads": 8,
|
154 |
+
"rms_norm_eps": 1e-05,
|
155 |
+
"rope_theta": 10000.0,
|
156 |
+
"sliding_window": null,
|
157 |
+
"tie_word_embeddings": false,
|
158 |
+
"torch_dtype": "bfloat16",
|
159 |
+
"transformers_version": "4.36.2",
|
160 |
+
"use_cache": false,
|
161 |
+
"vocab_size": 32001
|
162 |
+
}
|
163 |
+
|
164 |
+
[INFO|modeling_utils.py:3341] 2024-01-21 14:10:45,538 >> loading weights file cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser/model.safetensors.index.json
|
165 |
+
[INFO|modeling_utils.py:1341] 2024-01-21 14:10:45,538 >> Instantiating MistralForCausalLM model under default dtype torch.bfloat16.
|
166 |
+
[INFO|configuration_utils.py:826] 2024-01-21 14:10:45,539 >> Generate config GenerationConfig {
|
167 |
+
"bos_token_id": 1,
|
168 |
+
"eos_token_id": 2,
|
169 |
+
"use_cache": false
|
170 |
+
}
|
171 |
+
|
172 |
+
|
173 |
+
Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]
|
174 |
+
Loading checkpoint shards: 33%|███▎ | 1/3 [00:00<00:00, 5.99it/s]
|
175 |
+
Loading checkpoint shards: 67%|██████▋ | 2/3 [00:00<00:00, 5.87it/s]cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
|
176 |
+
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 5.91it/s]
|
177 |
+
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 5.91it/s]cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
|
178 |
+
[INFO|modeling_utils.py:4185] 2024-01-21 14:10:46,324 >> All model checkpoint weights were used when initializing MistralForCausalLM.
|
179 |
+
|
180 |
+
[INFO|modeling_utils.py:4193] 2024-01-21 14:10:46,324 >> All the weights of MistralForCausalLM were initialized from the model checkpoint at ./models/dolphin-2.6-mistral-7b-dpo-laser.
|
181 |
+
If your task is similar to the task the model of the checkpoint was trained on, you can already use MistralForCausalLM for predictions without further training.
|
182 |
+
[INFO|configuration_utils.py:779] 2024-01-21 14:10:46,327 >> loading configuration file ./models/dolphin-2.6-mistral-7b-dpo-laser/generation_config.json
|
183 |
+
[INFO|configuration_utils.py:826] 2024-01-21 14:10:46,327 >> Generate config GenerationConfig {
|
184 |
+
"bos_token_id": 1,
|
185 |
+
"eos_token_id": 2
|
186 |
+
}
|
187 |
+
|
188 |
+
01/21/2024 14:10:46 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
|
189 |
+
01/21/2024 14:10:54 - INFO - llmtuner.model.adapter - Merged 1 adapter(s).
|
190 |
+
01/21/2024 14:10:54 - INFO - llmtuner.model.adapter - Loaded adapter(s): ./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
|
191 |
+
01/21/2024 14:10:54 - INFO - llmtuner.model.loader - trainable params: 0 || all params: 7241740288 || trainable%: 0.0000
|
192 |
+
01/21/2024 14:10:54 - INFO - llmtuner.model.loader - This IS expected that the trainable params is 0 if you are using model for inference only.
|
193 |
+
01/21/2024 14:10:54 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json.
|
194 |
+
Using custom data configuration default-f901fb7e685ba757
|
195 |
+
Loading Dataset Infos from /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/datasets/packaged_modules/json
|
196 |
+
Overwrite dataset info from restored data version if exists.
|
197 |
+
Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
|
198 |
+
Found cached dataset json (/home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
|
199 |
+
Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96
|
200 |
+
|
201 |
+
Converting format of dataset: 0%| | 0/50 [00:00<?, ? examples/s]Caching processed dataset at /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-27f6909b28d1bff6.arrow
|
202 |
+
|
203 |
+
Converting format of dataset: 100%|██████████| 50/50 [00:00<00:00, 7038.13 examples/s]
|
204 |
+
|
205 |
+
Running tokenizer on dataset: 0%| | 0/50 [00:00<?, ? examples/s]Caching processed dataset at /home/hangyu5/.cache/huggingface/datasets/json/default-f901fb7e685ba757/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-33f31fb80585e7d9.arrow
|
206 |
+
|
207 |
+
Running tokenizer on dataset: 100%|██████████| 50/50 [00:00<00:00, 556.47 examples/s]
|
208 |
+
[INFO|training_args.py:1838] 2024-01-21 14:10:55,484 >> PyTorch: setting up devices
|
209 |
+
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
|
210 |
+
[INFO|trainer.py:3166] 2024-01-21 14:10:57,294 >> ***** Running Prediction *****
|
211 |
+
[INFO|trainer.py:3168] 2024-01-21 14:10:57,294 >> Num examples = 50
|
212 |
+
[INFO|trainer.py:3171] 2024-01-21 14:10:57,294 >> Batch size = 1
|
213 |
+
[INFO|configuration_utils.py:826] 2024-01-21 14:10:57,306 >> Generate config GenerationConfig {
|
214 |
+
"bos_token_id": 1,
|
215 |
+
"eos_token_id": 2,
|
216 |
+
"use_cache": false
|
217 |
+
}
|
218 |
+
|
219 |
+
/home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )
|
220 |
+
warnings.warn(
|
221 |
+
input_ids:
|
222 |
+
[1, 733, 16289, 28793, 17121, 22526, 28747, 995, 460, 264, 10865, 13892, 395, 2735, 298, 272, 2296, 5572, 28723, 5938, 706, 513, 3030, 387, 13, 28751, 13, 2287, 345, 861, 1264, 345, 527, 28730, 720, 4078, 28730, 6036, 548, 13, 2287, 345, 6518, 1264, 345, 1458, 272, 8877, 4338, 1444, 989, 1191, 951, 20023, 548, 13, 2287, 345, 11438, 1264, 371, 13, 5390, 345, 1123, 1264, 345, 2814, 548, 13, 5390, 345, 10723, 1264, 371, 13, 17422, 345, 2893, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 477, 28739, 13, 17422, 1630, 13, 17422, 345, 3731, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 298, 28739, 13, 17422, 443, 13, 5390, 1630, 13, 5390, 345, 10893, 1264, 733, 13, 17422, 345, 2893, 28730, 16714, 548, 13, 17422, 345, 3731, 28730, 16714, 28739, 13, 5390, 4709, 13, 2287, 443, 13, 28752, 13, 13, 6325, 368, 1820, 264, 9314, 354, 528, 477, 1450, 2726, 298, 4222, 28804, 733, 28748, 16289, 28793]
|
223 |
+
inputs:
|
224 |
+
<s>[INST] SYSTEM: You are a helpful assistant with access to the following functions. Use them if required -
|
225 |
+
{
|
226 |
+
"name": "get_exchange_rate",
|
227 |
+
"description": "Get the exchange rate between two currencies",
|
228 |
+
"parameters": {
|
229 |
+
"type": "object",
|
230 |
+
"properties": {
|
231 |
+
"base_currency": {
|
232 |
+
"type": "string",
|
233 |
+
"description": "The currency to convert from"
|
234 |
+
},
|
235 |
+
"target_currency": {
|
236 |
+
"type": "string",
|
237 |
+
"description": "The currency to convert to"
|
238 |
+
}
|
239 |
+
},
|
240 |
+
"required": [
|
241 |
+
"base_currency",
|
242 |
+
"target_currency"
|
243 |
+
]
|
244 |
+
}
|
245 |
+
}
|
246 |
+
|
247 |
+
Can you book a flight for me from New York to London? [/INST]
|
248 |
+
|
249 |
+
0%| | 0/50 [00:00<?, ?it/s]
|
250 |
+
4%|▍ | 2/50 [00:02<00:48, 1.00s/it]
|
251 |
+
6%|▌ | 3/50 [00:08<02:40, 3.42s/it]
|
252 |
+
8%|▊ | 4/50 [00:12<02:44, 3.57s/it]
|
253 |
+
10%|█ | 5/50 [00:22<04:14, 5.65s/it]
|
254 |
+
12%|█▏ | 6/50 [00:26<03:42, 5.07s/it]
|
255 |
+
14%|█▍ | 7/50 [00:31<03:37, 5.07s/it]
|
256 |
+
16%|█▌ | 8/50 [00:36<03:30, 5.00s/it]
|
257 |
+
18%|█▊ | 9/50 [00:37<02:46, 4.07s/it]
|
258 |
+
20%|██ | 10/50 [00:43<03:04, 4.61s/it]
|
259 |
+
22%|██▏ | 11/50 [00:47<02:50, 4.36s/it]
|
260 |
+
24%|██▍ | 12/50 [00:48<02:10, 3.42s/it]
|
261 |
+
26%|██▌ | 13/50 [00:54<02:29, 4.05s/it]
|
262 |
+
28%|██▊ | 14/50 [00:55<01:57, 3.28s/it]
|
263 |
+
30%|███ | 15/50 [01:02<02:30, 4.31s/it]
|
264 |
+
32%|███▏ | 16/50 [01:04<02:05, 3.70s/it]
|
265 |
+
34%|███▍ | 17/50 [01:11<02:31, 4.59s/it]
|
266 |
+
36%|███▌ | 18/50 [01:17<02:43, 5.12s/it]
|
267 |
+
38%|███▊ | 19/50 [01:20<02:19, 4.49s/it]
|
268 |
+
40%|████ | 20/50 [01:24<02:03, 4.10s/it]
|
269 |
+
42%|████▏ | 21/50 [01:29<02:14, 4.62s/it]
|
270 |
+
44%|████▍ | 22/50 [01:33<01:58, 4.23s/it]
|
271 |
+
46%|████▌ | 23/50 [01:36<01:44, 3.87s/it]
|
272 |
+
48%|████▊ | 24/50 [01:39<01:32, 3.55s/it]
|
273 |
+
50%|█████ | 25/50 [01:44<01:39, 3.98s/it]
|
274 |
+
52%|█████▏ | 26/50 [01:53<02:13, 5.55s/it]
|
275 |
+
54%|█████▍ | 27/50 [01:59<02:11, 5.71s/it]
|
276 |
+
56%|█████▌ | 28/50 [02:08<02:28, 6.77s/it]
|
277 |
+
58%|█████▊ | 29/50 [02:10<01:51, 5.31s/it]
|
278 |
+
60%|██████ | 30/50 [02:14<01:38, 4.92s/it]
|
279 |
+
62%|██████▏ | 31/50 [02:18<01:27, 4.60s/it]
|
280 |
+
64%|██████▍ | 32/50 [02:25<01:35, 5.31s/it]
|
281 |
+
66%|██████▌ | 33/50 [02:29<01:25, 5.04s/it]
|
282 |
+
68%|██████▊ | 34/50 [02:32<01:09, 4.35s/it]
|
283 |
+
70%|███████ | 35/50 [02:36<01:05, 4.40s/it]
|
284 |
+
72%|███████▏ | 36/50 [02:40<00:56, 4.01s/it]
|
285 |
+
74%|███████▍ | 37/50 [02:42<00:46, 3.60s/it]
|
286 |
+
76%|███████▌ | 38/50 [02:47<00:48, 4.04s/it]
|
287 |
+
78%|███████▊ | 39/50 [02:57<01:02, 5.72s/it]
|
288 |
+
80%|████████ | 40/50 [03:01<00:53, 5.31s/it]
|
289 |
+
82%|████████▏ | 41/50 [03:04<00:40, 4.51s/it]
|
290 |
+
84%|████████▍ | 42/50 [03:07<00:32, 4.08s/it]
|
291 |
+
86%|████████▌ | 43/50 [03:10<00:27, 3.89s/it]
|
292 |
+
88%|████████▊ | 44/50 [03:13<00:20, 3.37s/it]
|
293 |
+
90%|█████████ | 45/50 [03:17<00:18, 3.71s/it]
|
294 |
+
92%|█████████▏| 46/50 [03:20<00:13, 3.35s/it]
|
295 |
+
94%|█████████▍| 47/50 [03:25<00:12, 4.01s/it]
|
296 |
+
96%|█████████▌| 48/50 [03:28<00:07, 3.76s/it]
|
297 |
+
98%|█████████▊| 49/50 [03:31<00:03, 3.52s/it]
|
298 |
+
100%|██████████| 50/50 [03:34<00:00, 3.15s/it]Building prefix dict from the default dictionary ...
|
299 |
+
Dumping model to file cache /tmp/jieba.cache
|
300 |
+
Loading model cost 0.537 seconds.
|
301 |
+
Prefix dict has been built successfully.
|
302 |
+
|
303 |
+
100%|██████████| 50/50 [03:35<00:00, 4.30s/it]
|
304 |
+
***** predict metrics *****
|
305 |
+
predict_bleu-4 = 90.1293
|
306 |
+
predict_rouge-1 = 92.0621
|
307 |
+
predict_rouge-2 = 86.3829
|
308 |
+
predict_rouge-l = 89.9619
|
309 |
+
predict_runtime = 0:03:38.79
|
310 |
+
predict_samples_per_second = 0.229
|
311 |
+
predict_steps_per_second = 0.229
|
312 |
+
01/21/2024 14:14:36 - INFO - llmtuner.train.sft.trainer - Saving prediction results to ./models/sft/dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora/Predict_50/generated_predictions.jsonl
|
Predict_50/predict_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_bleu-4": 90.129256,
|
3 |
+
"predict_rouge-1": 92.062058,
|
4 |
+
"predict_rouge-2": 86.382876,
|
5 |
+
"predict_rouge-l": 89.961892,
|
6 |
+
"predict_runtime": 218.7949,
|
7 |
+
"predict_samples_per_second": 0.229,
|
8 |
+
"predict_steps_per_second": 0.229
|
9 |
+
}
|
README.md
CHANGED
@@ -1,3 +1,76 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
+
library_name: peft
|
4 |
+
tags:
|
5 |
+
- llama-factory
|
6 |
+
- lora
|
7 |
+
- generated_from_trainer
|
8 |
+
base_model: cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
|
9 |
+
model-index:
|
10 |
+
- name: dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
|
11 |
+
results: []
|
12 |
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
# dolphin-2.6-mistral-7b-dpo-laser-sft-glaive-function-calling-v2-ep1-lora
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser](https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser) on the https://huggingface.co/datasets/Yhyu13/glaive-function-calling-v2-llama-factory-convert/blob/main/simple-function-calling-v2_converted_5000_with_function_call_only.json dataset.
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.0605
|
22 |
+
|
23 |
+
## Model description
|
24 |
+
|
25 |
+
More information needed
|
26 |
+
|
27 |
+
## Intended uses & limitations
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Training and evaluation data
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Training procedure
|
36 |
+
|
37 |
+
### Training hyperparameters
|
38 |
+
|
39 |
+
The following hyperparameters were used during training:
|
40 |
+
- learning_rate: 5e-05
|
41 |
+
- train_batch_size: 1
|
42 |
+
- eval_batch_size: 1
|
43 |
+
- seed: 42
|
44 |
+
- distributed_type: multi-GPU
|
45 |
+
- num_devices: 2
|
46 |
+
- gradient_accumulation_steps: 4
|
47 |
+
- total_train_batch_size: 8
|
48 |
+
- total_eval_batch_size: 2
|
49 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
50 |
+
- lr_scheduler_type: cosine
|
51 |
+
- num_epochs: 1.0
|
52 |
+
|
53 |
+
### Training results
|
54 |
+
|
55 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
56 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
57 |
+
| 0.2548 | 0.09 | 100 | 0.1148 |
|
58 |
+
| 0.1149 | 0.18 | 200 | 0.0914 |
|
59 |
+
| 0.0871 | 0.27 | 300 | 0.0831 |
|
60 |
+
| 0.0865 | 0.35 | 400 | 0.0760 |
|
61 |
+
| 0.0802 | 0.44 | 500 | 0.0718 |
|
62 |
+
| 0.0689 | 0.53 | 600 | 0.0702 |
|
63 |
+
| 0.0649 | 0.62 | 700 | 0.0649 |
|
64 |
+
| 0.0637 | 0.71 | 800 | 0.0632 |
|
65 |
+
| 0.0698 | 0.8 | 900 | 0.0619 |
|
66 |
+
| 0.0648 | 0.88 | 1000 | 0.0608 |
|
67 |
+
| 0.0654 | 0.97 | 1100 | 0.0605 |
|
68 |
+
|
69 |
+
|
70 |
+
### Framework versions
|
71 |
+
|
72 |
+
- PEFT 0.7.1
|
73 |
+
- Transformers 4.36.2
|
74 |
+
- Pytorch 2.1.2+cu121
|
75 |
+
- Datasets 2.16.1
|
76 |
+
- Tokenizers 0.15.0
|
adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 16,
|
13 |
+
"lora_dropout": 0.0,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 8,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"q_proj",
|
23 |
+
"v_proj"
|
24 |
+
],
|
25 |
+
"task_type": "CAUSAL_LM"
|
26 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26a7e657ce6e3f08056d34d3e0ac164fbd7ff505cfb751800cb9fdb081e54491
|
3 |
+
size 13648432
|
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|im_start|>": 32000
|
3 |
+
}
|
all_results.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_loss": 0.06051653251051903,
|
4 |
+
"eval_runtime": 109.2969,
|
5 |
+
"eval_samples_per_second": 9.204,
|
6 |
+
"eval_steps_per_second": 4.602,
|
7 |
+
"train_loss": 0.09174010732532603,
|
8 |
+
"train_runtime": 4073.2029,
|
9 |
+
"train_samples_per_second": 2.221,
|
10 |
+
"train_steps_per_second": 0.277
|
11 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_loss": 0.06051653251051903,
|
4 |
+
"eval_runtime": 109.2969,
|
5 |
+
"eval_samples_per_second": 9.204,
|
6 |
+
"eval_steps_per_second": 4.602
|
7 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|im_end|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<|im_end|>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
train_eval_log.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
train_results.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"train_loss": 0.09174010732532603,
|
4 |
+
"train_runtime": 4073.2029,
|
5 |
+
"train_samples_per_second": 2.221,
|
6 |
+
"train_steps_per_second": 0.277
|
7 |
+
}
|
trainer_log.jsonl
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"current_steps": 100, "total_steps": 1130, "loss": 0.2548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.904003870460323e-05, "epoch": 0.09, "percentage": 8.85, "elapsed_time": "0:04:11", "remaining_time": "0:43:07"}
|
2 |
+
{"current_steps": 100, "total_steps": 1130, "loss": null, "eval_loss": 0.11478812247514725, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.09, "percentage": 8.85, "elapsed_time": "0:04:11", "remaining_time": "0:43:07"}
|
3 |
+
{"current_steps": 200, "total_steps": 1130, "loss": 0.1149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6233876873505694e-05, "epoch": 0.18, "percentage": 17.7, "elapsed_time": "0:10:15", "remaining_time": "0:47:44"}
|
4 |
+
{"current_steps": 200, "total_steps": 1130, "loss": null, "eval_loss": 0.0914301872253418, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.18, "percentage": 17.7, "elapsed_time": "0:10:15", "remaining_time": "0:47:44"}
|
5 |
+
{"current_steps": 300, "total_steps": 1130, "loss": 0.0871, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1797019046425264e-05, "epoch": 0.27, "percentage": 26.55, "elapsed_time": "0:16:20", "remaining_time": "0:45:13"}
|
6 |
+
{"current_steps": 300, "total_steps": 1130, "loss": null, "eval_loss": 0.08308113366365433, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.27, "percentage": 26.55, "elapsed_time": "0:16:20", "remaining_time": "0:45:13"}
|
7 |
+
{"current_steps": 400, "total_steps": 1130, "loss": 0.0865, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.607020216633599e-05, "epoch": 0.35, "percentage": 35.4, "elapsed_time": "0:22:25", "remaining_time": "0:40:55"}
|
8 |
+
{"current_steps": 400, "total_steps": 1130, "loss": null, "eval_loss": 0.07597807794809341, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.35, "percentage": 35.4, "elapsed_time": "0:22:25", "remaining_time": "0:40:55"}
|
9 |
+
{"current_steps": 500, "total_steps": 1130, "loss": 0.0802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9493228037294702e-05, "epoch": 0.44, "percentage": 44.25, "elapsed_time": "0:28:31", "remaining_time": "0:35:56"}
|
10 |
+
{"current_steps": 500, "total_steps": 1130, "loss": null, "eval_loss": 0.07179657369852066, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.44, "percentage": 44.25, "elapsed_time": "0:28:31", "remaining_time": "0:35:56"}
|
11 |
+
{"current_steps": 600, "total_steps": 1130, "loss": 0.0689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2571187907677853e-05, "epoch": 0.53, "percentage": 53.1, "elapsed_time": "0:34:34", "remaining_time": "0:30:32"}
|
12 |
+
{"current_steps": 600, "total_steps": 1130, "loss": null, "eval_loss": 0.0701972097158432, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.53, "percentage": 53.1, "elapsed_time": "0:34:34", "remaining_time": "0:30:32"}
|
13 |
+
{"current_steps": 700, "total_steps": 1130, "loss": 0.0649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.583567302625469e-05, "epoch": 0.62, "percentage": 61.95, "elapsed_time": "0:40:34", "remaining_time": "0:24:55"}
|
14 |
+
{"current_steps": 700, "total_steps": 1130, "loss": null, "eval_loss": 0.0648578405380249, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.62, "percentage": 61.95, "elapsed_time": "0:40:34", "remaining_time": "0:24:55"}
|
15 |
+
{"current_steps": 800, "total_steps": 1130, "loss": 0.0637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.803950080284005e-06, "epoch": 0.71, "percentage": 70.8, "elapsed_time": "0:46:39", "remaining_time": "0:19:14"}
|
16 |
+
{"current_steps": 800, "total_steps": 1130, "loss": null, "eval_loss": 0.06319624185562134, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.71, "percentage": 70.8, "elapsed_time": "0:46:39", "remaining_time": "0:19:14"}
|
17 |
+
{"current_steps": 900, "total_steps": 1130, "loss": 0.0698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.939236715580884e-06, "epoch": 0.8, "percentage": 79.65, "elapsed_time": "0:52:42", "remaining_time": "0:13:28"}
|
18 |
+
{"current_steps": 900, "total_steps": 1130, "loss": null, "eval_loss": 0.061882246285676956, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.8, "percentage": 79.65, "elapsed_time": "0:52:42", "remaining_time": "0:13:28"}
|
19 |
+
{"current_steps": 1000, "total_steps": 1130, "loss": 0.0648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.615127855610496e-06, "epoch": 0.88, "percentage": 88.5, "elapsed_time": "0:58:44", "remaining_time": "0:07:38"}
|
20 |
+
{"current_steps": 1000, "total_steps": 1130, "loss": null, "eval_loss": 0.060829270631074905, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.88, "percentage": 88.5, "elapsed_time": "0:58:44", "remaining_time": "0:07:38"}
|
21 |
+
{"current_steps": 1100, "total_steps": 1130, "loss": 0.0654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.690476815339244e-08, "epoch": 0.97, "percentage": 97.35, "elapsed_time": "1:04:46", "remaining_time": "0:01:45"}
|
22 |
+
{"current_steps": 1100, "total_steps": 1130, "loss": null, "eval_loss": 0.06053200736641884, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.97, "percentage": 97.35, "elapsed_time": "1:04:46", "remaining_time": "0:01:45"}
|
23 |
+
{"current_steps": 1130, "total_steps": 1130, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:07:51", "remaining_time": "0:00:00"}
|
24 |
+
{"current_steps": 503, "total_steps": 503, "loss": null, "eval_loss": 0.06051653251051903, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1:09:44", "remaining_time": "0:00:00"}
|
trainer_state.json
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9993367234136635,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 1130,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.09,
|
13 |
+
"learning_rate": 4.904003870460323e-05,
|
14 |
+
"loss": 0.2548,
|
15 |
+
"step": 100
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.09,
|
19 |
+
"eval_loss": 0.11478812247514725,
|
20 |
+
"eval_runtime": 109.7793,
|
21 |
+
"eval_samples_per_second": 9.164,
|
22 |
+
"eval_steps_per_second": 4.582,
|
23 |
+
"step": 100
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.18,
|
27 |
+
"learning_rate": 4.6233876873505694e-05,
|
28 |
+
"loss": 0.1149,
|
29 |
+
"step": 200
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 0.18,
|
33 |
+
"eval_loss": 0.0914301872253418,
|
34 |
+
"eval_runtime": 109.8254,
|
35 |
+
"eval_samples_per_second": 9.16,
|
36 |
+
"eval_steps_per_second": 4.58,
|
37 |
+
"step": 200
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.27,
|
41 |
+
"learning_rate": 4.1797019046425264e-05,
|
42 |
+
"loss": 0.0871,
|
43 |
+
"step": 300
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"epoch": 0.27,
|
47 |
+
"eval_loss": 0.08308113366365433,
|
48 |
+
"eval_runtime": 110.073,
|
49 |
+
"eval_samples_per_second": 9.139,
|
50 |
+
"eval_steps_per_second": 4.57,
|
51 |
+
"step": 300
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.35,
|
55 |
+
"learning_rate": 3.607020216633599e-05,
|
56 |
+
"loss": 0.0865,
|
57 |
+
"step": 400
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 0.35,
|
61 |
+
"eval_loss": 0.07597807794809341,
|
62 |
+
"eval_runtime": 109.949,
|
63 |
+
"eval_samples_per_second": 9.15,
|
64 |
+
"eval_steps_per_second": 4.575,
|
65 |
+
"step": 400
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.44,
|
69 |
+
"learning_rate": 2.9493228037294702e-05,
|
70 |
+
"loss": 0.0802,
|
71 |
+
"step": 500
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 0.44,
|
75 |
+
"eval_loss": 0.07179657369852066,
|
76 |
+
"eval_runtime": 110.0004,
|
77 |
+
"eval_samples_per_second": 9.145,
|
78 |
+
"eval_steps_per_second": 4.573,
|
79 |
+
"step": 500
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.53,
|
83 |
+
"learning_rate": 2.2571187907677853e-05,
|
84 |
+
"loss": 0.0689,
|
85 |
+
"step": 600
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"epoch": 0.53,
|
89 |
+
"eval_loss": 0.0701972097158432,
|
90 |
+
"eval_runtime": 109.5978,
|
91 |
+
"eval_samples_per_second": 9.179,
|
92 |
+
"eval_steps_per_second": 4.59,
|
93 |
+
"step": 600
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.62,
|
97 |
+
"learning_rate": 1.583567302625469e-05,
|
98 |
+
"loss": 0.0649,
|
99 |
+
"step": 700
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 0.62,
|
103 |
+
"eval_loss": 0.0648578405380249,
|
104 |
+
"eval_runtime": 109.8826,
|
105 |
+
"eval_samples_per_second": 9.155,
|
106 |
+
"eval_steps_per_second": 4.578,
|
107 |
+
"step": 700
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.71,
|
111 |
+
"learning_rate": 9.803950080284005e-06,
|
112 |
+
"loss": 0.0637,
|
113 |
+
"step": 800
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 0.71,
|
117 |
+
"eval_loss": 0.06319624185562134,
|
118 |
+
"eval_runtime": 109.8291,
|
119 |
+
"eval_samples_per_second": 9.16,
|
120 |
+
"eval_steps_per_second": 4.58,
|
121 |
+
"step": 800
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.8,
|
125 |
+
"learning_rate": 4.939236715580884e-06,
|
126 |
+
"loss": 0.0698,
|
127 |
+
"step": 900
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"epoch": 0.8,
|
131 |
+
"eval_loss": 0.061882246285676956,
|
132 |
+
"eval_runtime": 109.8313,
|
133 |
+
"eval_samples_per_second": 9.16,
|
134 |
+
"eval_steps_per_second": 4.58,
|
135 |
+
"step": 900
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.88,
|
139 |
+
"learning_rate": 1.615127855610496e-06,
|
140 |
+
"loss": 0.0648,
|
141 |
+
"step": 1000
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"epoch": 0.88,
|
145 |
+
"eval_loss": 0.060829270631074905,
|
146 |
+
"eval_runtime": 109.3936,
|
147 |
+
"eval_samples_per_second": 9.196,
|
148 |
+
"eval_steps_per_second": 4.598,
|
149 |
+
"step": 1000
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.97,
|
153 |
+
"learning_rate": 8.690476815339244e-08,
|
154 |
+
"loss": 0.0654,
|
155 |
+
"step": 1100
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"epoch": 0.97,
|
159 |
+
"eval_loss": 0.06053200736641884,
|
160 |
+
"eval_runtime": 109.4208,
|
161 |
+
"eval_samples_per_second": 9.194,
|
162 |
+
"eval_steps_per_second": 4.597,
|
163 |
+
"step": 1100
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 1.0,
|
167 |
+
"step": 1130,
|
168 |
+
"total_flos": 1.4019616490441933e+17,
|
169 |
+
"train_loss": 0.09174010732532603,
|
170 |
+
"train_runtime": 4073.2029,
|
171 |
+
"train_samples_per_second": 2.221,
|
172 |
+
"train_steps_per_second": 0.277
|
173 |
+
}
|
174 |
+
],
|
175 |
+
"logging_steps": 100,
|
176 |
+
"max_steps": 1130,
|
177 |
+
"num_input_tokens_seen": 0,
|
178 |
+
"num_train_epochs": 1,
|
179 |
+
"save_steps": 100,
|
180 |
+
"total_flos": 1.4019616490441933e+17,
|
181 |
+
"train_batch_size": 1,
|
182 |
+
"trial_name": null,
|
183 |
+
"trial_params": null
|
184 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64729ffa529a85cd7112b11d8207aa70ac9cdece464413d61f307c245695dbb0
|
3 |
+
size 4984
|
training_eval_loss.png
ADDED
training_loss.png
ADDED