diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -0,0 +1,15237 @@ +07/25/2024 11:24:47 - INFO - __main__ - Distributed environment: MULTI_GPU Backend: nccl +Num processes: 4 +Process index: 0 +Local process index: 0 +Device: cuda:0 + +Mixed precision type: fp16 + +07/25/2024 11:24:47 - WARNING - huggingface_hub.repository - /dli/gptesla-small/./ is already a clone of https://huggingface.co/shng2025/gptesla-small. Make sure you pull the latest changes with `repo.git_pull()`. +07/25/2024 11:24:47 - WARNING - huggingface_hub.repository - Revision `jumping-cherry-144` does not exist. Created and checked out branch `jumping-cherry-144`. +07/25/2024 11:24:47 - WARNING - huggingface_hub.repository - +07/25/2024 11:24:48 - DEBUG - datasets.utils._dataset_viewer - Dataset info for shng2025/gptesla-train is not completely ready yet. +07/25/2024 11:24:49 - INFO - datasets.builder - No config specified, defaulting to the single config: gptesla-train/default +07/25/2024 11:24:49 - INFO - datasets.info - Loading Dataset Infos from /usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#1, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#2, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#3, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#4, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#5, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#6, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#7, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#8, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#9, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#10, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#18, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#37, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#12, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#14, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#49, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#50, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#53, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#51, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#65, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#56, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#54, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#55, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#68, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#71, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#11, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#74, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#13, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#76, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#91, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#94, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#93, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#95, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#16, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#15, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#17, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#20, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#19, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#22, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#23, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#26, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#24, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#27, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#21, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#29, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#28, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#25, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#31, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#33, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#30, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#32, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#36, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#34, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#38, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#41, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#35, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#43, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#40, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#39, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#44, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#42, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#46, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#45, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#48, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#47, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#60, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#59, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#58, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#52, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#62, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#67, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#64, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#66, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#75, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#72, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#70, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#63, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#77, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#57, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#79, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#78, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#81, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#83, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#80, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#82, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#87, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#84, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#86, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#85, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#73, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#88, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#61, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#92, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#90, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#89, ': Starting to iterate over 1/183 shards. +07/25/2024 11:24:54 - DEBUG - datasets.iterable_dataset - dataloader worker#69, ': Starting to iterate over 2/183 shards. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10562022 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492277 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485912 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485912 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511500 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489635 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497062 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10525688 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10536479 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10863935 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497111 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497111 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489599 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486616 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486616 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486397 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10522596 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491547 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10949076 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491272 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486023 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486023 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485842 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488150 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511515 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10499607 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10501535 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491889 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491327 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492861 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492861 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509286 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509286 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10686322 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10552417 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10552417 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488098 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497218 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488651 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10515063 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488608 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488651 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485847 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10512203 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10621496 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10553677 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10553677 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10498167 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10509262 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10485918 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10500930 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10610581 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10493913 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486276 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10500290 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487790 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10525926 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10511604 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486172 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10751338 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495973 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495520 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10495520 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10598254 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10530453 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10668116 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489575 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486801 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488385 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10499106 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487725 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487482 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 11286262 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10640425 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10497335 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 11286262 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10492554 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 11115863 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:57 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487097 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:24:57 - DEBUG - datasets.packaged_modules.json.json - Batch of 10676628 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:25:12 - INFO - __main__ - Step 1: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 10.554669380187988} +07/25/2024 11:25:12 - INFO - __main__ - Step 2: {'lr': 7.142857142857143e-07, 'samples': 96, 'steps': 1, 'loss/train': 10.494059562683105} +07/25/2024 11:25:13 - INFO - __main__ - Step 3: {'lr': 1.4285714285714286e-06, 'samples': 144, 'steps': 2, 'loss/train': 10.507988929748535} +07/25/2024 11:25:13 - INFO - __main__ - Step 4: {'lr': 2.142857142857143e-06, 'samples': 192, 'steps': 3, 'loss/train': 10.415447235107422} +07/25/2024 11:25:13 - INFO - __main__ - Step 5: {'lr': 2.8571428571428573e-06, 'samples': 240, 'steps': 4, 'loss/train': 10.345850944519043} +07/25/2024 11:25:14 - INFO - __main__ - Step 6: {'lr': 3.5714285714285714e-06, 'samples': 288, 'steps': 5, 'loss/train': 10.195524215698242} +07/25/2024 11:25:14 - INFO - __main__ - Step 7: {'lr': 4.285714285714286e-06, 'samples': 336, 'steps': 6, 'loss/train': 10.09341812133789} +07/25/2024 11:25:14 - INFO - __main__ - Step 8: {'lr': 5e-06, 'samples': 384, 'steps': 7, 'loss/train': 9.965239524841309} +07/25/2024 11:25:14 - INFO - __main__ - Step 9: {'lr': 5.7142857142857145e-06, 'samples': 432, 'steps': 8, 'loss/train': 9.698853492736816} +07/25/2024 11:25:15 - INFO - __main__ - Step 10: {'lr': 6.428571428571429e-06, 'samples': 480, 'steps': 9, 'loss/train': 9.80683708190918} +07/25/2024 11:25:15 - INFO - __main__ - Step 11: {'lr': 7.142857142857143e-06, 'samples': 528, 'steps': 10, 'loss/train': 9.633079528808594} +07/25/2024 11:25:15 - INFO - __main__ - Step 12: {'lr': 7.857142857142858e-06, 'samples': 576, 'steps': 11, 'loss/train': 9.700591087341309} +07/25/2024 11:25:16 - INFO - __main__ - Step 13: {'lr': 8.571428571428573e-06, 'samples': 624, 'steps': 12, 'loss/train': 9.603139877319336} +07/25/2024 11:25:16 - INFO - __main__ - Step 14: {'lr': 9.285714285714286e-06, 'samples': 672, 'steps': 13, 'loss/train': 9.30308723449707} +07/25/2024 11:25:16 - INFO - __main__ - Step 15: {'lr': 1e-05, 'samples': 720, 'steps': 14, 'loss/train': 9.333526611328125} +07/25/2024 11:25:16 - INFO - __main__ - Step 16: {'lr': 1.0714285714285714e-05, 'samples': 768, 'steps': 15, 'loss/train': 8.336181640625} +07/25/2024 11:25:17 - INFO - __main__ - Step 17: {'lr': 1.1428571428571429e-05, 'samples': 816, 'steps': 16, 'loss/train': 9.075631141662598} +07/25/2024 11:25:17 - INFO - __main__ - Step 18: {'lr': 1.2142857142857142e-05, 'samples': 864, 'steps': 17, 'loss/train': 9.18478012084961} +07/25/2024 11:25:17 - INFO - __main__ - Step 19: {'lr': 1.2857142857142857e-05, 'samples': 912, 'steps': 18, 'loss/train': 8.96328353881836} +07/25/2024 11:25:17 - INFO - __main__ - Step 20: {'lr': 1.3571428571428572e-05, 'samples': 960, 'steps': 19, 'loss/train': 9.45018196105957} +07/25/2024 11:25:18 - INFO - __main__ - Step 21: {'lr': 1.4285714285714285e-05, 'samples': 1008, 'steps': 20, 'loss/train': 8.517333984375} +07/25/2024 11:25:18 - INFO - __main__ - Step 22: {'lr': 1.5e-05, 'samples': 1056, 'steps': 21, 'loss/train': 9.207684516906738} +07/25/2024 11:25:18 - INFO - __main__ - Step 23: {'lr': 1.5714285714285715e-05, 'samples': 1104, 'steps': 22, 'loss/train': 8.681092262268066} +07/25/2024 11:25:19 - INFO - __main__ - Step 24: {'lr': 1.642857142857143e-05, 'samples': 1152, 'steps': 23, 'loss/train': 8.316036224365234} +07/25/2024 11:25:19 - INFO - __main__ - Step 25: {'lr': 1.7142857142857145e-05, 'samples': 1200, 'steps': 24, 'loss/train': 8.944169044494629} +07/25/2024 11:25:19 - INFO - __main__ - Step 26: {'lr': 1.7857142857142855e-05, 'samples': 1248, 'steps': 25, 'loss/train': 8.878201484680176} +07/25/2024 11:25:19 - INFO - __main__ - Step 27: {'lr': 1.8571428571428572e-05, 'samples': 1296, 'steps': 26, 'loss/train': 9.158102989196777} +07/25/2024 11:25:20 - INFO - __main__ - Step 28: {'lr': 1.9285714285714285e-05, 'samples': 1344, 'steps': 27, 'loss/train': 9.14354419708252} +07/25/2024 11:25:20 - INFO - __main__ - Step 29: {'lr': 2e-05, 'samples': 1392, 'steps': 28, 'loss/train': 8.860624313354492} +07/25/2024 11:25:20 - INFO - __main__ - Step 30: {'lr': 2.0714285714285715e-05, 'samples': 1440, 'steps': 29, 'loss/train': 8.876450538635254} +07/25/2024 11:25:21 - INFO - __main__ - Step 31: {'lr': 2.1428571428571428e-05, 'samples': 1488, 'steps': 30, 'loss/train': 8.425738334655762} +07/25/2024 11:25:21 - INFO - __main__ - Step 32: {'lr': 2.214285714285714e-05, 'samples': 1536, 'steps': 31, 'loss/train': 8.942279815673828} +07/25/2024 11:25:21 - INFO - __main__ - Step 33: {'lr': 2.2857142857142858e-05, 'samples': 1584, 'steps': 32, 'loss/train': 8.757084846496582} +07/25/2024 11:25:21 - INFO - __main__ - Step 34: {'lr': 2.3571428571428575e-05, 'samples': 1632, 'steps': 33, 'loss/train': 8.699286460876465} +07/25/2024 11:25:22 - INFO - __main__ - Step 35: {'lr': 2.4285714285714285e-05, 'samples': 1680, 'steps': 34, 'loss/train': 8.857367515563965} +07/25/2024 11:25:22 - INFO - __main__ - Step 36: {'lr': 2.5e-05, 'samples': 1728, 'steps': 35, 'loss/train': 8.830195426940918} +07/25/2024 11:25:22 - INFO - __main__ - Step 37: {'lr': 2.5714285714285714e-05, 'samples': 1776, 'steps': 36, 'loss/train': 8.944982528686523} +07/25/2024 11:25:22 - INFO - __main__ - Step 38: {'lr': 2.642857142857143e-05, 'samples': 1824, 'steps': 37, 'loss/train': 8.670278549194336} +07/25/2024 11:25:23 - INFO - __main__ - Step 39: {'lr': 2.7142857142857144e-05, 'samples': 1872, 'steps': 38, 'loss/train': 8.710525512695312} +07/25/2024 11:25:23 - INFO - __main__ - Step 40: {'lr': 2.7857142857142858e-05, 'samples': 1920, 'steps': 39, 'loss/train': 7.902089595794678} +07/25/2024 11:25:23 - INFO - __main__ - Step 41: {'lr': 2.857142857142857e-05, 'samples': 1968, 'steps': 40, 'loss/train': 8.400484085083008} +07/25/2024 11:25:24 - INFO - __main__ - Step 42: {'lr': 2.9285714285714288e-05, 'samples': 2016, 'steps': 41, 'loss/train': 8.789310455322266} +07/25/2024 11:25:24 - INFO - __main__ - Step 43: {'lr': 3e-05, 'samples': 2064, 'steps': 42, 'loss/train': 8.754344940185547} +07/25/2024 11:25:24 - INFO - __main__ - Step 44: {'lr': 3.071428571428572e-05, 'samples': 2112, 'steps': 43, 'loss/train': 8.84192943572998} +07/25/2024 11:25:24 - INFO - __main__ - Step 45: {'lr': 3.142857142857143e-05, 'samples': 2160, 'steps': 44, 'loss/train': 8.784793853759766} +07/25/2024 11:25:25 - INFO - __main__ - Step 46: {'lr': 3.214285714285714e-05, 'samples': 2208, 'steps': 45, 'loss/train': 8.67403793334961} +07/25/2024 11:25:25 - INFO - __main__ - Step 47: {'lr': 3.285714285714286e-05, 'samples': 2256, 'steps': 46, 'loss/train': 8.51427173614502} +07/25/2024 11:25:25 - INFO - __main__ - Step 48: {'lr': 3.357142857142857e-05, 'samples': 2304, 'steps': 47, 'loss/train': 8.48193073272705} +07/25/2024 11:25:26 - INFO - __main__ - Step 49: {'lr': 3.428571428571429e-05, 'samples': 2352, 'steps': 48, 'loss/train': 8.518038749694824} +07/25/2024 11:25:26 - INFO - __main__ - Step 50: {'lr': 3.5000000000000004e-05, 'samples': 2400, 'steps': 49, 'loss/train': 8.63569450378418} +07/25/2024 11:25:26 - INFO - __main__ - Step 51: {'lr': 3.571428571428571e-05, 'samples': 2448, 'steps': 50, 'loss/train': 8.343396186828613} +07/25/2024 11:25:26 - INFO - __main__ - Step 52: {'lr': 3.642857142857143e-05, 'samples': 2496, 'steps': 51, 'loss/train': 8.461634635925293} +07/25/2024 11:25:27 - INFO - __main__ - Step 53: {'lr': 3.7142857142857143e-05, 'samples': 2544, 'steps': 52, 'loss/train': 8.43316650390625} +07/25/2024 11:25:27 - INFO - __main__ - Step 54: {'lr': 3.7857142857142864e-05, 'samples': 2592, 'steps': 53, 'loss/train': 8.464268684387207} +07/25/2024 11:25:27 - INFO - __main__ - Step 55: {'lr': 3.857142857142857e-05, 'samples': 2640, 'steps': 54, 'loss/train': 8.371450424194336} +07/25/2024 11:25:27 - INFO - __main__ - Step 56: {'lr': 3.928571428571428e-05, 'samples': 2688, 'steps': 55, 'loss/train': 8.155680656433105} +07/25/2024 11:25:28 - INFO - __main__ - Step 57: {'lr': 4e-05, 'samples': 2736, 'steps': 56, 'loss/train': 8.359997749328613} +07/25/2024 11:25:28 - INFO - __main__ - Step 58: {'lr': 4.0714285714285717e-05, 'samples': 2784, 'steps': 57, 'loss/train': 7.883953094482422} +07/25/2024 11:25:28 - INFO - __main__ - Step 59: {'lr': 4.142857142857143e-05, 'samples': 2832, 'steps': 58, 'loss/train': 8.425983428955078} +07/25/2024 11:25:29 - INFO - __main__ - Step 60: {'lr': 4.214285714285714e-05, 'samples': 2880, 'steps': 59, 'loss/train': 8.220914840698242} +07/25/2024 11:25:29 - INFO - __main__ - Step 61: {'lr': 4.2857142857142856e-05, 'samples': 2928, 'steps': 60, 'loss/train': 8.216103553771973} +07/25/2024 11:25:29 - INFO - __main__ - Step 62: {'lr': 4.3571428571428576e-05, 'samples': 2976, 'steps': 61, 'loss/train': 8.129951477050781} +07/25/2024 11:25:29 - INFO - __main__ - Step 63: {'lr': 4.428571428571428e-05, 'samples': 3024, 'steps': 62, 'loss/train': 7.993805885314941} +07/25/2024 11:25:30 - INFO - __main__ - Step 64: {'lr': 4.4999999999999996e-05, 'samples': 3072, 'steps': 63, 'loss/train': 6.955376625061035} +07/25/2024 11:25:30 - INFO - __main__ - Step 65: {'lr': 4.5714285714285716e-05, 'samples': 3120, 'steps': 64, 'loss/train': 7.9038238525390625} +07/25/2024 11:25:30 - INFO - __main__ - Step 66: {'lr': 4.642857142857143e-05, 'samples': 3168, 'steps': 65, 'loss/train': 7.659880638122559} +07/25/2024 11:25:31 - INFO - __main__ - Step 67: {'lr': 4.714285714285715e-05, 'samples': 3216, 'steps': 66, 'loss/train': 7.462357521057129} +07/25/2024 11:25:31 - INFO - __main__ - Step 68: {'lr': 4.7857142857142856e-05, 'samples': 3264, 'steps': 67, 'loss/train': 7.9803571701049805} +07/25/2024 11:25:31 - INFO - __main__ - Step 69: {'lr': 4.857142857142857e-05, 'samples': 3312, 'steps': 68, 'loss/train': 7.895639896392822} +07/25/2024 11:25:31 - INFO - __main__ - Step 70: {'lr': 4.928571428571429e-05, 'samples': 3360, 'steps': 69, 'loss/train': 7.726537704467773} +07/25/2024 11:25:32 - INFO - __main__ - Step 71: {'lr': 5e-05, 'samples': 3408, 'steps': 70, 'loss/train': 7.8505425453186035} +07/25/2024 11:25:32 - INFO - __main__ - Step 72: {'lr': 5.0714285714285716e-05, 'samples': 3456, 'steps': 71, 'loss/train': 7.492800235748291} +07/25/2024 11:25:32 - INFO - __main__ - Step 73: {'lr': 5.142857142857143e-05, 'samples': 3504, 'steps': 72, 'loss/train': 7.890054225921631} +07/25/2024 11:25:33 - INFO - __main__ - Step 74: {'lr': 5.214285714285714e-05, 'samples': 3552, 'steps': 73, 'loss/train': 7.429488182067871} +07/25/2024 11:25:33 - INFO - __main__ - Step 75: {'lr': 5.285714285714286e-05, 'samples': 3600, 'steps': 74, 'loss/train': 7.520913600921631} +07/25/2024 11:25:33 - INFO - __main__ - Step 76: {'lr': 5.357142857142857e-05, 'samples': 3648, 'steps': 75, 'loss/train': 7.66839075088501} +07/25/2024 11:25:33 - INFO - __main__ - Step 77: {'lr': 5.428571428571429e-05, 'samples': 3696, 'steps': 76, 'loss/train': 7.810487270355225} +07/25/2024 11:25:34 - INFO - __main__ - Step 78: {'lr': 5.5e-05, 'samples': 3744, 'steps': 77, 'loss/train': 7.009271621704102} +07/25/2024 11:25:34 - INFO - __main__ - Step 79: {'lr': 5.5714285714285715e-05, 'samples': 3792, 'steps': 78, 'loss/train': 7.631109714508057} +07/25/2024 11:25:34 - INFO - __main__ - Step 80: {'lr': 5.642857142857143e-05, 'samples': 3840, 'steps': 79, 'loss/train': 6.9839606285095215} +07/25/2024 11:25:35 - INFO - __main__ - Step 81: {'lr': 5.714285714285714e-05, 'samples': 3888, 'steps': 80, 'loss/train': 7.642471790313721} +07/25/2024 11:25:35 - INFO - __main__ - Step 82: {'lr': 5.7857142857142855e-05, 'samples': 3936, 'steps': 81, 'loss/train': 7.183259010314941} +07/25/2024 11:25:35 - INFO - __main__ - Step 83: {'lr': 5.8571428571428575e-05, 'samples': 3984, 'steps': 82, 'loss/train': 7.3919596672058105} +07/25/2024 11:25:35 - INFO - __main__ - Step 84: {'lr': 5.928571428571429e-05, 'samples': 4032, 'steps': 83, 'loss/train': 7.52573299407959} +07/25/2024 11:25:36 - INFO - __main__ - Step 85: {'lr': 6e-05, 'samples': 4080, 'steps': 84, 'loss/train': 7.169320583343506} +07/25/2024 11:25:36 - INFO - __main__ - Step 86: {'lr': 6.0714285714285715e-05, 'samples': 4128, 'steps': 85, 'loss/train': 7.095631122589111} +07/25/2024 11:25:36 - INFO - __main__ - Step 87: {'lr': 6.142857142857143e-05, 'samples': 4176, 'steps': 86, 'loss/train': 7.257204532623291} +07/25/2024 11:25:36 - INFO - __main__ - Step 88: {'lr': 6.214285714285714e-05, 'samples': 4224, 'steps': 87, 'loss/train': 6.010106563568115} +07/25/2024 11:25:37 - INFO - __main__ - Step 89: {'lr': 6.285714285714286e-05, 'samples': 4272, 'steps': 88, 'loss/train': 7.189196586608887} +07/25/2024 11:25:37 - INFO - __main__ - Step 90: {'lr': 6.357142857142857e-05, 'samples': 4320, 'steps': 89, 'loss/train': 6.902089595794678} +07/25/2024 11:25:37 - INFO - __main__ - Step 91: {'lr': 6.428571428571427e-05, 'samples': 4368, 'steps': 90, 'loss/train': 6.5942535400390625} +07/25/2024 11:25:38 - INFO - __main__ - Step 92: {'lr': 6.500000000000001e-05, 'samples': 4416, 'steps': 91, 'loss/train': 7.392148017883301} +07/25/2024 11:25:38 - INFO - __main__ - Step 93: {'lr': 6.571428571428571e-05, 'samples': 4464, 'steps': 92, 'loss/train': 6.586553573608398} +07/25/2024 11:25:38 - INFO - __main__ - Step 94: {'lr': 6.642857142857143e-05, 'samples': 4512, 'steps': 93, 'loss/train': 7.5296549797058105} +07/25/2024 11:25:38 - INFO - __main__ - Step 95: {'lr': 6.714285714285714e-05, 'samples': 4560, 'steps': 94, 'loss/train': 7.048985481262207} +07/25/2024 11:25:39 - INFO - __main__ - Step 96: {'lr': 6.785714285714285e-05, 'samples': 4608, 'steps': 95, 'loss/train': 4.687469959259033} +07/25/2024 11:25:39 - INFO - __main__ - Step 97: {'lr': 6.857142857142858e-05, 'samples': 4656, 'steps': 96, 'loss/train': 7.1623854637146} +07/25/2024 11:25:39 - INFO - __main__ - Step 98: {'lr': 6.928571428571429e-05, 'samples': 4704, 'steps': 97, 'loss/train': 6.722190856933594} +07/25/2024 11:25:40 - INFO - __main__ - Step 99: {'lr': 7.000000000000001e-05, 'samples': 4752, 'steps': 98, 'loss/train': 6.930887699127197} +07/25/2024 11:25:40 - INFO - __main__ - Step 100: {'lr': 7.071428571428571e-05, 'samples': 4800, 'steps': 99, 'loss/train': 7.2268805503845215} +07/25/2024 11:25:40 - INFO - __main__ - Step 101: {'lr': 7.142857142857142e-05, 'samples': 4848, 'steps': 100, 'loss/train': 6.872276306152344} +07/25/2024 11:25:40 - INFO - __main__ - Step 102: {'lr': 7.214285714285715e-05, 'samples': 4896, 'steps': 101, 'loss/train': 5.04807710647583} +07/25/2024 11:25:41 - INFO - __main__ - Step 103: {'lr': 7.285714285714286e-05, 'samples': 4944, 'steps': 102, 'loss/train': 6.8386383056640625} +07/25/2024 11:25:41 - INFO - __main__ - Step 104: {'lr': 7.357142857142857e-05, 'samples': 4992, 'steps': 103, 'loss/train': 6.707127571105957} +07/25/2024 11:25:41 - INFO - __main__ - Step 105: {'lr': 7.428571428571429e-05, 'samples': 5040, 'steps': 104, 'loss/train': 6.885215759277344} +07/25/2024 11:25:42 - INFO - __main__ - Step 106: {'lr': 7.5e-05, 'samples': 5088, 'steps': 105, 'loss/train': 6.762844562530518} +07/25/2024 11:25:42 - INFO - __main__ - Step 107: {'lr': 7.571428571428573e-05, 'samples': 5136, 'steps': 106, 'loss/train': 6.92085599899292} +07/25/2024 11:25:42 - INFO - __main__ - Step 108: {'lr': 7.642857142857143e-05, 'samples': 5184, 'steps': 107, 'loss/train': 6.639281749725342} +07/25/2024 11:25:42 - INFO - __main__ - Step 109: {'lr': 7.714285714285714e-05, 'samples': 5232, 'steps': 108, 'loss/train': 6.710461616516113} +07/25/2024 11:25:43 - INFO - __main__ - Step 110: {'lr': 7.785714285714286e-05, 'samples': 5280, 'steps': 109, 'loss/train': 3.4145185947418213} +07/25/2024 11:25:43 - INFO - __main__ - Step 111: {'lr': 7.857142857142857e-05, 'samples': 5328, 'steps': 110, 'loss/train': 6.69966983795166} +07/25/2024 11:25:43 - INFO - __main__ - Step 112: {'lr': 7.928571428571429e-05, 'samples': 5376, 'steps': 111, 'loss/train': 6.780115127563477} +07/25/2024 11:25:44 - INFO - __main__ - Step 113: {'lr': 8e-05, 'samples': 5424, 'steps': 112, 'loss/train': 6.512848377227783} +07/25/2024 11:25:44 - INFO - __main__ - Step 114: {'lr': 8.071428571428571e-05, 'samples': 5472, 'steps': 113, 'loss/train': 6.558418273925781} +07/25/2024 11:25:44 - INFO - __main__ - Step 115: {'lr': 8.142857142857143e-05, 'samples': 5520, 'steps': 114, 'loss/train': 6.531116485595703} +07/25/2024 11:25:44 - INFO - __main__ - Step 116: {'lr': 8.214285714285714e-05, 'samples': 5568, 'steps': 115, 'loss/train': 6.557308197021484} +07/25/2024 11:25:45 - INFO - __main__ - Step 117: {'lr': 8.285714285714286e-05, 'samples': 5616, 'steps': 116, 'loss/train': 6.023952960968018} +07/25/2024 11:25:45 - INFO - __main__ - Step 118: {'lr': 8.357142857142858e-05, 'samples': 5664, 'steps': 117, 'loss/train': 7.063660144805908} +07/25/2024 11:25:45 - INFO - __main__ - Step 119: {'lr': 8.428571428571429e-05, 'samples': 5712, 'steps': 118, 'loss/train': 6.6882853507995605} +07/25/2024 11:25:45 - INFO - __main__ - Step 120: {'lr': 8.5e-05, 'samples': 5760, 'steps': 119, 'loss/train': 5.413237571716309} +07/25/2024 11:25:46 - INFO - __main__ - Step 121: {'lr': 8.571428571428571e-05, 'samples': 5808, 'steps': 120, 'loss/train': 6.166462421417236} +07/25/2024 11:25:46 - INFO - __main__ - Step 122: {'lr': 8.642857142857143e-05, 'samples': 5856, 'steps': 121, 'loss/train': 6.413567543029785} +07/25/2024 11:25:46 - INFO - __main__ - Step 123: {'lr': 8.714285714285715e-05, 'samples': 5904, 'steps': 122, 'loss/train': 6.3801727294921875} +07/25/2024 11:25:47 - INFO - __main__ - Step 124: {'lr': 8.785714285714286e-05, 'samples': 5952, 'steps': 123, 'loss/train': 7.042605400085449} +07/25/2024 11:25:47 - INFO - __main__ - Step 125: {'lr': 8.857142857142857e-05, 'samples': 6000, 'steps': 124, 'loss/train': 6.735599517822266} +07/25/2024 11:25:47 - INFO - __main__ - Step 126: {'lr': 8.928571428571429e-05, 'samples': 6048, 'steps': 125, 'loss/train': 6.620289325714111} +07/25/2024 11:25:47 - INFO - __main__ - Step 127: {'lr': 8.999999999999999e-05, 'samples': 6096, 'steps': 126, 'loss/train': 6.738864421844482} +07/25/2024 11:25:48 - INFO - __main__ - Step 128: {'lr': 9.071428571428573e-05, 'samples': 6144, 'steps': 127, 'loss/train': 6.406912326812744} +07/25/2024 11:25:48 - INFO - __main__ - Step 129: {'lr': 9.142857142857143e-05, 'samples': 6192, 'steps': 128, 'loss/train': 6.422929286956787} +07/25/2024 11:25:48 - INFO - __main__ - Step 130: {'lr': 9.214285714285714e-05, 'samples': 6240, 'steps': 129, 'loss/train': 6.476966381072998} +07/25/2024 11:25:49 - INFO - __main__ - Step 131: {'lr': 9.285714285714286e-05, 'samples': 6288, 'steps': 130, 'loss/train': 6.289211273193359} +07/25/2024 11:25:49 - INFO - __main__ - Step 132: {'lr': 9.357142857142857e-05, 'samples': 6336, 'steps': 131, 'loss/train': 6.4881696701049805} +07/25/2024 11:25:49 - INFO - __main__ - Step 133: {'lr': 9.42857142857143e-05, 'samples': 6384, 'steps': 132, 'loss/train': 6.840321063995361} +07/25/2024 11:25:49 - INFO - __main__ - Step 134: {'lr': 9.5e-05, 'samples': 6432, 'steps': 133, 'loss/train': 6.22948694229126} +07/25/2024 11:25:50 - INFO - __main__ - Step 135: {'lr': 9.571428571428571e-05, 'samples': 6480, 'steps': 134, 'loss/train': 5.924211025238037} +07/25/2024 11:25:50 - INFO - __main__ - Step 136: {'lr': 9.642857142857143e-05, 'samples': 6528, 'steps': 135, 'loss/train': 8.402527809143066} +07/25/2024 11:25:50 - INFO - __main__ - Step 137: {'lr': 9.714285714285714e-05, 'samples': 6576, 'steps': 136, 'loss/train': 6.357081413269043} +07/25/2024 11:25:51 - INFO - __main__ - Step 138: {'lr': 9.785714285714286e-05, 'samples': 6624, 'steps': 137, 'loss/train': 6.335728168487549} +07/25/2024 11:25:51 - INFO - __main__ - Step 139: {'lr': 9.857142857142858e-05, 'samples': 6672, 'steps': 138, 'loss/train': 6.388386249542236} +07/25/2024 11:25:51 - INFO - __main__ - Step 140: {'lr': 9.928571428571428e-05, 'samples': 6720, 'steps': 139, 'loss/train': 6.144318103790283} +07/25/2024 11:25:51 - INFO - __main__ - Step 141: {'lr': 0.0001, 'samples': 6768, 'steps': 140, 'loss/train': 5.887519359588623} +07/25/2024 11:25:52 - INFO - __main__ - Step 142: {'lr': 0.00010071428571428571, 'samples': 6816, 'steps': 141, 'loss/train': 6.515809059143066} +07/25/2024 11:25:52 - INFO - __main__ - Step 143: {'lr': 0.00010142857142857143, 'samples': 6864, 'steps': 142, 'loss/train': 6.273582458496094} +07/25/2024 11:25:52 - INFO - __main__ - Step 144: {'lr': 0.00010214285714285715, 'samples': 6912, 'steps': 143, 'loss/train': 6.12056303024292} +07/25/2024 11:25:52 - INFO - __main__ - Step 145: {'lr': 0.00010285714285714286, 'samples': 6960, 'steps': 144, 'loss/train': 6.281930446624756} +07/25/2024 11:25:53 - INFO - __main__ - Step 146: {'lr': 0.00010357142857142858, 'samples': 7008, 'steps': 145, 'loss/train': 6.347898483276367} +07/25/2024 11:25:53 - INFO - __main__ - Step 147: {'lr': 0.00010428571428571428, 'samples': 7056, 'steps': 146, 'loss/train': 6.053178787231445} +07/25/2024 11:25:53 - INFO - __main__ - Step 148: {'lr': 0.000105, 'samples': 7104, 'steps': 147, 'loss/train': 6.299071311950684} +07/25/2024 11:25:54 - INFO - __main__ - Step 149: {'lr': 0.00010571428571428572, 'samples': 7152, 'steps': 148, 'loss/train': 6.214033603668213} +07/25/2024 11:25:54 - INFO - __main__ - Step 150: {'lr': 0.00010642857142857143, 'samples': 7200, 'steps': 149, 'loss/train': 6.36629056930542} +07/25/2024 11:25:54 - INFO - __main__ - Step 151: {'lr': 0.00010714285714285714, 'samples': 7248, 'steps': 150, 'loss/train': 6.574682235717773} +07/25/2024 11:25:54 - INFO - __main__ - Step 152: {'lr': 0.00010785714285714286, 'samples': 7296, 'steps': 151, 'loss/train': 5.2919840812683105} +07/25/2024 11:25:55 - INFO - __main__ - Step 153: {'lr': 0.00010857142857142858, 'samples': 7344, 'steps': 152, 'loss/train': 6.282163143157959} +07/25/2024 11:25:55 - INFO - __main__ - Step 154: {'lr': 0.0001092857142857143, 'samples': 7392, 'steps': 153, 'loss/train': 6.462711334228516} +07/25/2024 11:25:55 - INFO - __main__ - Step 155: {'lr': 0.00011, 'samples': 7440, 'steps': 154, 'loss/train': 5.595396518707275} +07/25/2024 11:25:56 - INFO - __main__ - Step 156: {'lr': 0.00011071428571428571, 'samples': 7488, 'steps': 155, 'loss/train': 6.128833293914795} +07/25/2024 11:25:56 - INFO - __main__ - Step 157: {'lr': 0.00011142857142857143, 'samples': 7536, 'steps': 156, 'loss/train': 6.035909652709961} +07/25/2024 11:25:56 - INFO - __main__ - Step 158: {'lr': 0.00011214285714285715, 'samples': 7584, 'steps': 157, 'loss/train': 6.275477886199951} +07/25/2024 11:25:56 - INFO - __main__ - Step 159: {'lr': 0.00011285714285714286, 'samples': 7632, 'steps': 158, 'loss/train': 6.1195969581604} +07/25/2024 11:25:57 - INFO - __main__ - Step 160: {'lr': 0.00011357142857142858, 'samples': 7680, 'steps': 159, 'loss/train': 8.316116333007812} +07/25/2024 11:25:57 - INFO - __main__ - Step 161: {'lr': 0.00011428571428571428, 'samples': 7728, 'steps': 160, 'loss/train': 6.287449836730957} +07/25/2024 11:25:57 - INFO - __main__ - Step 162: {'lr': 0.000115, 'samples': 7776, 'steps': 161, 'loss/train': 5.879787445068359} +07/25/2024 11:25:58 - INFO - __main__ - Step 163: {'lr': 0.00011571428571428571, 'samples': 7824, 'steps': 162, 'loss/train': 6.221517086029053} +07/25/2024 11:25:58 - INFO - __main__ - Step 164: {'lr': 0.00011642857142857143, 'samples': 7872, 'steps': 163, 'loss/train': 5.967787265777588} +07/25/2024 11:25:58 - INFO - __main__ - Step 165: {'lr': 0.00011714285714285715, 'samples': 7920, 'steps': 164, 'loss/train': 6.09508752822876} +07/25/2024 11:25:58 - INFO - __main__ - Step 166: {'lr': 0.00011785714285714286, 'samples': 7968, 'steps': 165, 'loss/train': 6.462942123413086} +07/25/2024 11:25:59 - INFO - __main__ - Step 167: {'lr': 0.00011857142857142858, 'samples': 8016, 'steps': 166, 'loss/train': 6.146663188934326} +07/25/2024 11:25:59 - INFO - __main__ - Step 168: {'lr': 0.00011928571428571428, 'samples': 8064, 'steps': 167, 'loss/train': 6.4038286209106445} +07/25/2024 11:25:59 - INFO - __main__ - Step 169: {'lr': 0.00012, 'samples': 8112, 'steps': 168, 'loss/train': 6.267633438110352} +07/25/2024 11:25:59 - INFO - __main__ - Step 170: {'lr': 0.00012071428571428572, 'samples': 8160, 'steps': 169, 'loss/train': 6.64249324798584} +07/25/2024 11:26:00 - INFO - __main__ - Step 171: {'lr': 0.00012142857142857143, 'samples': 8208, 'steps': 170, 'loss/train': 6.448271751403809} +07/25/2024 11:26:00 - INFO - __main__ - Step 172: {'lr': 0.00012214285714285715, 'samples': 8256, 'steps': 171, 'loss/train': 6.485412120819092} +07/25/2024 11:26:00 - INFO - __main__ - Step 173: {'lr': 0.00012285714285714287, 'samples': 8304, 'steps': 172, 'loss/train': 6.213407516479492} +07/25/2024 11:26:01 - INFO - __main__ - Step 174: {'lr': 0.00012357142857142856, 'samples': 8352, 'steps': 173, 'loss/train': 5.832103729248047} +07/25/2024 11:26:01 - INFO - __main__ - Step 175: {'lr': 0.00012428571428571428, 'samples': 8400, 'steps': 174, 'loss/train': 5.645206928253174} +07/25/2024 11:26:01 - INFO - __main__ - Step 176: {'lr': 0.000125, 'samples': 8448, 'steps': 175, 'loss/train': 5.942577838897705} +07/25/2024 11:26:01 - INFO - __main__ - Step 177: {'lr': 0.00012571428571428572, 'samples': 8496, 'steps': 176, 'loss/train': 6.108009338378906} +07/25/2024 11:26:02 - INFO - __main__ - Step 178: {'lr': 0.00012642857142857142, 'samples': 8544, 'steps': 177, 'loss/train': 6.048696994781494} +07/25/2024 11:26:02 - INFO - __main__ - Step 179: {'lr': 0.00012714285714285714, 'samples': 8592, 'steps': 178, 'loss/train': 6.014152526855469} +07/25/2024 11:26:02 - INFO - __main__ - Step 180: {'lr': 0.00012785714285714286, 'samples': 8640, 'steps': 179, 'loss/train': 6.590332508087158} +07/25/2024 11:26:03 - INFO - __main__ - Step 181: {'lr': 0.00012857142857142855, 'samples': 8688, 'steps': 180, 'loss/train': 6.095800399780273} +07/25/2024 11:26:03 - INFO - __main__ - Step 182: {'lr': 0.0001292857142857143, 'samples': 8736, 'steps': 181, 'loss/train': 5.968374729156494} +07/25/2024 11:26:03 - INFO - __main__ - Step 183: {'lr': 0.00013000000000000002, 'samples': 8784, 'steps': 182, 'loss/train': 6.073035717010498} +07/25/2024 11:26:03 - INFO - __main__ - Step 184: {'lr': 0.00013071428571428574, 'samples': 8832, 'steps': 183, 'loss/train': 7.681509494781494} +07/25/2024 11:26:04 - INFO - __main__ - Step 185: {'lr': 0.00013142857142857143, 'samples': 8880, 'steps': 184, 'loss/train': 5.806171417236328} +07/25/2024 11:26:04 - INFO - __main__ - Step 186: {'lr': 0.00013214285714285715, 'samples': 8928, 'steps': 185, 'loss/train': 5.868297576904297} +07/25/2024 11:26:04 - INFO - __main__ - Step 187: {'lr': 0.00013285714285714287, 'samples': 8976, 'steps': 186, 'loss/train': 5.532838344573975} +07/25/2024 11:26:05 - INFO - __main__ - Step 188: {'lr': 0.00013357142857142856, 'samples': 9024, 'steps': 187, 'loss/train': 6.210916042327881} +07/25/2024 11:26:05 - INFO - __main__ - Step 189: {'lr': 0.00013428571428571428, 'samples': 9072, 'steps': 188, 'loss/train': 5.803860187530518} +07/25/2024 11:26:05 - INFO - __main__ - Step 190: {'lr': 0.000135, 'samples': 9120, 'steps': 189, 'loss/train': 6.666335105895996} +07/25/2024 11:26:05 - INFO - __main__ - Step 191: {'lr': 0.0001357142857142857, 'samples': 9168, 'steps': 190, 'loss/train': 5.624790668487549} +07/25/2024 11:26:06 - INFO - __main__ - Step 192: {'lr': 0.00013642857142857144, 'samples': 9216, 'steps': 191, 'loss/train': 5.217100143432617} +07/25/2024 11:26:06 - INFO - __main__ - Step 193: {'lr': 0.00013714285714285716, 'samples': 9264, 'steps': 192, 'loss/train': 5.951303482055664} +07/25/2024 11:26:06 - INFO - __main__ - Step 194: {'lr': 0.00013785714285714285, 'samples': 9312, 'steps': 193, 'loss/train': 5.851853847503662} +07/25/2024 11:26:06 - INFO - __main__ - Step 195: {'lr': 0.00013857142857142857, 'samples': 9360, 'steps': 194, 'loss/train': 5.776468276977539} +07/25/2024 11:26:07 - INFO - __main__ - Step 196: {'lr': 0.0001392857142857143, 'samples': 9408, 'steps': 195, 'loss/train': 5.7882866859436035} +07/25/2024 11:26:07 - INFO - __main__ - Step 197: {'lr': 0.00014000000000000001, 'samples': 9456, 'steps': 196, 'loss/train': 5.621963024139404} +07/25/2024 11:26:07 - INFO - __main__ - Step 198: {'lr': 0.0001407142857142857, 'samples': 9504, 'steps': 197, 'loss/train': 5.277397632598877} +07/25/2024 11:26:08 - INFO - __main__ - Step 199: {'lr': 0.00014142857142857143, 'samples': 9552, 'steps': 198, 'loss/train': 5.9324951171875} +07/25/2024 11:26:08 - INFO - __main__ - Step 200: {'lr': 0.00014214285714285715, 'samples': 9600, 'steps': 199, 'loss/train': 6.0901618003845215} +07/25/2024 11:26:08 - INFO - __main__ - Step 201: {'lr': 0.00014285714285714284, 'samples': 9648, 'steps': 200, 'loss/train': 5.745926856994629} +07/25/2024 11:26:08 - INFO - __main__ - Step 202: {'lr': 0.0001435714285714286, 'samples': 9696, 'steps': 201, 'loss/train': 6.288934707641602} +07/25/2024 11:26:09 - INFO - __main__ - Step 203: {'lr': 0.0001442857142857143, 'samples': 9744, 'steps': 202, 'loss/train': 6.304495811462402} +07/25/2024 11:26:09 - INFO - __main__ - Step 204: {'lr': 0.000145, 'samples': 9792, 'steps': 203, 'loss/train': 6.896693706512451} +07/25/2024 11:26:09 - INFO - __main__ - Step 205: {'lr': 0.00014571428571428572, 'samples': 9840, 'steps': 204, 'loss/train': 5.75565767288208} +07/25/2024 11:26:10 - INFO - __main__ - Step 206: {'lr': 0.00014642857142857144, 'samples': 9888, 'steps': 205, 'loss/train': 6.053487300872803} +07/25/2024 11:26:10 - INFO - __main__ - Step 207: {'lr': 0.00014714285714285713, 'samples': 9936, 'steps': 206, 'loss/train': 5.872729301452637} +07/25/2024 11:26:10 - INFO - __main__ - Step 208: {'lr': 0.00014785714285714285, 'samples': 9984, 'steps': 207, 'loss/train': 7.389420509338379} +07/25/2024 11:26:10 - INFO - __main__ - Step 209: {'lr': 0.00014857142857142857, 'samples': 10032, 'steps': 208, 'loss/train': 6.749051570892334} +07/25/2024 11:26:11 - INFO - __main__ - Step 210: {'lr': 0.0001492857142857143, 'samples': 10080, 'steps': 209, 'loss/train': 5.964937210083008} +07/25/2024 11:26:11 - INFO - __main__ - Step 211: {'lr': 0.00015, 'samples': 10128, 'steps': 210, 'loss/train': 6.29296350479126} +07/25/2024 11:26:11 - INFO - __main__ - Step 212: {'lr': 0.0001507142857142857, 'samples': 10176, 'steps': 211, 'loss/train': 6.124290466308594} +07/25/2024 11:26:12 - INFO - __main__ - Step 213: {'lr': 0.00015142857142857145, 'samples': 10224, 'steps': 212, 'loss/train': 6.875829219818115} +07/25/2024 11:26:12 - INFO - __main__ - Step 214: {'lr': 0.00015214285714285715, 'samples': 10272, 'steps': 213, 'loss/train': 6.973008155822754} +07/25/2024 11:26:12 - INFO - __main__ - Step 215: {'lr': 0.00015285714285714287, 'samples': 10320, 'steps': 214, 'loss/train': 6.136086940765381} +07/25/2024 11:26:12 - INFO - __main__ - Step 216: {'lr': 0.0001535714285714286, 'samples': 10368, 'steps': 215, 'loss/train': 5.827876567840576} +07/25/2024 11:26:13 - INFO - __main__ - Step 217: {'lr': 0.00015428571428571428, 'samples': 10416, 'steps': 216, 'loss/train': 6.297738552093506} +07/25/2024 11:26:13 - INFO - __main__ - Step 218: {'lr': 0.000155, 'samples': 10464, 'steps': 217, 'loss/train': 5.124302387237549} +07/25/2024 11:26:13 - INFO - __main__ - Step 219: {'lr': 0.00015571428571428572, 'samples': 10512, 'steps': 218, 'loss/train': 5.82398796081543} +07/25/2024 11:26:14 - INFO - __main__ - Step 220: {'lr': 0.0001564285714285714, 'samples': 10560, 'steps': 219, 'loss/train': 5.920914649963379} +07/25/2024 11:26:14 - INFO - __main__ - Step 221: {'lr': 0.00015714285714285713, 'samples': 10608, 'steps': 220, 'loss/train': 5.506519317626953} +07/25/2024 11:26:14 - INFO - __main__ - Step 222: {'lr': 0.00015785714285714285, 'samples': 10656, 'steps': 221, 'loss/train': 5.194490432739258} +07/25/2024 11:26:14 - INFO - __main__ - Step 223: {'lr': 0.00015857142857142857, 'samples': 10704, 'steps': 222, 'loss/train': 6.241917610168457} +07/25/2024 11:26:15 - INFO - __main__ - Step 224: {'lr': 0.0001592857142857143, 'samples': 10752, 'steps': 223, 'loss/train': 5.662716388702393} +07/25/2024 11:26:15 - INFO - __main__ - Step 225: {'lr': 0.00016, 'samples': 10800, 'steps': 224, 'loss/train': 5.275988578796387} +07/25/2024 11:26:15 - INFO - __main__ - Step 226: {'lr': 0.00016071428571428573, 'samples': 10848, 'steps': 225, 'loss/train': 5.916398048400879} +07/25/2024 11:26:15 - INFO - __main__ - Step 227: {'lr': 0.00016142857142857143, 'samples': 10896, 'steps': 226, 'loss/train': 5.93534517288208} +07/25/2024 11:26:16 - INFO - __main__ - Step 228: {'lr': 0.00016214285714285715, 'samples': 10944, 'steps': 227, 'loss/train': 6.050380229949951} +07/25/2024 11:26:16 - INFO - __main__ - Step 229: {'lr': 0.00016285714285714287, 'samples': 10992, 'steps': 228, 'loss/train': 6.600334644317627} +07/25/2024 11:26:16 - INFO - __main__ - Step 230: {'lr': 0.00016357142857142856, 'samples': 11040, 'steps': 229, 'loss/train': 6.150309085845947} +07/25/2024 11:26:17 - INFO - __main__ - Step 231: {'lr': 0.00016428571428571428, 'samples': 11088, 'steps': 230, 'loss/train': 6.019353866577148} +07/25/2024 11:26:17 - INFO - __main__ - Step 232: {'lr': 0.000165, 'samples': 11136, 'steps': 231, 'loss/train': 7.122209548950195} +07/25/2024 11:26:17 - INFO - __main__ - Step 233: {'lr': 0.00016571428571428572, 'samples': 11184, 'steps': 232, 'loss/train': 5.891404151916504} +07/25/2024 11:26:17 - INFO - __main__ - Step 234: {'lr': 0.00016642857142857144, 'samples': 11232, 'steps': 233, 'loss/train': 5.697052955627441} +07/25/2024 11:26:18 - INFO - __main__ - Step 235: {'lr': 0.00016714285714285716, 'samples': 11280, 'steps': 234, 'loss/train': 5.768013954162598} +07/25/2024 11:26:18 - INFO - __main__ - Step 236: {'lr': 0.00016785714285714285, 'samples': 11328, 'steps': 235, 'loss/train': 5.943960666656494} +07/25/2024 11:26:18 - INFO - __main__ - Step 237: {'lr': 0.00016857142857142857, 'samples': 11376, 'steps': 236, 'loss/train': 7.096799850463867} +07/25/2024 11:26:19 - INFO - __main__ - Step 238: {'lr': 0.0001692857142857143, 'samples': 11424, 'steps': 237, 'loss/train': 7.258213996887207} +07/25/2024 11:26:19 - INFO - __main__ - Step 239: {'lr': 0.00017, 'samples': 11472, 'steps': 238, 'loss/train': 5.474708080291748} +07/25/2024 11:26:19 - INFO - __main__ - Step 240: {'lr': 0.0001707142857142857, 'samples': 11520, 'steps': 239, 'loss/train': 5.929581642150879} +07/25/2024 11:26:19 - INFO - __main__ - Step 241: {'lr': 0.00017142857142857143, 'samples': 11568, 'steps': 240, 'loss/train': 5.396873950958252} +07/25/2024 11:26:20 - INFO - __main__ - Step 242: {'lr': 0.00017214285714285715, 'samples': 11616, 'steps': 241, 'loss/train': 5.90254020690918} +07/25/2024 11:26:20 - INFO - __main__ - Step 243: {'lr': 0.00017285714285714287, 'samples': 11664, 'steps': 242, 'loss/train': 5.579410076141357} +07/25/2024 11:26:20 - INFO - __main__ - Step 244: {'lr': 0.00017357142857142859, 'samples': 11712, 'steps': 243, 'loss/train': 6.5500946044921875} +07/25/2024 11:26:21 - INFO - __main__ - Step 245: {'lr': 0.0001742857142857143, 'samples': 11760, 'steps': 244, 'loss/train': 6.13820219039917} +07/25/2024 11:26:21 - INFO - __main__ - Step 246: {'lr': 0.000175, 'samples': 11808, 'steps': 245, 'loss/train': 5.283195972442627} +07/25/2024 11:26:21 - INFO - __main__ - Step 247: {'lr': 0.00017571428571428572, 'samples': 11856, 'steps': 246, 'loss/train': 5.3597211837768555} +07/25/2024 11:26:21 - INFO - __main__ - Step 248: {'lr': 0.00017642857142857144, 'samples': 11904, 'steps': 247, 'loss/train': 5.715787410736084} +07/25/2024 11:26:22 - INFO - __main__ - Step 249: {'lr': 0.00017714285714285713, 'samples': 11952, 'steps': 248, 'loss/train': 5.988589286804199} +07/25/2024 11:26:22 - INFO - __main__ - Step 250: {'lr': 0.00017785714285714285, 'samples': 12000, 'steps': 249, 'loss/train': 6.131600856781006} +07/25/2024 11:26:22 - INFO - __main__ - Step 251: {'lr': 0.00017857142857142857, 'samples': 12048, 'steps': 250, 'loss/train': 5.627201557159424} +07/25/2024 11:26:23 - INFO - __main__ - Step 252: {'lr': 0.0001792857142857143, 'samples': 12096, 'steps': 251, 'loss/train': 6.002392292022705} +07/25/2024 11:26:23 - INFO - __main__ - Step 253: {'lr': 0.00017999999999999998, 'samples': 12144, 'steps': 252, 'loss/train': 5.872100353240967} +07/25/2024 11:26:23 - INFO - __main__ - Step 254: {'lr': 0.00018071428571428573, 'samples': 12192, 'steps': 253, 'loss/train': 6.0609612464904785} +07/25/2024 11:26:23 - INFO - __main__ - Step 255: {'lr': 0.00018142857142857145, 'samples': 12240, 'steps': 254, 'loss/train': 6.275620460510254} +07/25/2024 11:26:24 - INFO - __main__ - Step 256: {'lr': 0.00018214285714285714, 'samples': 12288, 'steps': 255, 'loss/train': 6.78406286239624} +07/25/2024 11:26:24 - INFO - __main__ - Step 257: {'lr': 0.00018285714285714286, 'samples': 12336, 'steps': 256, 'loss/train': 6.069532871246338} +07/25/2024 11:26:24 - INFO - __main__ - Step 258: {'lr': 0.00018357142857142858, 'samples': 12384, 'steps': 257, 'loss/train': 5.567933559417725} +07/25/2024 11:26:25 - INFO - __main__ - Step 259: {'lr': 0.00018428571428571428, 'samples': 12432, 'steps': 258, 'loss/train': 6.152994632720947} +07/25/2024 11:26:25 - INFO - __main__ - Step 260: {'lr': 0.000185, 'samples': 12480, 'steps': 259, 'loss/train': 5.771788120269775} +07/25/2024 11:26:25 - INFO - __main__ - Step 261: {'lr': 0.00018571428571428572, 'samples': 12528, 'steps': 260, 'loss/train': 5.717995643615723} +07/25/2024 11:26:25 - INFO - __main__ - Step 262: {'lr': 0.0001864285714285714, 'samples': 12576, 'steps': 261, 'loss/train': 5.839302062988281} +07/25/2024 11:26:26 - INFO - __main__ - Step 263: {'lr': 0.00018714285714285713, 'samples': 12624, 'steps': 262, 'loss/train': 5.257016658782959} +07/25/2024 11:26:26 - INFO - __main__ - Step 264: {'lr': 0.00018785714285714288, 'samples': 12672, 'steps': 263, 'loss/train': 6.241714000701904} +07/25/2024 11:26:26 - INFO - __main__ - Step 265: {'lr': 0.0001885714285714286, 'samples': 12720, 'steps': 264, 'loss/train': 6.639944553375244} +07/25/2024 11:26:26 - INFO - __main__ - Step 266: {'lr': 0.0001892857142857143, 'samples': 12768, 'steps': 265, 'loss/train': 5.12101936340332} +07/25/2024 11:26:27 - INFO - __main__ - Step 267: {'lr': 0.00019, 'samples': 12816, 'steps': 266, 'loss/train': 5.190861701965332} +07/25/2024 11:26:27 - INFO - __main__ - Step 268: {'lr': 0.00019071428571428573, 'samples': 12864, 'steps': 267, 'loss/train': 6.486904621124268} +07/25/2024 11:26:27 - INFO - __main__ - Step 269: {'lr': 0.00019142857142857142, 'samples': 12912, 'steps': 268, 'loss/train': 5.638678073883057} +07/25/2024 11:26:28 - INFO - __main__ - Step 270: {'lr': 0.00019214285714285714, 'samples': 12960, 'steps': 269, 'loss/train': 5.088951110839844} +07/25/2024 11:26:28 - INFO - __main__ - Step 271: {'lr': 0.00019285714285714286, 'samples': 13008, 'steps': 270, 'loss/train': 5.137499809265137} +07/25/2024 11:26:28 - INFO - __main__ - Step 272: {'lr': 0.00019357142857142856, 'samples': 13056, 'steps': 271, 'loss/train': 4.604417324066162} +07/25/2024 11:26:28 - INFO - __main__ - Step 273: {'lr': 0.00019428571428571428, 'samples': 13104, 'steps': 272, 'loss/train': 5.781164646148682} +07/25/2024 11:26:29 - INFO - __main__ - Step 274: {'lr': 0.00019500000000000002, 'samples': 13152, 'steps': 273, 'loss/train': 6.4048309326171875} +07/25/2024 11:26:29 - INFO - __main__ - Step 275: {'lr': 0.00019571428571428572, 'samples': 13200, 'steps': 274, 'loss/train': 6.040492057800293} +07/25/2024 11:26:29 - INFO - __main__ - Step 276: {'lr': 0.00019642857142857144, 'samples': 13248, 'steps': 275, 'loss/train': 5.667052745819092} +07/25/2024 11:26:30 - INFO - __main__ - Step 277: {'lr': 0.00019714285714285716, 'samples': 13296, 'steps': 276, 'loss/train': 5.5247483253479} +07/25/2024 11:26:30 - INFO - __main__ - Step 278: {'lr': 0.00019785714285714288, 'samples': 13344, 'steps': 277, 'loss/train': 5.584035396575928} +07/25/2024 11:26:30 - INFO - __main__ - Step 279: {'lr': 0.00019857142857142857, 'samples': 13392, 'steps': 278, 'loss/train': 5.613864898681641} +07/25/2024 11:26:30 - INFO - __main__ - Step 280: {'lr': 0.0001992857142857143, 'samples': 13440, 'steps': 279, 'loss/train': 5.550878524780273} +07/25/2024 11:26:31 - INFO - __main__ - Step 281: {'lr': 0.0002, 'samples': 13488, 'steps': 280, 'loss/train': 6.560573101043701} +07/25/2024 11:26:31 - INFO - __main__ - Step 282: {'lr': 0.0002007142857142857, 'samples': 13536, 'steps': 281, 'loss/train': 5.38557767868042} +07/25/2024 11:26:31 - INFO - __main__ - Step 283: {'lr': 0.00020142857142857142, 'samples': 13584, 'steps': 282, 'loss/train': 6.759729862213135} +07/25/2024 11:26:32 - INFO - __main__ - Step 284: {'lr': 0.00020214285714285714, 'samples': 13632, 'steps': 283, 'loss/train': 6.179801940917969} +07/25/2024 11:26:32 - INFO - __main__ - Step 285: {'lr': 0.00020285714285714286, 'samples': 13680, 'steps': 284, 'loss/train': 5.904941082000732} +07/25/2024 11:26:32 - INFO - __main__ - Step 286: {'lr': 0.00020357142857142858, 'samples': 13728, 'steps': 285, 'loss/train': 5.76945161819458} +07/25/2024 11:26:32 - INFO - __main__ - Step 287: {'lr': 0.0002042857142857143, 'samples': 13776, 'steps': 286, 'loss/train': 8.2332124710083} +07/25/2024 11:26:33 - INFO - __main__ - Step 288: {'lr': 0.000205, 'samples': 13824, 'steps': 287, 'loss/train': 5.863339900970459} +07/25/2024 11:26:33 - INFO - __main__ - Step 289: {'lr': 0.00020571428571428572, 'samples': 13872, 'steps': 288, 'loss/train': 6.213030815124512} +07/25/2024 11:26:33 - INFO - __main__ - Step 290: {'lr': 0.00020642857142857144, 'samples': 13920, 'steps': 289, 'loss/train': 4.734172821044922} +07/25/2024 11:26:34 - INFO - __main__ - Step 291: {'lr': 0.00020714285714285716, 'samples': 13968, 'steps': 290, 'loss/train': 5.674801349639893} +07/25/2024 11:26:34 - INFO - __main__ - Step 292: {'lr': 0.00020785714285714285, 'samples': 14016, 'steps': 291, 'loss/train': 5.784888744354248} +07/25/2024 11:26:34 - INFO - __main__ - Step 293: {'lr': 0.00020857142857142857, 'samples': 14064, 'steps': 292, 'loss/train': 5.5319390296936035} +07/25/2024 11:26:34 - INFO - __main__ - Step 294: {'lr': 0.0002092857142857143, 'samples': 14112, 'steps': 293, 'loss/train': 5.685769557952881} +07/25/2024 11:26:35 - INFO - __main__ - Step 295: {'lr': 0.00021, 'samples': 14160, 'steps': 294, 'loss/train': 5.418774604797363} +07/25/2024 11:26:35 - INFO - __main__ - Step 296: {'lr': 0.00021071428571428573, 'samples': 14208, 'steps': 295, 'loss/train': 4.068847179412842} +07/25/2024 11:26:35 - INFO - __main__ - Step 297: {'lr': 0.00021142857142857145, 'samples': 14256, 'steps': 296, 'loss/train': 5.367792129516602} +07/25/2024 11:26:36 - INFO - __main__ - Step 298: {'lr': 0.00021214285714285714, 'samples': 14304, 'steps': 297, 'loss/train': 5.713776588439941} +07/25/2024 11:26:36 - INFO - __main__ - Step 299: {'lr': 0.00021285714285714286, 'samples': 14352, 'steps': 298, 'loss/train': 5.603511810302734} +07/25/2024 11:26:36 - INFO - __main__ - Step 300: {'lr': 0.00021357142857142858, 'samples': 14400, 'steps': 299, 'loss/train': 6.163950443267822} +07/25/2024 11:26:36 - INFO - __main__ - Step 301: {'lr': 0.00021428571428571427, 'samples': 14448, 'steps': 300, 'loss/train': 5.406757354736328} +07/25/2024 11:26:37 - INFO - __main__ - Step 302: {'lr': 0.000215, 'samples': 14496, 'steps': 301, 'loss/train': 5.90996789932251} +07/25/2024 11:26:37 - INFO - __main__ - Step 303: {'lr': 0.00021571428571428571, 'samples': 14544, 'steps': 302, 'loss/train': 6.092479228973389} +07/25/2024 11:26:37 - INFO - __main__ - Step 304: {'lr': 0.00021642857142857143, 'samples': 14592, 'steps': 303, 'loss/train': 5.216100215911865} +07/25/2024 11:26:38 - INFO - __main__ - Step 305: {'lr': 0.00021714285714285715, 'samples': 14640, 'steps': 304, 'loss/train': 5.621682643890381} +07/25/2024 11:26:38 - INFO - __main__ - Step 306: {'lr': 0.00021785714285714287, 'samples': 14688, 'steps': 305, 'loss/train': 5.823093414306641} +07/25/2024 11:26:38 - INFO - __main__ - Step 307: {'lr': 0.0002185714285714286, 'samples': 14736, 'steps': 306, 'loss/train': 6.228525161743164} +07/25/2024 11:26:38 - INFO - __main__ - Step 308: {'lr': 0.0002192857142857143, 'samples': 14784, 'steps': 307, 'loss/train': 5.9510087966918945} +07/25/2024 11:26:39 - INFO - __main__ - Step 309: {'lr': 0.00022, 'samples': 14832, 'steps': 308, 'loss/train': 5.266091346740723} +07/25/2024 11:26:39 - INFO - __main__ - Step 310: {'lr': 0.00022071428571428573, 'samples': 14880, 'steps': 309, 'loss/train': 5.217267036437988} +07/25/2024 11:26:39 - INFO - __main__ - Step 311: {'lr': 0.00022142857142857142, 'samples': 14928, 'steps': 310, 'loss/train': 7.697060585021973} +07/25/2024 11:26:39 - INFO - __main__ - Step 312: {'lr': 0.00022214285714285714, 'samples': 14976, 'steps': 311, 'loss/train': 5.666650772094727} +07/25/2024 11:26:40 - INFO - __main__ - Step 313: {'lr': 0.00022285714285714286, 'samples': 15024, 'steps': 312, 'loss/train': 6.425085067749023} +07/25/2024 11:26:40 - INFO - __main__ - Step 314: {'lr': 0.00022357142857142855, 'samples': 15072, 'steps': 313, 'loss/train': 4.396389007568359} +07/25/2024 11:26:40 - INFO - __main__ - Step 315: {'lr': 0.0002242857142857143, 'samples': 15120, 'steps': 314, 'loss/train': 5.2941131591796875} +07/25/2024 11:26:41 - INFO - __main__ - Step 316: {'lr': 0.00022500000000000002, 'samples': 15168, 'steps': 315, 'loss/train': 5.752312183380127} +07/25/2024 11:26:41 - INFO - __main__ - Step 317: {'lr': 0.00022571428571428571, 'samples': 15216, 'steps': 316, 'loss/train': 6.089960098266602} +07/25/2024 11:26:41 - INFO - __main__ - Step 318: {'lr': 0.00022642857142857143, 'samples': 15264, 'steps': 317, 'loss/train': 5.828670978546143} +07/25/2024 11:26:41 - INFO - __main__ - Step 319: {'lr': 0.00022714285714285715, 'samples': 15312, 'steps': 318, 'loss/train': 5.34361457824707} +07/25/2024 11:26:42 - INFO - __main__ - Step 320: {'lr': 0.00022785714285714287, 'samples': 15360, 'steps': 319, 'loss/train': 3.9433271884918213} +07/25/2024 11:26:42 - INFO - __main__ - Step 321: {'lr': 0.00022857142857142857, 'samples': 15408, 'steps': 320, 'loss/train': 5.489405632019043} +07/25/2024 11:26:42 - INFO - __main__ - Step 322: {'lr': 0.0002292857142857143, 'samples': 15456, 'steps': 321, 'loss/train': 5.065426826477051} +07/25/2024 11:26:43 - INFO - __main__ - Step 323: {'lr': 0.00023, 'samples': 15504, 'steps': 322, 'loss/train': 4.657402038574219} +07/25/2024 11:26:43 - INFO - __main__ - Step 324: {'lr': 0.0002307142857142857, 'samples': 15552, 'steps': 323, 'loss/train': 6.042489528656006} +07/25/2024 11:26:43 - INFO - __main__ - Step 325: {'lr': 0.00023142857142857142, 'samples': 15600, 'steps': 324, 'loss/train': 5.562082290649414} +07/25/2024 11:26:43 - INFO - __main__ - Step 326: {'lr': 0.00023214285714285717, 'samples': 15648, 'steps': 325, 'loss/train': 5.726541519165039} +07/25/2024 11:26:44 - INFO - __main__ - Step 327: {'lr': 0.00023285714285714286, 'samples': 15696, 'steps': 326, 'loss/train': 5.573945045471191} +07/25/2024 11:26:44 - INFO - __main__ - Step 328: {'lr': 0.00023357142857142858, 'samples': 15744, 'steps': 327, 'loss/train': 6.105917930603027} +07/25/2024 11:26:44 - INFO - __main__ - Step 329: {'lr': 0.0002342857142857143, 'samples': 15792, 'steps': 328, 'loss/train': 5.546865463256836} +07/25/2024 11:26:45 - INFO - __main__ - Step 330: {'lr': 0.000235, 'samples': 15840, 'steps': 329, 'loss/train': 5.543821334838867} +07/25/2024 11:26:45 - INFO - __main__ - Step 331: {'lr': 0.0002357142857142857, 'samples': 15888, 'steps': 330, 'loss/train': 5.6774582862854} +07/25/2024 11:26:45 - INFO - __main__ - Step 332: {'lr': 0.00023642857142857143, 'samples': 15936, 'steps': 331, 'loss/train': 5.767722129821777} +07/25/2024 11:26:45 - INFO - __main__ - Step 333: {'lr': 0.00023714285714285715, 'samples': 15984, 'steps': 332, 'loss/train': 5.70899772644043} +07/25/2024 11:26:46 - INFO - __main__ - Step 334: {'lr': 0.00023785714285714285, 'samples': 16032, 'steps': 333, 'loss/train': 5.67036247253418} +07/25/2024 11:26:46 - INFO - __main__ - Step 335: {'lr': 0.00023857142857142857, 'samples': 16080, 'steps': 334, 'loss/train': 5.325812339782715} +07/25/2024 11:26:46 - INFO - __main__ - Step 336: {'lr': 0.0002392857142857143, 'samples': 16128, 'steps': 335, 'loss/train': 5.349172592163086} +07/25/2024 11:26:47 - INFO - __main__ - Step 337: {'lr': 0.00024, 'samples': 16176, 'steps': 336, 'loss/train': 5.448930263519287} +07/25/2024 11:26:47 - INFO - __main__ - Step 338: {'lr': 0.00024071428571428573, 'samples': 16224, 'steps': 337, 'loss/train': 3.7934205532073975} +07/25/2024 11:26:47 - INFO - __main__ - Step 339: {'lr': 0.00024142857142857145, 'samples': 16272, 'steps': 338, 'loss/train': 5.1056013107299805} +07/25/2024 11:26:47 - INFO - __main__ - Step 340: {'lr': 0.00024214285714285714, 'samples': 16320, 'steps': 339, 'loss/train': 5.9682464599609375} +07/25/2024 11:26:48 - INFO - __main__ - Step 341: {'lr': 0.00024285714285714286, 'samples': 16368, 'steps': 340, 'loss/train': 5.546884536743164} +07/25/2024 11:26:48 - INFO - __main__ - Step 342: {'lr': 0.00024357142857142858, 'samples': 16416, 'steps': 341, 'loss/train': 6.586970329284668} +07/25/2024 11:26:48 - INFO - __main__ - Step 343: {'lr': 0.0002442857142857143, 'samples': 16464, 'steps': 342, 'loss/train': 5.654937744140625} +07/25/2024 11:26:49 - INFO - __main__ - Step 344: {'lr': 0.000245, 'samples': 16512, 'steps': 343, 'loss/train': 3.9033658504486084} +07/25/2024 11:26:49 - INFO - __main__ - Step 345: {'lr': 0.00024571428571428574, 'samples': 16560, 'steps': 344, 'loss/train': 6.266292095184326} +07/25/2024 11:26:49 - INFO - __main__ - Step 346: {'lr': 0.00024642857142857143, 'samples': 16608, 'steps': 345, 'loss/train': 5.5901007652282715} +07/25/2024 11:26:49 - INFO - __main__ - Step 347: {'lr': 0.0002471428571428571, 'samples': 16656, 'steps': 346, 'loss/train': 5.836148738861084} +07/25/2024 11:26:50 - INFO - __main__ - Step 348: {'lr': 0.00024785714285714287, 'samples': 16704, 'steps': 347, 'loss/train': 5.447431564331055} +07/25/2024 11:26:50 - INFO - __main__ - Step 349: {'lr': 0.00024857142857142857, 'samples': 16752, 'steps': 348, 'loss/train': 5.124023914337158} +07/25/2024 11:26:50 - INFO - __main__ - Step 350: {'lr': 0.00024928571428571426, 'samples': 16800, 'steps': 349, 'loss/train': 5.541380405426025} +07/25/2024 11:26:51 - INFO - __main__ - Step 351: {'lr': 0.00025, 'samples': 16848, 'steps': 350, 'loss/train': 5.657571792602539} +07/25/2024 11:26:51 - INFO - __main__ - Step 352: {'lr': 0.00025071428571428575, 'samples': 16896, 'steps': 351, 'loss/train': 5.562744617462158} +07/25/2024 11:26:51 - INFO - __main__ - Step 353: {'lr': 0.00025142857142857145, 'samples': 16944, 'steps': 352, 'loss/train': 5.914762496948242} +07/25/2024 11:26:51 - INFO - __main__ - Step 354: {'lr': 0.0002521428571428572, 'samples': 16992, 'steps': 353, 'loss/train': 5.341904640197754} +07/25/2024 11:26:52 - INFO - __main__ - Step 355: {'lr': 0.00025285714285714283, 'samples': 17040, 'steps': 354, 'loss/train': 5.9697489738464355} +07/25/2024 11:26:52 - INFO - __main__ - Step 356: {'lr': 0.0002535714285714286, 'samples': 17088, 'steps': 355, 'loss/train': 5.630176544189453} +07/25/2024 11:26:52 - INFO - __main__ - Step 357: {'lr': 0.00025428571428571427, 'samples': 17136, 'steps': 356, 'loss/train': 5.371885299682617} +07/25/2024 11:26:53 - INFO - __main__ - Step 358: {'lr': 0.000255, 'samples': 17184, 'steps': 357, 'loss/train': 5.519773006439209} +07/25/2024 11:26:53 - INFO - __main__ - Step 359: {'lr': 0.0002557142857142857, 'samples': 17232, 'steps': 358, 'loss/train': 5.623300075531006} +07/25/2024 11:26:53 - INFO - __main__ - Step 360: {'lr': 0.00025642857142857146, 'samples': 17280, 'steps': 359, 'loss/train': 5.866531848907471} +07/25/2024 11:26:53 - INFO - __main__ - Step 361: {'lr': 0.0002571428571428571, 'samples': 17328, 'steps': 360, 'loss/train': 5.218587875366211} +07/25/2024 11:26:54 - INFO - __main__ - Step 362: {'lr': 0.00025785714285714284, 'samples': 17376, 'steps': 361, 'loss/train': 4.218661785125732} +07/25/2024 11:26:54 - INFO - __main__ - Step 363: {'lr': 0.0002585714285714286, 'samples': 17424, 'steps': 362, 'loss/train': 6.053807258605957} +07/25/2024 11:26:54 - INFO - __main__ - Step 364: {'lr': 0.0002592857142857143, 'samples': 17472, 'steps': 363, 'loss/train': 5.527558326721191} +07/25/2024 11:26:54 - INFO - __main__ - Step 365: {'lr': 0.00026000000000000003, 'samples': 17520, 'steps': 364, 'loss/train': 5.967709541320801} +07/25/2024 11:26:55 - INFO - __main__ - Step 366: {'lr': 0.0002607142857142857, 'samples': 17568, 'steps': 365, 'loss/train': 5.558084011077881} +07/25/2024 11:26:55 - INFO - __main__ - Step 367: {'lr': 0.00026142857142857147, 'samples': 17616, 'steps': 366, 'loss/train': 6.144757270812988} +07/25/2024 11:26:55 - INFO - __main__ - Step 368: {'lr': 0.0002621428571428571, 'samples': 17664, 'steps': 367, 'loss/train': 3.5946810245513916} +07/25/2024 11:26:56 - INFO - __main__ - Step 369: {'lr': 0.00026285714285714286, 'samples': 17712, 'steps': 368, 'loss/train': 5.867280960083008} +07/25/2024 11:26:56 - INFO - __main__ - Step 370: {'lr': 0.00026357142857142855, 'samples': 17760, 'steps': 369, 'loss/train': 5.74919319152832} +07/25/2024 11:26:56 - INFO - __main__ - Step 371: {'lr': 0.0002642857142857143, 'samples': 17808, 'steps': 370, 'loss/train': 5.880192279815674} +07/25/2024 11:26:56 - INFO - __main__ - Step 372: {'lr': 0.00026500000000000004, 'samples': 17856, 'steps': 371, 'loss/train': 5.501766204833984} +07/25/2024 11:26:57 - INFO - __main__ - Step 373: {'lr': 0.00026571428571428574, 'samples': 17904, 'steps': 372, 'loss/train': 5.399500370025635} +07/25/2024 11:26:57 - INFO - __main__ - Step 374: {'lr': 0.00026642857142857143, 'samples': 17952, 'steps': 373, 'loss/train': 5.503784656524658} +07/25/2024 11:26:57 - INFO - __main__ - Step 375: {'lr': 0.0002671428571428571, 'samples': 18000, 'steps': 374, 'loss/train': 6.608977794647217} +07/25/2024 11:26:58 - INFO - __main__ - Step 376: {'lr': 0.00026785714285714287, 'samples': 18048, 'steps': 375, 'loss/train': 5.688192844390869} +07/25/2024 11:26:58 - INFO - __main__ - Step 377: {'lr': 0.00026857142857142856, 'samples': 18096, 'steps': 376, 'loss/train': 5.282657146453857} +07/25/2024 11:26:58 - INFO - __main__ - Step 378: {'lr': 0.0002692857142857143, 'samples': 18144, 'steps': 377, 'loss/train': 6.211413383483887} +07/25/2024 11:26:58 - INFO - __main__ - Step 379: {'lr': 0.00027, 'samples': 18192, 'steps': 378, 'loss/train': 5.477629661560059} +07/25/2024 11:26:59 - INFO - __main__ - Step 380: {'lr': 0.00027071428571428575, 'samples': 18240, 'steps': 379, 'loss/train': 5.2642130851745605} +07/25/2024 11:26:59 - INFO - __main__ - Step 381: {'lr': 0.0002714285714285714, 'samples': 18288, 'steps': 380, 'loss/train': 6.003504753112793} +07/25/2024 11:26:59 - INFO - __main__ - Step 382: {'lr': 0.00027214285714285714, 'samples': 18336, 'steps': 381, 'loss/train': 5.655426979064941} +07/25/2024 11:27:00 - INFO - __main__ - Step 383: {'lr': 0.0002728571428571429, 'samples': 18384, 'steps': 382, 'loss/train': 5.520073413848877} +07/25/2024 11:27:00 - INFO - __main__ - Step 384: {'lr': 0.0002735714285714286, 'samples': 18432, 'steps': 383, 'loss/train': 5.679370403289795} +07/25/2024 11:27:00 - INFO - __main__ - Step 385: {'lr': 0.0002742857142857143, 'samples': 18480, 'steps': 384, 'loss/train': 5.116621017456055} +07/25/2024 11:27:00 - INFO - __main__ - Step 386: {'lr': 0.000275, 'samples': 18528, 'steps': 385, 'loss/train': 4.000160217285156} +07/25/2024 11:27:01 - INFO - __main__ - Step 387: {'lr': 0.0002757142857142857, 'samples': 18576, 'steps': 386, 'loss/train': 5.870525360107422} +07/25/2024 11:27:01 - INFO - __main__ - Step 388: {'lr': 0.0002764285714285714, 'samples': 18624, 'steps': 387, 'loss/train': 5.535452365875244} +07/25/2024 11:27:01 - INFO - __main__ - Step 389: {'lr': 0.00027714285714285715, 'samples': 18672, 'steps': 388, 'loss/train': 5.792614459991455} +07/25/2024 11:27:02 - INFO - __main__ - Step 390: {'lr': 0.00027785714285714284, 'samples': 18720, 'steps': 389, 'loss/train': 5.261795520782471} +07/25/2024 11:27:02 - INFO - __main__ - Step 391: {'lr': 0.0002785714285714286, 'samples': 18768, 'steps': 390, 'loss/train': 5.848176956176758} +07/25/2024 11:27:02 - INFO - __main__ - Step 392: {'lr': 0.0002792857142857143, 'samples': 18816, 'steps': 391, 'loss/train': 3.0531749725341797} +07/25/2024 11:27:02 - INFO - __main__ - Step 393: {'lr': 0.00028000000000000003, 'samples': 18864, 'steps': 392, 'loss/train': 5.2248663902282715} +07/25/2024 11:27:03 - INFO - __main__ - Step 394: {'lr': 0.0002807142857142857, 'samples': 18912, 'steps': 393, 'loss/train': 5.764453411102295} +07/25/2024 11:27:03 - INFO - __main__ - Step 395: {'lr': 0.0002814285714285714, 'samples': 18960, 'steps': 394, 'loss/train': 5.34336519241333} +07/25/2024 11:27:03 - INFO - __main__ - Step 396: {'lr': 0.00028214285714285716, 'samples': 19008, 'steps': 395, 'loss/train': 5.060535907745361} +07/25/2024 11:27:04 - INFO - __main__ - Step 397: {'lr': 0.00028285714285714286, 'samples': 19056, 'steps': 396, 'loss/train': 5.15131139755249} +07/25/2024 11:27:04 - INFO - __main__ - Step 398: {'lr': 0.0002835714285714286, 'samples': 19104, 'steps': 397, 'loss/train': 5.546749114990234} +07/25/2024 11:27:04 - INFO - __main__ - Step 399: {'lr': 0.0002842857142857143, 'samples': 19152, 'steps': 398, 'loss/train': 6.071304798126221} +07/25/2024 11:27:04 - INFO - __main__ - Step 400: {'lr': 0.000285, 'samples': 19200, 'steps': 399, 'loss/train': 5.347541809082031} +07/25/2024 11:27:05 - INFO - __main__ - Step 401: {'lr': 0.0002857142857142857, 'samples': 19248, 'steps': 400, 'loss/train': 5.706829071044922} +07/25/2024 11:27:05 - INFO - __main__ - Step 402: {'lr': 0.00028642857142857143, 'samples': 19296, 'steps': 401, 'loss/train': 5.5088324546813965} +07/25/2024 11:27:05 - INFO - __main__ - Step 403: {'lr': 0.0002871428571428572, 'samples': 19344, 'steps': 402, 'loss/train': 5.262076377868652} +07/25/2024 11:27:06 - INFO - __main__ - Step 404: {'lr': 0.00028785714285714287, 'samples': 19392, 'steps': 403, 'loss/train': 5.1472601890563965} +07/25/2024 11:27:06 - INFO - __main__ - Step 405: {'lr': 0.0002885714285714286, 'samples': 19440, 'steps': 404, 'loss/train': 5.470161437988281} +07/25/2024 11:27:06 - INFO - __main__ - Step 406: {'lr': 0.0002892857142857143, 'samples': 19488, 'steps': 405, 'loss/train': 5.678465366363525} +07/25/2024 11:27:06 - INFO - __main__ - Step 407: {'lr': 0.00029, 'samples': 19536, 'steps': 406, 'loss/train': 5.275732517242432} +07/25/2024 11:27:07 - INFO - __main__ - Step 408: {'lr': 0.0002907142857142857, 'samples': 19584, 'steps': 407, 'loss/train': 5.481686115264893} +07/25/2024 11:27:07 - INFO - __main__ - Step 409: {'lr': 0.00029142857142857144, 'samples': 19632, 'steps': 408, 'loss/train': 5.748928070068359} +07/25/2024 11:27:07 - INFO - __main__ - Step 410: {'lr': 0.00029214285714285713, 'samples': 19680, 'steps': 409, 'loss/train': 2.8533151149749756} +07/25/2024 11:27:08 - INFO - __main__ - Step 411: {'lr': 0.0002928571428571429, 'samples': 19728, 'steps': 410, 'loss/train': 7.07308292388916} +07/25/2024 11:27:08 - INFO - __main__ - Step 412: {'lr': 0.0002935714285714286, 'samples': 19776, 'steps': 411, 'loss/train': 5.445854663848877} +07/25/2024 11:27:08 - INFO - __main__ - Step 413: {'lr': 0.00029428571428571427, 'samples': 19824, 'steps': 412, 'loss/train': 5.735104084014893} +07/25/2024 11:27:08 - INFO - __main__ - Step 414: {'lr': 0.000295, 'samples': 19872, 'steps': 413, 'loss/train': 4.672630310058594} +07/25/2024 11:27:09 - INFO - __main__ - Step 415: {'lr': 0.0002957142857142857, 'samples': 19920, 'steps': 414, 'loss/train': 5.474041938781738} +07/25/2024 11:27:09 - INFO - __main__ - Step 416: {'lr': 0.00029642857142857145, 'samples': 19968, 'steps': 415, 'loss/train': 5.160914897918701} +07/25/2024 11:27:09 - INFO - __main__ - Step 417: {'lr': 0.00029714285714285715, 'samples': 20016, 'steps': 416, 'loss/train': 5.305094242095947} +07/25/2024 11:27:10 - INFO - __main__ - Step 418: {'lr': 0.0002978571428571429, 'samples': 20064, 'steps': 417, 'loss/train': 5.954584121704102} +07/25/2024 11:27:10 - INFO - __main__ - Step 419: {'lr': 0.0002985714285714286, 'samples': 20112, 'steps': 418, 'loss/train': 5.847262859344482} +07/25/2024 11:27:10 - INFO - __main__ - Step 420: {'lr': 0.0002992857142857143, 'samples': 20160, 'steps': 419, 'loss/train': 5.648597717285156} +07/25/2024 11:27:10 - INFO - __main__ - Step 421: {'lr': 0.0003, 'samples': 20208, 'steps': 420, 'loss/train': 5.399572849273682} +07/25/2024 11:27:11 - INFO - __main__ - Step 422: {'lr': 0.0003007142857142857, 'samples': 20256, 'steps': 421, 'loss/train': 5.556665420532227} +07/25/2024 11:27:11 - INFO - __main__ - Step 423: {'lr': 0.0003014285714285714, 'samples': 20304, 'steps': 422, 'loss/train': 5.368066787719727} +07/25/2024 11:27:11 - INFO - __main__ - Step 424: {'lr': 0.00030214285714285716, 'samples': 20352, 'steps': 423, 'loss/train': 5.138967037200928} +07/25/2024 11:27:12 - INFO - __main__ - Step 425: {'lr': 0.0003028571428571429, 'samples': 20400, 'steps': 424, 'loss/train': 5.3975043296813965} +07/25/2024 11:27:12 - INFO - __main__ - Step 426: {'lr': 0.00030357142857142855, 'samples': 20448, 'steps': 425, 'loss/train': 6.053587913513184} +07/25/2024 11:27:12 - INFO - __main__ - Step 427: {'lr': 0.0003042857142857143, 'samples': 20496, 'steps': 426, 'loss/train': 5.553687572479248} +07/25/2024 11:27:12 - INFO - __main__ - Step 428: {'lr': 0.000305, 'samples': 20544, 'steps': 427, 'loss/train': 4.6215925216674805} +07/25/2024 11:27:13 - INFO - __main__ - Step 429: {'lr': 0.00030571428571428573, 'samples': 20592, 'steps': 428, 'loss/train': 5.239320278167725} +07/25/2024 11:27:13 - INFO - __main__ - Step 430: {'lr': 0.0003064285714285714, 'samples': 20640, 'steps': 429, 'loss/train': 5.218222618103027} +07/25/2024 11:27:13 - INFO - __main__ - Step 431: {'lr': 0.0003071428571428572, 'samples': 20688, 'steps': 430, 'loss/train': 5.4917426109313965} +07/25/2024 11:27:14 - INFO - __main__ - Step 432: {'lr': 0.00030785714285714287, 'samples': 20736, 'steps': 431, 'loss/train': 5.366219520568848} +07/25/2024 11:27:14 - INFO - __main__ - Step 433: {'lr': 0.00030857142857142856, 'samples': 20784, 'steps': 432, 'loss/train': 5.672132968902588} +07/25/2024 11:27:14 - INFO - __main__ - Step 434: {'lr': 0.0003092857142857143, 'samples': 20832, 'steps': 433, 'loss/train': 2.7663679122924805} +07/25/2024 11:27:14 - INFO - __main__ - Step 435: {'lr': 0.00031, 'samples': 20880, 'steps': 434, 'loss/train': 5.114584445953369} +07/25/2024 11:27:15 - INFO - __main__ - Step 436: {'lr': 0.00031071428571428575, 'samples': 20928, 'steps': 435, 'loss/train': 4.906188011169434} +07/25/2024 11:27:15 - INFO - __main__ - Step 437: {'lr': 0.00031142857142857144, 'samples': 20976, 'steps': 436, 'loss/train': 5.617345333099365} +07/25/2024 11:27:15 - INFO - __main__ - Step 438: {'lr': 0.0003121428571428572, 'samples': 21024, 'steps': 437, 'loss/train': 5.3168134689331055} +07/25/2024 11:27:16 - INFO - __main__ - Step 439: {'lr': 0.0003128571428571428, 'samples': 21072, 'steps': 438, 'loss/train': 5.9275102615356445} +07/25/2024 11:27:16 - INFO - __main__ - Step 440: {'lr': 0.0003135714285714286, 'samples': 21120, 'steps': 439, 'loss/train': 5.676148891448975} +07/25/2024 11:27:16 - INFO - __main__ - Step 441: {'lr': 0.00031428571428571427, 'samples': 21168, 'steps': 440, 'loss/train': 5.500940799713135} +07/25/2024 11:27:16 - INFO - __main__ - Step 442: {'lr': 0.000315, 'samples': 21216, 'steps': 441, 'loss/train': 5.5004777908325195} +07/25/2024 11:27:17 - INFO - __main__ - Step 443: {'lr': 0.0003157142857142857, 'samples': 21264, 'steps': 442, 'loss/train': 5.778385639190674} +07/25/2024 11:27:17 - INFO - __main__ - Step 444: {'lr': 0.00031642857142857145, 'samples': 21312, 'steps': 443, 'loss/train': 5.602491855621338} +07/25/2024 11:27:17 - INFO - __main__ - Step 445: {'lr': 0.00031714285714285715, 'samples': 21360, 'steps': 444, 'loss/train': 5.408997535705566} +07/25/2024 11:27:17 - INFO - __main__ - Step 446: {'lr': 0.00031785714285714284, 'samples': 21408, 'steps': 445, 'loss/train': 5.704083442687988} +07/25/2024 11:27:18 - INFO - __main__ - Step 447: {'lr': 0.0003185714285714286, 'samples': 21456, 'steps': 446, 'loss/train': 5.953718185424805} +07/25/2024 11:27:18 - INFO - __main__ - Step 448: {'lr': 0.0003192857142857143, 'samples': 21504, 'steps': 447, 'loss/train': 5.258885383605957} +07/25/2024 11:27:18 - INFO - __main__ - Step 449: {'lr': 0.00032, 'samples': 21552, 'steps': 448, 'loss/train': 5.1789093017578125} +07/25/2024 11:27:19 - INFO - __main__ - Step 450: {'lr': 0.0003207142857142857, 'samples': 21600, 'steps': 449, 'loss/train': 5.688394546508789} +07/25/2024 11:27:19 - INFO - __main__ - Step 451: {'lr': 0.00032142857142857147, 'samples': 21648, 'steps': 450, 'loss/train': 5.387409210205078} +07/25/2024 11:27:19 - INFO - __main__ - Step 452: {'lr': 0.0003221428571428571, 'samples': 21696, 'steps': 451, 'loss/train': 4.667904376983643} +07/25/2024 11:27:19 - INFO - __main__ - Step 453: {'lr': 0.00032285714285714285, 'samples': 21744, 'steps': 452, 'loss/train': 5.178496837615967} +07/25/2024 11:27:20 - INFO - __main__ - Step 454: {'lr': 0.0003235714285714286, 'samples': 21792, 'steps': 453, 'loss/train': 5.717849254608154} +07/25/2024 11:27:20 - INFO - __main__ - Step 455: {'lr': 0.0003242857142857143, 'samples': 21840, 'steps': 454, 'loss/train': 4.716970443725586} +07/25/2024 11:27:20 - INFO - __main__ - Step 456: {'lr': 0.00032500000000000004, 'samples': 21888, 'steps': 455, 'loss/train': 5.360010147094727} +07/25/2024 11:27:21 - INFO - __main__ - Step 457: {'lr': 0.00032571428571428573, 'samples': 21936, 'steps': 456, 'loss/train': 5.3450493812561035} +07/25/2024 11:27:21 - INFO - __main__ - Step 458: {'lr': 0.0003264285714285714, 'samples': 21984, 'steps': 457, 'loss/train': 2.28220534324646} +07/25/2024 11:27:21 - INFO - __main__ - Step 459: {'lr': 0.0003271428571428571, 'samples': 22032, 'steps': 458, 'loss/train': 5.2693257331848145} +07/25/2024 11:27:21 - INFO - __main__ - Step 460: {'lr': 0.00032785714285714287, 'samples': 22080, 'steps': 459, 'loss/train': 4.768644332885742} +07/25/2024 11:27:22 - INFO - __main__ - Step 461: {'lr': 0.00032857142857142856, 'samples': 22128, 'steps': 460, 'loss/train': 5.411599636077881} +07/25/2024 11:27:22 - INFO - __main__ - Step 462: {'lr': 0.0003292857142857143, 'samples': 22176, 'steps': 461, 'loss/train': 5.458674907684326} +07/25/2024 11:27:22 - INFO - __main__ - Step 463: {'lr': 0.00033, 'samples': 22224, 'steps': 462, 'loss/train': 6.111447811126709} +07/25/2024 11:27:23 - INFO - __main__ - Step 464: {'lr': 0.00033071428571428575, 'samples': 22272, 'steps': 463, 'loss/train': 5.433444023132324} +07/25/2024 11:27:23 - INFO - __main__ - Step 465: {'lr': 0.00033142857142857144, 'samples': 22320, 'steps': 464, 'loss/train': 6.93735408782959} +07/25/2024 11:27:23 - INFO - __main__ - Step 466: {'lr': 0.00033214285714285713, 'samples': 22368, 'steps': 465, 'loss/train': 5.5222907066345215} +07/25/2024 11:27:23 - INFO - __main__ - Step 467: {'lr': 0.0003328571428571429, 'samples': 22416, 'steps': 466, 'loss/train': 5.385127544403076} +07/25/2024 11:27:24 - INFO - __main__ - Step 468: {'lr': 0.00033357142857142857, 'samples': 22464, 'steps': 467, 'loss/train': 5.212122917175293} +07/25/2024 11:27:24 - INFO - __main__ - Step 469: {'lr': 0.0003342857142857143, 'samples': 22512, 'steps': 468, 'loss/train': 5.643209934234619} +07/25/2024 11:27:24 - INFO - __main__ - Step 470: {'lr': 0.000335, 'samples': 22560, 'steps': 469, 'loss/train': 4.983966827392578} +07/25/2024 11:27:25 - INFO - __main__ - Step 471: {'lr': 0.0003357142857142857, 'samples': 22608, 'steps': 470, 'loss/train': 5.317984104156494} +07/25/2024 11:27:25 - INFO - __main__ - Step 472: {'lr': 0.0003364285714285714, 'samples': 22656, 'steps': 471, 'loss/train': 5.302931785583496} +07/25/2024 11:27:25 - INFO - __main__ - Step 473: {'lr': 0.00033714285714285714, 'samples': 22704, 'steps': 472, 'loss/train': 5.0862226486206055} +07/25/2024 11:27:25 - INFO - __main__ - Step 474: {'lr': 0.00033785714285714284, 'samples': 22752, 'steps': 473, 'loss/train': 5.249210834503174} +07/25/2024 11:27:26 - INFO - __main__ - Step 475: {'lr': 0.0003385714285714286, 'samples': 22800, 'steps': 474, 'loss/train': 5.417367935180664} +07/25/2024 11:27:26 - INFO - __main__ - Step 476: {'lr': 0.00033928571428571433, 'samples': 22848, 'steps': 475, 'loss/train': 5.054484844207764} +07/25/2024 11:27:26 - INFO - __main__ - Step 477: {'lr': 0.00034, 'samples': 22896, 'steps': 476, 'loss/train': 5.206552982330322} +07/25/2024 11:27:27 - INFO - __main__ - Step 478: {'lr': 0.0003407142857142857, 'samples': 22944, 'steps': 477, 'loss/train': 5.282764434814453} +07/25/2024 11:27:27 - INFO - __main__ - Step 479: {'lr': 0.0003414285714285714, 'samples': 22992, 'steps': 478, 'loss/train': 5.2550129890441895} +07/25/2024 11:27:27 - INFO - __main__ - Step 480: {'lr': 0.00034214285714285716, 'samples': 23040, 'steps': 479, 'loss/train': 5.912631034851074} +07/25/2024 11:27:27 - INFO - __main__ - Step 481: {'lr': 0.00034285714285714285, 'samples': 23088, 'steps': 480, 'loss/train': 5.7085418701171875} +07/25/2024 11:27:28 - INFO - __main__ - Step 482: {'lr': 0.0003435714285714286, 'samples': 23136, 'steps': 481, 'loss/train': 2.3058838844299316} +07/25/2024 11:27:28 - INFO - __main__ - Step 483: {'lr': 0.0003442857142857143, 'samples': 23184, 'steps': 482, 'loss/train': 5.772623538970947} +07/25/2024 11:27:28 - INFO - __main__ - Step 484: {'lr': 0.000345, 'samples': 23232, 'steps': 483, 'loss/train': 4.465800762176514} +07/25/2024 11:27:29 - INFO - __main__ - Step 485: {'lr': 0.00034571428571428573, 'samples': 23280, 'steps': 484, 'loss/train': 6.094921588897705} +07/25/2024 11:27:29 - INFO - __main__ - Step 486: {'lr': 0.0003464285714285714, 'samples': 23328, 'steps': 485, 'loss/train': 5.6171488761901855} +07/25/2024 11:27:29 - INFO - __main__ - Step 487: {'lr': 0.00034714285714285717, 'samples': 23376, 'steps': 486, 'loss/train': 5.921747207641602} +07/25/2024 11:27:29 - INFO - __main__ - Step 488: {'lr': 0.00034785714285714286, 'samples': 23424, 'steps': 487, 'loss/train': 4.982382774353027} +07/25/2024 11:27:30 - INFO - __main__ - Step 489: {'lr': 0.0003485714285714286, 'samples': 23472, 'steps': 488, 'loss/train': 6.612940788269043} +07/25/2024 11:27:30 - INFO - __main__ - Step 490: {'lr': 0.0003492857142857143, 'samples': 23520, 'steps': 489, 'loss/train': 5.756211757659912} +07/25/2024 11:27:30 - INFO - __main__ - Step 491: {'lr': 0.00035, 'samples': 23568, 'steps': 490, 'loss/train': 5.011313438415527} +07/25/2024 11:27:31 - INFO - __main__ - Step 492: {'lr': 0.0003507142857142857, 'samples': 23616, 'steps': 491, 'loss/train': 5.1694746017456055} +07/25/2024 11:27:31 - INFO - __main__ - Step 493: {'lr': 0.00035142857142857144, 'samples': 23664, 'steps': 492, 'loss/train': 5.279468059539795} +07/25/2024 11:27:31 - INFO - __main__ - Step 494: {'lr': 0.00035214285714285713, 'samples': 23712, 'steps': 493, 'loss/train': 4.962800025939941} +07/25/2024 11:27:31 - INFO - __main__ - Step 495: {'lr': 0.0003528571428571429, 'samples': 23760, 'steps': 494, 'loss/train': 4.980832576751709} +07/25/2024 11:27:32 - INFO - __main__ - Step 496: {'lr': 0.0003535714285714286, 'samples': 23808, 'steps': 495, 'loss/train': 5.078948497772217} +07/25/2024 11:27:32 - INFO - __main__ - Step 497: {'lr': 0.00035428571428571426, 'samples': 23856, 'steps': 496, 'loss/train': 5.392380237579346} +07/25/2024 11:27:32 - INFO - __main__ - Step 498: {'lr': 0.000355, 'samples': 23904, 'steps': 497, 'loss/train': 5.158860683441162} +07/25/2024 11:27:33 - INFO - __main__ - Step 499: {'lr': 0.0003557142857142857, 'samples': 23952, 'steps': 498, 'loss/train': 5.5884175300598145} +07/25/2024 11:27:33 - INFO - __main__ - Step 500: {'lr': 0.00035642857142857145, 'samples': 24000, 'steps': 499, 'loss/train': 5.21852445602417} +07/25/2024 11:27:33 - INFO - __main__ - Step 501: {'lr': 0.00035714285714285714, 'samples': 24048, 'steps': 500, 'loss/train': 5.358186721801758} +07/25/2024 11:27:33 - INFO - __main__ - Step 502: {'lr': 0.0003578571428571429, 'samples': 24096, 'steps': 501, 'loss/train': 4.848573207855225} +07/25/2024 11:27:34 - INFO - __main__ - Step 503: {'lr': 0.0003585714285714286, 'samples': 24144, 'steps': 502, 'loss/train': 5.477226257324219} +07/25/2024 11:27:34 - INFO - __main__ - Step 504: {'lr': 0.0003592857142857143, 'samples': 24192, 'steps': 503, 'loss/train': 5.143960475921631} +07/25/2024 11:27:34 - INFO - __main__ - Step 505: {'lr': 0.00035999999999999997, 'samples': 24240, 'steps': 504, 'loss/train': 4.908770561218262} +07/25/2024 11:27:35 - INFO - __main__ - Step 506: {'lr': 0.0003607142857142857, 'samples': 24288, 'steps': 505, 'loss/train': 5.084807872772217} +07/25/2024 11:27:35 - INFO - __main__ - Step 507: {'lr': 0.00036142857142857146, 'samples': 24336, 'steps': 506, 'loss/train': 4.740333557128906} +07/25/2024 11:27:35 - INFO - __main__ - Step 508: {'lr': 0.00036214285714285716, 'samples': 24384, 'steps': 507, 'loss/train': 4.465239524841309} +07/25/2024 11:27:35 - INFO - __main__ - Step 509: {'lr': 0.0003628571428571429, 'samples': 24432, 'steps': 508, 'loss/train': 6.494688510894775} +07/25/2024 11:27:36 - INFO - __main__ - Step 510: {'lr': 0.00036357142857142854, 'samples': 24480, 'steps': 509, 'loss/train': 5.464983940124512} +07/25/2024 11:27:36 - INFO - __main__ - Step 511: {'lr': 0.0003642857142857143, 'samples': 24528, 'steps': 510, 'loss/train': 6.328172206878662} +07/25/2024 11:27:36 - INFO - __main__ - Step 512: {'lr': 0.000365, 'samples': 24576, 'steps': 511, 'loss/train': 4.660853862762451} +07/25/2024 11:27:37 - INFO - __main__ - Step 513: {'lr': 0.00036571428571428573, 'samples': 24624, 'steps': 512, 'loss/train': 5.857969760894775} +07/25/2024 11:27:37 - INFO - __main__ - Step 514: {'lr': 0.0003664285714285714, 'samples': 24672, 'steps': 513, 'loss/train': 7.343400001525879} +07/25/2024 11:27:37 - INFO - __main__ - Step 515: {'lr': 0.00036714285714285717, 'samples': 24720, 'steps': 514, 'loss/train': 5.840303421020508} +07/25/2024 11:27:37 - INFO - __main__ - Step 516: {'lr': 0.0003678571428571429, 'samples': 24768, 'steps': 515, 'loss/train': 5.322987079620361} +07/25/2024 11:27:38 - INFO - __main__ - Step 517: {'lr': 0.00036857142857142855, 'samples': 24816, 'steps': 516, 'loss/train': 5.1095170974731445} +07/25/2024 11:27:38 - INFO - __main__ - Step 518: {'lr': 0.0003692857142857143, 'samples': 24864, 'steps': 517, 'loss/train': 5.1532883644104} +07/25/2024 11:27:38 - INFO - __main__ - Step 519: {'lr': 0.00037, 'samples': 24912, 'steps': 518, 'loss/train': 5.40431547164917} +07/25/2024 11:27:39 - INFO - __main__ - Step 520: {'lr': 0.00037071428571428574, 'samples': 24960, 'steps': 519, 'loss/train': 5.180419445037842} +07/25/2024 11:27:39 - INFO - __main__ - Step 521: {'lr': 0.00037142857142857143, 'samples': 25008, 'steps': 520, 'loss/train': 5.1823530197143555} +07/25/2024 11:27:39 - INFO - __main__ - Step 522: {'lr': 0.0003721428571428572, 'samples': 25056, 'steps': 521, 'loss/train': 4.937041282653809} +07/25/2024 11:27:39 - INFO - __main__ - Step 523: {'lr': 0.0003728571428571428, 'samples': 25104, 'steps': 522, 'loss/train': 5.642856121063232} +07/25/2024 11:27:40 - INFO - __main__ - Step 524: {'lr': 0.00037357142857142857, 'samples': 25152, 'steps': 523, 'loss/train': 5.054916858673096} +07/25/2024 11:27:40 - INFO - __main__ - Step 525: {'lr': 0.00037428571428571426, 'samples': 25200, 'steps': 524, 'loss/train': 5.431991100311279} +07/25/2024 11:27:40 - INFO - __main__ - Step 526: {'lr': 0.000375, 'samples': 25248, 'steps': 525, 'loss/train': 5.793028354644775} +07/25/2024 11:27:41 - INFO - __main__ - Step 527: {'lr': 0.00037571428571428575, 'samples': 25296, 'steps': 526, 'loss/train': 5.42849063873291} +07/25/2024 11:27:41 - INFO - __main__ - Step 528: {'lr': 0.00037642857142857145, 'samples': 25344, 'steps': 527, 'loss/train': 5.203094482421875} +07/25/2024 11:27:41 - INFO - __main__ - Step 529: {'lr': 0.0003771428571428572, 'samples': 25392, 'steps': 528, 'loss/train': 5.601256370544434} +07/25/2024 11:27:41 - INFO - __main__ - Step 530: {'lr': 0.00037785714285714283, 'samples': 25440, 'steps': 529, 'loss/train': 5.079232215881348} +07/25/2024 11:27:42 - INFO - __main__ - Step 531: {'lr': 0.0003785714285714286, 'samples': 25488, 'steps': 530, 'loss/train': 4.2095818519592285} +07/25/2024 11:27:42 - INFO - __main__ - Step 532: {'lr': 0.0003792857142857143, 'samples': 25536, 'steps': 531, 'loss/train': 4.543304920196533} +07/25/2024 11:27:42 - INFO - __main__ - Step 533: {'lr': 0.00038, 'samples': 25584, 'steps': 532, 'loss/train': 6.007132053375244} +07/25/2024 11:27:43 - INFO - __main__ - Step 534: {'lr': 0.0003807142857142857, 'samples': 25632, 'steps': 533, 'loss/train': 5.107751369476318} +07/25/2024 11:27:43 - INFO - __main__ - Step 535: {'lr': 0.00038142857142857146, 'samples': 25680, 'steps': 534, 'loss/train': 5.860163688659668} +07/25/2024 11:27:43 - INFO - __main__ - Step 536: {'lr': 0.0003821428571428571, 'samples': 25728, 'steps': 535, 'loss/train': 5.26261043548584} +07/25/2024 11:27:43 - INFO - __main__ - Step 537: {'lr': 0.00038285714285714285, 'samples': 25776, 'steps': 536, 'loss/train': 5.297442436218262} +07/25/2024 11:27:44 - INFO - __main__ - Step 538: {'lr': 0.0003835714285714286, 'samples': 25824, 'steps': 537, 'loss/train': 6.547177791595459} +07/25/2024 11:27:44 - INFO - __main__ - Step 539: {'lr': 0.0003842857142857143, 'samples': 25872, 'steps': 538, 'loss/train': 4.879672527313232} +07/25/2024 11:27:44 - INFO - __main__ - Step 540: {'lr': 0.00038500000000000003, 'samples': 25920, 'steps': 539, 'loss/train': 4.755589962005615} +07/25/2024 11:27:45 - INFO - __main__ - Step 541: {'lr': 0.0003857142857142857, 'samples': 25968, 'steps': 540, 'loss/train': 5.352569103240967} +07/25/2024 11:27:45 - INFO - __main__ - Step 542: {'lr': 0.0003864285714285715, 'samples': 26016, 'steps': 541, 'loss/train': 5.3843607902526855} +07/25/2024 11:27:45 - INFO - __main__ - Step 543: {'lr': 0.0003871428571428571, 'samples': 26064, 'steps': 542, 'loss/train': 5.452042579650879} +07/25/2024 11:27:45 - INFO - __main__ - Step 544: {'lr': 0.00038785714285714286, 'samples': 26112, 'steps': 543, 'loss/train': 5.4313435554504395} +07/25/2024 11:27:46 - INFO - __main__ - Step 545: {'lr': 0.00038857142857142855, 'samples': 26160, 'steps': 544, 'loss/train': 6.045054912567139} +07/25/2024 11:27:46 - INFO - __main__ - Step 546: {'lr': 0.0003892857142857143, 'samples': 26208, 'steps': 545, 'loss/train': 5.846328258514404} +07/25/2024 11:27:46 - INFO - __main__ - Step 547: {'lr': 0.00039000000000000005, 'samples': 26256, 'steps': 546, 'loss/train': 5.404782295227051} +07/25/2024 11:27:47 - INFO - __main__ - Step 548: {'lr': 0.00039071428571428574, 'samples': 26304, 'steps': 547, 'loss/train': 5.110532760620117} +07/25/2024 11:27:47 - INFO - __main__ - Step 549: {'lr': 0.00039142857142857143, 'samples': 26352, 'steps': 548, 'loss/train': 4.867753505706787} +07/25/2024 11:27:47 - INFO - __main__ - Step 550: {'lr': 0.0003921428571428571, 'samples': 26400, 'steps': 549, 'loss/train': 5.401219844818115} +07/25/2024 11:27:47 - INFO - __main__ - Step 551: {'lr': 0.0003928571428571429, 'samples': 26448, 'steps': 550, 'loss/train': 5.103180408477783} +07/25/2024 11:27:48 - INFO - __main__ - Step 552: {'lr': 0.00039357142857142857, 'samples': 26496, 'steps': 551, 'loss/train': 5.579793453216553} +07/25/2024 11:27:48 - INFO - __main__ - Step 553: {'lr': 0.0003942857142857143, 'samples': 26544, 'steps': 552, 'loss/train': 5.683652877807617} +07/25/2024 11:27:48 - INFO - __main__ - Step 554: {'lr': 0.000395, 'samples': 26592, 'steps': 553, 'loss/train': 5.351332664489746} +07/25/2024 11:27:48 - INFO - __main__ - Step 555: {'lr': 0.00039571428571428575, 'samples': 26640, 'steps': 554, 'loss/train': 5.343776702880859} +07/25/2024 11:27:49 - INFO - __main__ - Step 556: {'lr': 0.0003964285714285714, 'samples': 26688, 'steps': 555, 'loss/train': 5.160445690155029} +07/25/2024 11:27:49 - INFO - __main__ - Step 557: {'lr': 0.00039714285714285714, 'samples': 26736, 'steps': 556, 'loss/train': 6.0691633224487305} +07/25/2024 11:27:49 - INFO - __main__ - Step 558: {'lr': 0.0003978571428571429, 'samples': 26784, 'steps': 557, 'loss/train': 5.252988338470459} +07/25/2024 11:27:50 - INFO - __main__ - Step 559: {'lr': 0.0003985714285714286, 'samples': 26832, 'steps': 558, 'loss/train': 5.247750759124756} +07/25/2024 11:27:50 - INFO - __main__ - Step 560: {'lr': 0.0003992857142857143, 'samples': 26880, 'steps': 559, 'loss/train': 5.270819664001465} +07/25/2024 11:27:50 - INFO - __main__ - Step 561: {'lr': 0.0004, 'samples': 26928, 'steps': 560, 'loss/train': 4.965932846069336} +07/25/2024 11:27:50 - INFO - __main__ - Step 562: {'lr': 0.0004007142857142857, 'samples': 26976, 'steps': 561, 'loss/train': 5.925219535827637} +07/25/2024 11:27:51 - INFO - __main__ - Step 563: {'lr': 0.0004014285714285714, 'samples': 27024, 'steps': 562, 'loss/train': 4.430778980255127} +07/25/2024 11:27:51 - INFO - __main__ - Step 564: {'lr': 0.00040214285714285715, 'samples': 27072, 'steps': 563, 'loss/train': 5.096189498901367} +07/25/2024 11:27:51 - INFO - __main__ - Step 565: {'lr': 0.00040285714285714285, 'samples': 27120, 'steps': 564, 'loss/train': 5.278543472290039} +07/25/2024 11:27:52 - INFO - __main__ - Step 566: {'lr': 0.0004035714285714286, 'samples': 27168, 'steps': 565, 'loss/train': 5.113448619842529} +07/25/2024 11:27:52 - INFO - __main__ - Step 567: {'lr': 0.0004042857142857143, 'samples': 27216, 'steps': 566, 'loss/train': 5.427122592926025} +07/25/2024 11:27:52 - INFO - __main__ - Step 568: {'lr': 0.00040500000000000003, 'samples': 27264, 'steps': 567, 'loss/train': 4.9761457443237305} +07/25/2024 11:27:52 - INFO - __main__ - Step 569: {'lr': 0.0004057142857142857, 'samples': 27312, 'steps': 568, 'loss/train': 4.892258167266846} +07/25/2024 11:27:53 - INFO - __main__ - Step 570: {'lr': 0.0004064285714285714, 'samples': 27360, 'steps': 569, 'loss/train': 5.945255279541016} +07/25/2024 11:27:53 - INFO - __main__ - Step 571: {'lr': 0.00040714285714285717, 'samples': 27408, 'steps': 570, 'loss/train': 5.504655361175537} +07/25/2024 11:27:53 - INFO - __main__ - Step 572: {'lr': 0.00040785714285714286, 'samples': 27456, 'steps': 571, 'loss/train': 5.052785873413086} +07/25/2024 11:27:54 - INFO - __main__ - Step 573: {'lr': 0.0004085714285714286, 'samples': 27504, 'steps': 572, 'loss/train': 5.355578422546387} +07/25/2024 11:27:54 - INFO - __main__ - Step 574: {'lr': 0.0004092857142857143, 'samples': 27552, 'steps': 573, 'loss/train': 4.793902397155762} +07/25/2024 11:27:54 - INFO - __main__ - Step 575: {'lr': 0.00041, 'samples': 27600, 'steps': 574, 'loss/train': 4.875839710235596} +07/25/2024 11:27:54 - INFO - __main__ - Step 576: {'lr': 0.0004107142857142857, 'samples': 27648, 'steps': 575, 'loss/train': 5.1021809577941895} +07/25/2024 11:27:55 - INFO - __main__ - Step 577: {'lr': 0.00041142857142857143, 'samples': 27696, 'steps': 576, 'loss/train': 5.3876237869262695} +07/25/2024 11:27:55 - INFO - __main__ - Step 578: {'lr': 0.0004121428571428572, 'samples': 27744, 'steps': 577, 'loss/train': 5.047191619873047} +07/25/2024 11:27:55 - INFO - __main__ - Step 579: {'lr': 0.00041285714285714287, 'samples': 27792, 'steps': 578, 'loss/train': 4.5576348304748535} +07/25/2024 11:27:56 - INFO - __main__ - Step 580: {'lr': 0.0004135714285714286, 'samples': 27840, 'steps': 579, 'loss/train': 4.664468288421631} +07/25/2024 11:27:56 - INFO - __main__ - Step 581: {'lr': 0.0004142857142857143, 'samples': 27888, 'steps': 580, 'loss/train': 5.585317134857178} +07/25/2024 11:27:56 - INFO - __main__ - Step 582: {'lr': 0.000415, 'samples': 27936, 'steps': 581, 'loss/train': 5.5967020988464355} +07/25/2024 11:27:56 - INFO - __main__ - Step 583: {'lr': 0.0004157142857142857, 'samples': 27984, 'steps': 582, 'loss/train': 5.6220293045043945} +07/25/2024 11:27:57 - INFO - __main__ - Step 584: {'lr': 0.00041642857142857144, 'samples': 28032, 'steps': 583, 'loss/train': 5.570768356323242} +07/25/2024 11:27:57 - INFO - __main__ - Step 585: {'lr': 0.00041714285714285714, 'samples': 28080, 'steps': 584, 'loss/train': 5.176299095153809} +07/25/2024 11:27:57 - INFO - __main__ - Step 586: {'lr': 0.0004178571428571429, 'samples': 28128, 'steps': 585, 'loss/train': 5.479454517364502} +07/25/2024 11:27:58 - INFO - __main__ - Step 587: {'lr': 0.0004185714285714286, 'samples': 28176, 'steps': 586, 'loss/train': 5.0696024894714355} +07/25/2024 11:27:58 - INFO - __main__ - Step 588: {'lr': 0.00041928571428571427, 'samples': 28224, 'steps': 587, 'loss/train': 4.825963020324707} +07/25/2024 11:27:58 - INFO - __main__ - Step 589: {'lr': 0.00042, 'samples': 28272, 'steps': 588, 'loss/train': 5.427100658416748} +07/25/2024 11:27:58 - INFO - __main__ - Step 590: {'lr': 0.0004207142857142857, 'samples': 28320, 'steps': 589, 'loss/train': 5.1409807205200195} +07/25/2024 11:27:59 - INFO - __main__ - Step 591: {'lr': 0.00042142857142857146, 'samples': 28368, 'steps': 590, 'loss/train': 5.224395275115967} +07/25/2024 11:27:59 - INFO - __main__ - Step 592: {'lr': 0.00042214285714285715, 'samples': 28416, 'steps': 591, 'loss/train': 5.339632034301758} +07/25/2024 11:27:59 - INFO - __main__ - Step 593: {'lr': 0.0004228571428571429, 'samples': 28464, 'steps': 592, 'loss/train': 4.2167744636535645} +07/25/2024 11:28:00 - INFO - __main__ - Step 594: {'lr': 0.0004235714285714286, 'samples': 28512, 'steps': 593, 'loss/train': 5.368649482727051} +07/25/2024 11:28:00 - INFO - __main__ - Step 595: {'lr': 0.0004242857142857143, 'samples': 28560, 'steps': 594, 'loss/train': 5.189920425415039} +07/25/2024 11:28:00 - INFO - __main__ - Step 596: {'lr': 0.000425, 'samples': 28608, 'steps': 595, 'loss/train': 5.602392196655273} +07/25/2024 11:28:00 - INFO - __main__ - Step 597: {'lr': 0.0004257142857142857, 'samples': 28656, 'steps': 596, 'loss/train': 5.709826469421387} +07/25/2024 11:28:01 - INFO - __main__ - Step 598: {'lr': 0.0004264285714285714, 'samples': 28704, 'steps': 597, 'loss/train': 5.039351463317871} +07/25/2024 11:28:01 - INFO - __main__ - Step 599: {'lr': 0.00042714285714285716, 'samples': 28752, 'steps': 598, 'loss/train': 4.525015830993652} +07/25/2024 11:28:01 - INFO - __main__ - Step 600: {'lr': 0.0004278571428571429, 'samples': 28800, 'steps': 599, 'loss/train': 5.096427917480469} +07/25/2024 11:28:02 - INFO - __main__ - Step 601: {'lr': 0.00042857142857142855, 'samples': 28848, 'steps': 600, 'loss/train': 5.577730655670166} +07/25/2024 11:28:02 - INFO - __main__ - Step 602: {'lr': 0.0004292857142857143, 'samples': 28896, 'steps': 601, 'loss/train': 4.753580093383789} +07/25/2024 11:28:02 - INFO - __main__ - Step 603: {'lr': 0.00043, 'samples': 28944, 'steps': 602, 'loss/train': 5.8193039894104} +07/25/2024 11:28:02 - INFO - __main__ - Step 604: {'lr': 0.00043071428571428574, 'samples': 28992, 'steps': 603, 'loss/train': 5.35872220993042} +07/25/2024 11:28:03 - INFO - __main__ - Step 605: {'lr': 0.00043142857142857143, 'samples': 29040, 'steps': 604, 'loss/train': 6.023873329162598} +07/25/2024 11:28:03 - INFO - __main__ - Step 606: {'lr': 0.0004321428571428572, 'samples': 29088, 'steps': 605, 'loss/train': 4.999843597412109} +07/25/2024 11:28:03 - INFO - __main__ - Step 607: {'lr': 0.00043285714285714287, 'samples': 29136, 'steps': 606, 'loss/train': 5.56908655166626} +07/25/2024 11:28:04 - INFO - __main__ - Step 608: {'lr': 0.00043357142857142856, 'samples': 29184, 'steps': 607, 'loss/train': 5.115636825561523} +07/25/2024 11:28:04 - INFO - __main__ - Step 609: {'lr': 0.0004342857142857143, 'samples': 29232, 'steps': 608, 'loss/train': 5.1226487159729} +07/25/2024 11:28:04 - INFO - __main__ - Step 610: {'lr': 0.000435, 'samples': 29280, 'steps': 609, 'loss/train': 4.98507833480835} +07/25/2024 11:28:04 - INFO - __main__ - Step 611: {'lr': 0.00043571428571428575, 'samples': 29328, 'steps': 610, 'loss/train': 5.397762298583984} +07/25/2024 11:28:05 - INFO - __main__ - Step 612: {'lr': 0.00043642857142857144, 'samples': 29376, 'steps': 611, 'loss/train': 4.444913387298584} +07/25/2024 11:28:05 - INFO - __main__ - Step 613: {'lr': 0.0004371428571428572, 'samples': 29424, 'steps': 612, 'loss/train': 4.873682498931885} +07/25/2024 11:28:05 - INFO - __main__ - Step 614: {'lr': 0.00043785714285714283, 'samples': 29472, 'steps': 613, 'loss/train': 4.643433570861816} +07/25/2024 11:28:06 - INFO - __main__ - Step 615: {'lr': 0.0004385714285714286, 'samples': 29520, 'steps': 614, 'loss/train': 5.228496551513672} +07/25/2024 11:28:06 - INFO - __main__ - Step 616: {'lr': 0.00043928571428571427, 'samples': 29568, 'steps': 615, 'loss/train': 5.104097366333008} +07/25/2024 11:28:06 - INFO - __main__ - Step 617: {'lr': 0.00044, 'samples': 29616, 'steps': 616, 'loss/train': 4.815157890319824} +07/25/2024 11:28:06 - INFO - __main__ - Step 618: {'lr': 0.0004407142857142857, 'samples': 29664, 'steps': 617, 'loss/train': 5.192756652832031} +07/25/2024 11:28:07 - INFO - __main__ - Step 619: {'lr': 0.00044142857142857146, 'samples': 29712, 'steps': 618, 'loss/train': 4.8140459060668945} +07/25/2024 11:28:07 - INFO - __main__ - Step 620: {'lr': 0.00044214285714285715, 'samples': 29760, 'steps': 619, 'loss/train': 4.975928783416748} +07/25/2024 11:28:07 - INFO - __main__ - Step 621: {'lr': 0.00044285714285714284, 'samples': 29808, 'steps': 620, 'loss/train': 5.59190559387207} +07/25/2024 11:28:08 - INFO - __main__ - Step 622: {'lr': 0.0004435714285714286, 'samples': 29856, 'steps': 621, 'loss/train': 4.9148664474487305} +07/25/2024 11:28:08 - INFO - __main__ - Step 623: {'lr': 0.0004442857142857143, 'samples': 29904, 'steps': 622, 'loss/train': 5.160337924957275} +07/25/2024 11:28:08 - INFO - __main__ - Step 624: {'lr': 0.00044500000000000003, 'samples': 29952, 'steps': 623, 'loss/train': 5.064952373504639} +07/25/2024 11:28:08 - INFO - __main__ - Step 625: {'lr': 0.0004457142857142857, 'samples': 30000, 'steps': 624, 'loss/train': 5.2564496994018555} +07/25/2024 11:28:09 - INFO - __main__ - Step 626: {'lr': 0.00044642857142857147, 'samples': 30048, 'steps': 625, 'loss/train': 4.487521648406982} +07/25/2024 11:28:09 - INFO - __main__ - Step 627: {'lr': 0.0004471428571428571, 'samples': 30096, 'steps': 626, 'loss/train': 4.901277542114258} +07/25/2024 11:28:09 - INFO - __main__ - Step 628: {'lr': 0.00044785714285714285, 'samples': 30144, 'steps': 627, 'loss/train': 5.008328437805176} +07/25/2024 11:28:10 - INFO - __main__ - Step 629: {'lr': 0.0004485714285714286, 'samples': 30192, 'steps': 628, 'loss/train': 4.858040809631348} +07/25/2024 11:28:10 - INFO - __main__ - Step 630: {'lr': 0.0004492857142857143, 'samples': 30240, 'steps': 629, 'loss/train': 4.731490612030029} +07/25/2024 11:28:10 - INFO - __main__ - Step 631: {'lr': 0.00045000000000000004, 'samples': 30288, 'steps': 630, 'loss/train': 5.1491169929504395} +07/25/2024 11:28:10 - INFO - __main__ - Step 632: {'lr': 0.00045071428571428573, 'samples': 30336, 'steps': 631, 'loss/train': 5.196089267730713} +07/25/2024 11:28:11 - INFO - __main__ - Step 633: {'lr': 0.00045142857142857143, 'samples': 30384, 'steps': 632, 'loss/train': 5.398980140686035} +07/25/2024 11:28:11 - INFO - __main__ - Step 634: {'lr': 0.0004521428571428571, 'samples': 30432, 'steps': 633, 'loss/train': 5.321533679962158} +07/25/2024 11:28:11 - INFO - __main__ - Step 635: {'lr': 0.00045285714285714287, 'samples': 30480, 'steps': 634, 'loss/train': 4.863884925842285} +07/25/2024 11:28:12 - INFO - __main__ - Step 636: {'lr': 0.00045357142857142856, 'samples': 30528, 'steps': 635, 'loss/train': 4.852132797241211} +07/25/2024 11:28:12 - INFO - __main__ - Step 637: {'lr': 0.0004542857142857143, 'samples': 30576, 'steps': 636, 'loss/train': 5.03159761428833} +07/25/2024 11:28:12 - INFO - __main__ - Step 638: {'lr': 0.000455, 'samples': 30624, 'steps': 637, 'loss/train': 4.763741970062256} +07/25/2024 11:28:12 - INFO - __main__ - Step 639: {'lr': 0.00045571428571428575, 'samples': 30672, 'steps': 638, 'loss/train': 4.835602283477783} +07/25/2024 11:28:13 - INFO - __main__ - Step 640: {'lr': 0.00045642857142857144, 'samples': 30720, 'steps': 639, 'loss/train': 5.330620765686035} +07/25/2024 11:28:13 - INFO - __main__ - Step 641: {'lr': 0.00045714285714285713, 'samples': 30768, 'steps': 640, 'loss/train': 1.8108940124511719} +07/25/2024 11:28:13 - INFO - __main__ - Step 642: {'lr': 0.0004578571428571429, 'samples': 30816, 'steps': 641, 'loss/train': 5.116766929626465} +07/25/2024 11:28:14 - INFO - __main__ - Step 643: {'lr': 0.0004585714285714286, 'samples': 30864, 'steps': 642, 'loss/train': 4.698305130004883} +07/25/2024 11:28:14 - INFO - __main__ - Step 644: {'lr': 0.0004592857142857143, 'samples': 30912, 'steps': 643, 'loss/train': 5.4479522705078125} +07/25/2024 11:28:14 - INFO - __main__ - Step 645: {'lr': 0.00046, 'samples': 30960, 'steps': 644, 'loss/train': 4.921791076660156} +07/25/2024 11:28:14 - INFO - __main__ - Step 646: {'lr': 0.0004607142857142857, 'samples': 31008, 'steps': 645, 'loss/train': 5.234622955322266} +07/25/2024 11:28:15 - INFO - __main__ - Step 647: {'lr': 0.0004614285714285714, 'samples': 31056, 'steps': 646, 'loss/train': 5.500325679779053} +07/25/2024 11:28:15 - INFO - __main__ - Step 648: {'lr': 0.00046214285714285715, 'samples': 31104, 'steps': 647, 'loss/train': 5.431098937988281} +07/25/2024 11:28:15 - INFO - __main__ - Step 649: {'lr': 0.00046285714285714284, 'samples': 31152, 'steps': 648, 'loss/train': 4.876565456390381} +07/25/2024 11:28:16 - INFO - __main__ - Step 650: {'lr': 0.0004635714285714286, 'samples': 31200, 'steps': 649, 'loss/train': 4.9045891761779785} +07/25/2024 11:28:16 - INFO - __main__ - Step 651: {'lr': 0.00046428571428571433, 'samples': 31248, 'steps': 650, 'loss/train': 5.517709732055664} +07/25/2024 11:28:16 - INFO - __main__ - Step 652: {'lr': 0.000465, 'samples': 31296, 'steps': 651, 'loss/train': 4.952913761138916} +07/25/2024 11:28:16 - INFO - __main__ - Step 653: {'lr': 0.0004657142857142857, 'samples': 31344, 'steps': 652, 'loss/train': 5.166390895843506} +07/25/2024 11:28:17 - INFO - __main__ - Step 654: {'lr': 0.0004664285714285714, 'samples': 31392, 'steps': 653, 'loss/train': 5.641803741455078} +07/25/2024 11:28:17 - INFO - __main__ - Step 655: {'lr': 0.00046714285714285716, 'samples': 31440, 'steps': 654, 'loss/train': 4.589442729949951} +07/25/2024 11:28:17 - INFO - __main__ - Step 656: {'lr': 0.00046785714285714285, 'samples': 31488, 'steps': 655, 'loss/train': 5.001805305480957} +07/25/2024 11:28:18 - INFO - __main__ - Step 657: {'lr': 0.0004685714285714286, 'samples': 31536, 'steps': 656, 'loss/train': 4.693457126617432} +07/25/2024 11:28:18 - INFO - __main__ - Step 658: {'lr': 0.0004692857142857143, 'samples': 31584, 'steps': 657, 'loss/train': 5.386271953582764} +07/25/2024 11:28:18 - INFO - __main__ - Step 659: {'lr': 0.00047, 'samples': 31632, 'steps': 658, 'loss/train': 5.073136329650879} +07/25/2024 11:28:18 - INFO - __main__ - Step 660: {'lr': 0.00047071428571428573, 'samples': 31680, 'steps': 659, 'loss/train': 4.795386791229248} +07/25/2024 11:28:19 - INFO - __main__ - Step 661: {'lr': 0.0004714285714285714, 'samples': 31728, 'steps': 660, 'loss/train': 5.886542320251465} +07/25/2024 11:28:19 - INFO - __main__ - Step 662: {'lr': 0.0004721428571428572, 'samples': 31776, 'steps': 661, 'loss/train': 5.274571418762207} +07/25/2024 11:28:19 - INFO - __main__ - Step 663: {'lr': 0.00047285714285714287, 'samples': 31824, 'steps': 662, 'loss/train': 3.4391744136810303} +07/25/2024 11:28:20 - INFO - __main__ - Step 664: {'lr': 0.0004735714285714286, 'samples': 31872, 'steps': 663, 'loss/train': 5.438421726226807} +07/25/2024 11:28:20 - INFO - __main__ - Step 665: {'lr': 0.0004742857142857143, 'samples': 31920, 'steps': 664, 'loss/train': 5.293928623199463} +07/25/2024 11:28:20 - INFO - __main__ - Step 666: {'lr': 0.000475, 'samples': 31968, 'steps': 665, 'loss/train': 5.359801292419434} +07/25/2024 11:28:20 - INFO - __main__ - Step 667: {'lr': 0.0004757142857142857, 'samples': 32016, 'steps': 666, 'loss/train': 4.759598731994629} +07/25/2024 11:28:21 - INFO - __main__ - Step 668: {'lr': 0.00047642857142857144, 'samples': 32064, 'steps': 667, 'loss/train': 5.2323198318481445} +07/25/2024 11:28:21 - INFO - __main__ - Step 669: {'lr': 0.00047714285714285713, 'samples': 32112, 'steps': 668, 'loss/train': 5.070192813873291} +07/25/2024 11:28:21 - INFO - __main__ - Step 670: {'lr': 0.0004778571428571429, 'samples': 32160, 'steps': 669, 'loss/train': 5.397176265716553} +07/25/2024 11:28:22 - INFO - __main__ - Step 671: {'lr': 0.0004785714285714286, 'samples': 32208, 'steps': 670, 'loss/train': 5.2916059494018555} +07/25/2024 11:28:22 - INFO - __main__ - Step 672: {'lr': 0.00047928571428571427, 'samples': 32256, 'steps': 671, 'loss/train': 5.649632453918457} +07/25/2024 11:28:22 - INFO - __main__ - Step 673: {'lr': 0.00048, 'samples': 32304, 'steps': 672, 'loss/train': 5.29291296005249} +07/25/2024 11:28:22 - INFO - __main__ - Step 674: {'lr': 0.0004807142857142857, 'samples': 32352, 'steps': 673, 'loss/train': 2.1047961711883545} +07/25/2024 11:28:23 - INFO - __main__ - Step 675: {'lr': 0.00048142857142857145, 'samples': 32400, 'steps': 674, 'loss/train': 5.675563812255859} +07/25/2024 11:28:23 - INFO - __main__ - Step 676: {'lr': 0.00048214285714285715, 'samples': 32448, 'steps': 675, 'loss/train': 5.2416887283325195} +07/25/2024 11:28:23 - INFO - __main__ - Step 677: {'lr': 0.0004828571428571429, 'samples': 32496, 'steps': 676, 'loss/train': 5.126460552215576} +07/25/2024 11:28:24 - INFO - __main__ - Step 678: {'lr': 0.0004835714285714286, 'samples': 32544, 'steps': 677, 'loss/train': 5.213977336883545} +07/25/2024 11:28:24 - INFO - __main__ - Step 679: {'lr': 0.0004842857142857143, 'samples': 32592, 'steps': 678, 'loss/train': 5.11466121673584} +07/25/2024 11:28:24 - INFO - __main__ - Step 680: {'lr': 0.00048499999999999997, 'samples': 32640, 'steps': 679, 'loss/train': 5.070836067199707} +07/25/2024 11:28:24 - INFO - __main__ - Step 681: {'lr': 0.0004857142857142857, 'samples': 32688, 'steps': 680, 'loss/train': 5.001778602600098} +07/25/2024 11:28:25 - INFO - __main__ - Step 682: {'lr': 0.00048642857142857147, 'samples': 32736, 'steps': 681, 'loss/train': 4.957293510437012} +07/25/2024 11:28:25 - INFO - __main__ - Step 683: {'lr': 0.00048714285714285716, 'samples': 32784, 'steps': 682, 'loss/train': 5.055135726928711} +07/25/2024 11:28:25 - INFO - __main__ - Step 684: {'lr': 0.0004878571428571429, 'samples': 32832, 'steps': 683, 'loss/train': 5.22563362121582} +07/25/2024 11:28:26 - INFO - __main__ - Step 685: {'lr': 0.0004885714285714286, 'samples': 32880, 'steps': 684, 'loss/train': 5.427131175994873} +07/25/2024 11:28:26 - INFO - __main__ - Step 686: {'lr': 0.0004892857142857142, 'samples': 32928, 'steps': 685, 'loss/train': 5.36809778213501} +07/25/2024 11:28:26 - INFO - __main__ - Step 687: {'lr': 0.00049, 'samples': 32976, 'steps': 686, 'loss/train': 4.796217918395996} +07/25/2024 11:28:26 - INFO - __main__ - Step 688: {'lr': 0.0004907142857142857, 'samples': 33024, 'steps': 687, 'loss/train': 5.17207670211792} +07/25/2024 11:28:27 - INFO - __main__ - Step 689: {'lr': 0.0004914285714285715, 'samples': 33072, 'steps': 688, 'loss/train': 4.820998668670654} +07/25/2024 11:28:27 - INFO - __main__ - Step 690: {'lr': 0.0004921428571428571, 'samples': 33120, 'steps': 689, 'loss/train': 5.1848649978637695} +07/25/2024 11:28:27 - INFO - __main__ - Step 691: {'lr': 0.0004928571428571429, 'samples': 33168, 'steps': 690, 'loss/train': 4.989612579345703} +07/25/2024 11:28:28 - INFO - __main__ - Step 692: {'lr': 0.0004935714285714286, 'samples': 33216, 'steps': 691, 'loss/train': 5.44437313079834} +07/25/2024 11:28:28 - INFO - __main__ - Step 693: {'lr': 0.0004942857142857143, 'samples': 33264, 'steps': 692, 'loss/train': 4.956836223602295} +07/25/2024 11:28:28 - INFO - __main__ - Step 694: {'lr': 0.000495, 'samples': 33312, 'steps': 693, 'loss/train': 5.352087020874023} +07/25/2024 11:28:28 - INFO - __main__ - Step 695: {'lr': 0.0004957142857142857, 'samples': 33360, 'steps': 694, 'loss/train': 5.194976806640625} +07/25/2024 11:28:29 - INFO - __main__ - Step 696: {'lr': 0.0004964285714285715, 'samples': 33408, 'steps': 695, 'loss/train': 5.062796115875244} +07/25/2024 11:28:29 - INFO - __main__ - Step 697: {'lr': 0.0004971428571428571, 'samples': 33456, 'steps': 696, 'loss/train': 5.687288284301758} +07/25/2024 11:28:29 - INFO - __main__ - Step 698: {'lr': 0.0004978571428571429, 'samples': 33504, 'steps': 697, 'loss/train': 5.028861045837402} +07/25/2024 11:28:30 - INFO - __main__ - Step 699: {'lr': 0.0004985714285714285, 'samples': 33552, 'steps': 698, 'loss/train': 4.7216477394104} +07/25/2024 11:28:30 - INFO - __main__ - Step 700: {'lr': 0.0004992857142857143, 'samples': 33600, 'steps': 699, 'loss/train': 5.344950199127197} +07/25/2024 11:28:30 - INFO - __main__ - Step 701: {'lr': 0.0005, 'samples': 33648, 'steps': 700, 'loss/train': 5.06400203704834} +07/25/2024 11:28:30 - INFO - __main__ - Step 702: {'lr': 0.0004999999999446535, 'samples': 33696, 'steps': 701, 'loss/train': 4.975126266479492} +07/25/2024 11:28:31 - INFO - __main__ - Step 703: {'lr': 0.000499999999778614, 'samples': 33744, 'steps': 702, 'loss/train': 5.056604385375977} +07/25/2024 11:28:31 - INFO - __main__ - Step 704: {'lr': 0.0004999999995018815, 'samples': 33792, 'steps': 703, 'loss/train': 5.11561918258667} +07/25/2024 11:28:31 - INFO - __main__ - Step 705: {'lr': 0.000499999999114456, 'samples': 33840, 'steps': 704, 'loss/train': 4.631375789642334} +07/25/2024 11:28:32 - INFO - __main__ - Step 706: {'lr': 0.0004999999986163376, 'samples': 33888, 'steps': 705, 'loss/train': 4.751270771026611} +07/25/2024 11:28:32 - INFO - __main__ - Step 707: {'lr': 0.0004999999980075261, 'samples': 33936, 'steps': 706, 'loss/train': 3.8746402263641357} +07/25/2024 11:28:32 - INFO - __main__ - Step 708: {'lr': 0.0004999999972880217, 'samples': 33984, 'steps': 707, 'loss/train': 5.395007133483887} +07/25/2024 11:28:32 - INFO - __main__ - Step 709: {'lr': 0.0004999999964578241, 'samples': 34032, 'steps': 708, 'loss/train': 5.052489280700684} +07/25/2024 11:28:33 - INFO - __main__ - Step 710: {'lr': 0.0004999999955169337, 'samples': 34080, 'steps': 709, 'loss/train': 4.842911243438721} +07/25/2024 11:28:33 - INFO - __main__ - Step 711: {'lr': 0.0004999999944653502, 'samples': 34128, 'steps': 710, 'loss/train': 4.5596208572387695} +07/25/2024 11:28:33 - INFO - __main__ - Step 712: {'lr': 0.0004999999933030738, 'samples': 34176, 'steps': 711, 'loss/train': 4.68341588973999} +07/25/2024 11:28:34 - INFO - __main__ - Step 713: {'lr': 0.0004999999920301043, 'samples': 34224, 'steps': 712, 'loss/train': 5.419370174407959} +07/25/2024 11:28:34 - INFO - __main__ - Step 714: {'lr': 0.0004999999906464419, 'samples': 34272, 'steps': 713, 'loss/train': 5.432085990905762} +07/25/2024 11:28:34 - INFO - __main__ - Step 715: {'lr': 0.0004999999891520865, 'samples': 34320, 'steps': 714, 'loss/train': 4.982696056365967} +07/25/2024 11:28:34 - INFO - __main__ - Step 716: {'lr': 0.0004999999875470382, 'samples': 34368, 'steps': 715, 'loss/train': 5.334582328796387} +07/25/2024 11:28:35 - INFO - __main__ - Step 717: {'lr': 0.0004999999858312968, 'samples': 34416, 'steps': 716, 'loss/train': 4.86524772644043} +07/25/2024 11:28:35 - INFO - __main__ - Step 718: {'lr': 0.0004999999840048624, 'samples': 34464, 'steps': 717, 'loss/train': 5.57450008392334} +07/25/2024 11:28:35 - INFO - __main__ - Step 719: {'lr': 0.000499999982067735, 'samples': 34512, 'steps': 718, 'loss/train': 4.359405994415283} +07/25/2024 11:28:36 - INFO - __main__ - Step 720: {'lr': 0.0004999999800199146, 'samples': 34560, 'steps': 719, 'loss/train': 4.980475902557373} +07/25/2024 11:28:36 - INFO - __main__ - Step 721: {'lr': 0.0004999999778614013, 'samples': 34608, 'steps': 720, 'loss/train': 4.983829498291016} +07/25/2024 11:28:36 - INFO - __main__ - Step 722: {'lr': 0.000499999975592195, 'samples': 34656, 'steps': 721, 'loss/train': 5.040074825286865} +07/25/2024 11:28:36 - INFO - __main__ - Step 723: {'lr': 0.0004999999732122957, 'samples': 34704, 'steps': 722, 'loss/train': 4.683883190155029} +07/25/2024 11:28:37 - INFO - __main__ - Step 724: {'lr': 0.0004999999707217034, 'samples': 34752, 'steps': 723, 'loss/train': 5.132885456085205} +07/25/2024 11:28:37 - INFO - __main__ - Step 725: {'lr': 0.0004999999681204181, 'samples': 34800, 'steps': 724, 'loss/train': 4.9012651443481445} +07/25/2024 11:28:37 - INFO - __main__ - Step 726: {'lr': 0.0004999999654084398, 'samples': 34848, 'steps': 725, 'loss/train': 4.9259138107299805} +07/25/2024 11:28:38 - INFO - __main__ - Step 727: {'lr': 0.0004999999625857687, 'samples': 34896, 'steps': 726, 'loss/train': 5.019402503967285} +07/25/2024 11:28:38 - INFO - __main__ - Step 728: {'lr': 0.0004999999596524044, 'samples': 34944, 'steps': 727, 'loss/train': 4.5933518409729} +07/25/2024 11:28:38 - INFO - __main__ - Step 729: {'lr': 0.0004999999566083472, 'samples': 34992, 'steps': 728, 'loss/train': 5.9385271072387695} +07/25/2024 11:28:38 - INFO - __main__ - Step 730: {'lr': 0.0004999999534535971, 'samples': 35040, 'steps': 729, 'loss/train': 4.837466716766357} +07/25/2024 11:28:39 - INFO - __main__ - Step 731: {'lr': 0.0004999999501881538, 'samples': 35088, 'steps': 730, 'loss/train': 3.6092264652252197} +07/25/2024 11:28:39 - INFO - __main__ - Step 732: {'lr': 0.0004999999468120178, 'samples': 35136, 'steps': 731, 'loss/train': 5.080010890960693} +07/25/2024 11:28:39 - INFO - __main__ - Step 733: {'lr': 0.0004999999433251887, 'samples': 35184, 'steps': 732, 'loss/train': 4.757095813751221} +07/25/2024 11:28:40 - INFO - __main__ - Step 734: {'lr': 0.0004999999397276667, 'samples': 35232, 'steps': 733, 'loss/train': 3.824213743209839} +07/25/2024 11:28:40 - INFO - __main__ - Step 735: {'lr': 0.0004999999360194516, 'samples': 35280, 'steps': 734, 'loss/train': 4.993920803070068} +07/25/2024 11:28:40 - INFO - __main__ - Step 736: {'lr': 0.0004999999322005435, 'samples': 35328, 'steps': 735, 'loss/train': 5.481433391571045} +07/25/2024 11:28:40 - INFO - __main__ - Step 737: {'lr': 0.0004999999282709426, 'samples': 35376, 'steps': 736, 'loss/train': 5.321059226989746} +07/25/2024 11:28:41 - INFO - __main__ - Step 738: {'lr': 0.0004999999242306487, 'samples': 35424, 'steps': 737, 'loss/train': 4.850591659545898} +07/25/2024 11:28:41 - INFO - __main__ - Step 739: {'lr': 0.0004999999200796618, 'samples': 35472, 'steps': 738, 'loss/train': 5.444103240966797} +07/25/2024 11:28:41 - INFO - __main__ - Step 740: {'lr': 0.000499999915817982, 'samples': 35520, 'steps': 739, 'loss/train': 5.683528900146484} +07/25/2024 11:28:42 - INFO - __main__ - Step 741: {'lr': 0.0004999999114456092, 'samples': 35568, 'steps': 740, 'loss/train': 4.777538776397705} +07/25/2024 11:28:42 - INFO - __main__ - Step 742: {'lr': 0.0004999999069625434, 'samples': 35616, 'steps': 741, 'loss/train': 5.182293891906738} +07/25/2024 11:28:42 - INFO - __main__ - Step 743: {'lr': 0.0004999999023687847, 'samples': 35664, 'steps': 742, 'loss/train': 5.395333766937256} +07/25/2024 11:28:42 - INFO - __main__ - Step 744: {'lr': 0.000499999897664333, 'samples': 35712, 'steps': 743, 'loss/train': 5.072170257568359} +07/25/2024 11:28:43 - INFO - __main__ - Step 745: {'lr': 0.0004999998928491883, 'samples': 35760, 'steps': 744, 'loss/train': 5.1680474281311035} +07/25/2024 11:28:43 - INFO - __main__ - Step 746: {'lr': 0.0004999998879233508, 'samples': 35808, 'steps': 745, 'loss/train': 5.137325286865234} +07/25/2024 11:28:43 - INFO - __main__ - Step 747: {'lr': 0.0004999998828868203, 'samples': 35856, 'steps': 746, 'loss/train': 4.88859224319458} +07/25/2024 11:28:44 - INFO - __main__ - Step 748: {'lr': 0.0004999998777395969, 'samples': 35904, 'steps': 747, 'loss/train': 4.986325740814209} +07/25/2024 11:28:44 - INFO - __main__ - Step 749: {'lr': 0.0004999998724816805, 'samples': 35952, 'steps': 748, 'loss/train': 4.979000091552734} +07/25/2024 11:28:44 - INFO - __main__ - Step 750: {'lr': 0.0004999998671130711, 'samples': 36000, 'steps': 749, 'loss/train': 5.139535427093506} +07/25/2024 11:28:44 - INFO - __main__ - Step 751: {'lr': 0.0004999998616337689, 'samples': 36048, 'steps': 750, 'loss/train': 4.835492134094238} +07/25/2024 11:28:45 - INFO - __main__ - Step 752: {'lr': 0.0004999998560437737, 'samples': 36096, 'steps': 751, 'loss/train': 4.935268878936768} +07/25/2024 11:28:45 - INFO - __main__ - Step 753: {'lr': 0.0004999998503430855, 'samples': 36144, 'steps': 752, 'loss/train': 5.18471622467041} +07/25/2024 11:28:45 - INFO - __main__ - Step 754: {'lr': 0.0004999998445317045, 'samples': 36192, 'steps': 753, 'loss/train': 4.71912956237793} +07/25/2024 11:28:46 - INFO - __main__ - Step 755: {'lr': 0.0004999998386096305, 'samples': 36240, 'steps': 754, 'loss/train': 3.4049079418182373} +07/25/2024 11:28:46 - INFO - __main__ - Step 756: {'lr': 0.0004999998325768636, 'samples': 36288, 'steps': 755, 'loss/train': 4.986286640167236} +07/25/2024 11:28:46 - INFO - __main__ - Step 757: {'lr': 0.0004999998264334038, 'samples': 36336, 'steps': 756, 'loss/train': 4.793195724487305} +07/25/2024 11:28:46 - INFO - __main__ - Step 758: {'lr': 0.000499999820179251, 'samples': 36384, 'steps': 757, 'loss/train': 4.840726852416992} +07/25/2024 11:28:47 - INFO - __main__ - Step 759: {'lr': 0.0004999998138144054, 'samples': 36432, 'steps': 758, 'loss/train': 4.672305107116699} +07/25/2024 11:28:47 - INFO - __main__ - Step 760: {'lr': 0.0004999998073388668, 'samples': 36480, 'steps': 759, 'loss/train': 4.570957660675049} +07/25/2024 11:28:47 - INFO - __main__ - Step 761: {'lr': 0.0004999998007526353, 'samples': 36528, 'steps': 760, 'loss/train': 4.845930099487305} +07/25/2024 11:28:48 - INFO - __main__ - Step 762: {'lr': 0.0004999997940557109, 'samples': 36576, 'steps': 761, 'loss/train': 5.822187423706055} +07/25/2024 11:28:48 - INFO - __main__ - Step 763: {'lr': 0.0004999997872480935, 'samples': 36624, 'steps': 762, 'loss/train': 4.881598472595215} +07/25/2024 11:28:48 - INFO - __main__ - Step 764: {'lr': 0.0004999997803297834, 'samples': 36672, 'steps': 763, 'loss/train': 5.495177745819092} +07/25/2024 11:28:48 - INFO - __main__ - Step 765: {'lr': 0.0004999997733007803, 'samples': 36720, 'steps': 764, 'loss/train': 5.080472469329834} +07/25/2024 11:28:49 - INFO - __main__ - Step 766: {'lr': 0.0004999997661610843, 'samples': 36768, 'steps': 765, 'loss/train': 5.211321830749512} +07/25/2024 11:28:49 - INFO - __main__ - Step 767: {'lr': 0.0004999997589106955, 'samples': 36816, 'steps': 766, 'loss/train': 4.71547794342041} +07/25/2024 11:28:49 - INFO - __main__ - Step 768: {'lr': 0.0004999997515496137, 'samples': 36864, 'steps': 767, 'loss/train': 5.3114213943481445} +07/25/2024 11:28:50 - INFO - __main__ - Step 769: {'lr': 0.0004999997440778389, 'samples': 36912, 'steps': 768, 'loss/train': 5.020923137664795} +07/25/2024 11:28:50 - INFO - __main__ - Step 770: {'lr': 0.0004999997364953714, 'samples': 36960, 'steps': 769, 'loss/train': 4.871150016784668} +07/25/2024 11:28:50 - INFO - __main__ - Step 771: {'lr': 0.0004999997288022111, 'samples': 37008, 'steps': 770, 'loss/train': 4.650104999542236} +07/25/2024 11:28:50 - INFO - __main__ - Step 772: {'lr': 0.0004999997209983578, 'samples': 37056, 'steps': 771, 'loss/train': 6.554501533508301} +07/25/2024 11:28:51 - INFO - __main__ - Step 773: {'lr': 0.0004999997130838116, 'samples': 37104, 'steps': 772, 'loss/train': 5.0151214599609375} +07/25/2024 11:28:51 - INFO - __main__ - Step 774: {'lr': 0.0004999997050585726, 'samples': 37152, 'steps': 773, 'loss/train': 4.890655994415283} +07/25/2024 11:28:51 - INFO - __main__ - Step 775: {'lr': 0.0004999996969226407, 'samples': 37200, 'steps': 774, 'loss/train': 5.192580699920654} +07/25/2024 11:28:52 - INFO - __main__ - Step 776: {'lr': 0.000499999688676016, 'samples': 37248, 'steps': 775, 'loss/train': 4.589440822601318} +07/25/2024 11:28:52 - INFO - __main__ - Step 777: {'lr': 0.0004999996803186983, 'samples': 37296, 'steps': 776, 'loss/train': 4.462411403656006} +07/25/2024 11:28:52 - INFO - __main__ - Step 778: {'lr': 0.0004999996718506878, 'samples': 37344, 'steps': 777, 'loss/train': 4.699161529541016} +07/25/2024 11:28:52 - INFO - __main__ - Step 779: {'lr': 0.0004999996632719845, 'samples': 37392, 'steps': 778, 'loss/train': 3.503894090652466} +07/25/2024 11:28:53 - INFO - __main__ - Step 780: {'lr': 0.0004999996545825883, 'samples': 37440, 'steps': 779, 'loss/train': 3.833696126937866} +07/25/2024 11:28:53 - INFO - __main__ - Step 781: {'lr': 0.0004999996457824993, 'samples': 37488, 'steps': 780, 'loss/train': 4.625669479370117} +07/25/2024 11:28:53 - INFO - __main__ - Step 782: {'lr': 0.0004999996368717175, 'samples': 37536, 'steps': 781, 'loss/train': 4.996088027954102} +07/25/2024 11:28:54 - INFO - __main__ - Step 783: {'lr': 0.0004999996278502428, 'samples': 37584, 'steps': 782, 'loss/train': 5.105872631072998} +07/25/2024 11:28:54 - INFO - __main__ - Step 784: {'lr': 0.0004999996187180754, 'samples': 37632, 'steps': 783, 'loss/train': 4.643321990966797} +07/25/2024 11:28:54 - INFO - __main__ - Step 785: {'lr': 0.000499999609475215, 'samples': 37680, 'steps': 784, 'loss/train': 4.870301246643066} +07/25/2024 11:28:54 - INFO - __main__ - Step 786: {'lr': 0.0004999996001216618, 'samples': 37728, 'steps': 785, 'loss/train': 5.177028656005859} +07/25/2024 11:28:55 - INFO - __main__ - Step 787: {'lr': 0.0004999995906574159, 'samples': 37776, 'steps': 786, 'loss/train': 4.48173189163208} +07/25/2024 11:28:55 - INFO - __main__ - Step 788: {'lr': 0.000499999581082477, 'samples': 37824, 'steps': 787, 'loss/train': 5.137053489685059} +07/25/2024 11:28:55 - INFO - __main__ - Step 789: {'lr': 0.0004999995713968455, 'samples': 37872, 'steps': 788, 'loss/train': 5.182165622711182} +07/25/2024 11:28:56 - INFO - __main__ - Step 790: {'lr': 0.0004999995616005211, 'samples': 37920, 'steps': 789, 'loss/train': 5.628625392913818} +07/25/2024 11:28:56 - INFO - __main__ - Step 791: {'lr': 0.0004999995516935038, 'samples': 37968, 'steps': 790, 'loss/train': 4.8636603355407715} +07/25/2024 11:28:56 - INFO - __main__ - Step 792: {'lr': 0.0004999995416757938, 'samples': 38016, 'steps': 791, 'loss/train': 4.603965759277344} +07/25/2024 11:28:56 - INFO - __main__ - Step 793: {'lr': 0.0004999995315473911, 'samples': 38064, 'steps': 792, 'loss/train': 5.029893398284912} +07/25/2024 11:28:57 - INFO - __main__ - Step 794: {'lr': 0.0004999995213082955, 'samples': 38112, 'steps': 793, 'loss/train': 4.923609733581543} +07/25/2024 11:28:57 - INFO - __main__ - Step 795: {'lr': 0.0004999995109585072, 'samples': 38160, 'steps': 794, 'loss/train': 4.7521281242370605} +07/25/2024 11:28:57 - INFO - __main__ - Step 796: {'lr': 0.000499999500498026, 'samples': 38208, 'steps': 795, 'loss/train': 6.827172756195068} +07/25/2024 11:28:58 - INFO - __main__ - Step 797: {'lr': 0.0004999994899268521, 'samples': 38256, 'steps': 796, 'loss/train': 4.968458652496338} +07/25/2024 11:28:58 - INFO - __main__ - Step 798: {'lr': 0.0004999994792449854, 'samples': 38304, 'steps': 797, 'loss/train': 4.8277082443237305} +07/25/2024 11:28:58 - INFO - __main__ - Step 799: {'lr': 0.0004999994684524259, 'samples': 38352, 'steps': 798, 'loss/train': 5.0575337409973145} +07/25/2024 11:28:58 - INFO - __main__ - Step 800: {'lr': 0.0004999994575491737, 'samples': 38400, 'steps': 799, 'loss/train': 4.770940780639648} +07/25/2024 11:28:59 - INFO - __main__ - Step 801: {'lr': 0.0004999994465352288, 'samples': 38448, 'steps': 800, 'loss/train': 4.587588787078857} +07/25/2024 11:28:59 - INFO - __main__ - Step 802: {'lr': 0.000499999435410591, 'samples': 38496, 'steps': 801, 'loss/train': 3.3805487155914307} +07/25/2024 11:28:59 - INFO - __main__ - Step 803: {'lr': 0.0004999994241752606, 'samples': 38544, 'steps': 802, 'loss/train': 3.3488574028015137} +07/25/2024 11:28:59 - INFO - __main__ - Step 804: {'lr': 0.0004999994128292374, 'samples': 38592, 'steps': 803, 'loss/train': 5.070829391479492} +07/25/2024 11:29:00 - INFO - __main__ - Step 805: {'lr': 0.0004999994013725215, 'samples': 38640, 'steps': 804, 'loss/train': 5.607965469360352} +07/25/2024 11:29:00 - INFO - __main__ - Step 806: {'lr': 0.0004999993898051128, 'samples': 38688, 'steps': 805, 'loss/train': 4.844038486480713} +07/25/2024 11:29:00 - INFO - __main__ - Step 807: {'lr': 0.0004999993781270114, 'samples': 38736, 'steps': 806, 'loss/train': 4.914126396179199} +07/25/2024 11:29:01 - INFO - __main__ - Step 808: {'lr': 0.0004999993663382174, 'samples': 38784, 'steps': 807, 'loss/train': 4.453704833984375} +07/25/2024 11:29:01 - INFO - __main__ - Step 809: {'lr': 0.0004999993544387304, 'samples': 38832, 'steps': 808, 'loss/train': 4.366898059844971} +07/25/2024 11:29:01 - INFO - __main__ - Step 810: {'lr': 0.0004999993424285509, 'samples': 38880, 'steps': 809, 'loss/train': 5.180317401885986} +07/25/2024 11:29:01 - INFO - __main__ - Step 811: {'lr': 0.0004999993303076787, 'samples': 38928, 'steps': 810, 'loss/train': 4.724207401275635} +07/25/2024 11:29:02 - INFO - __main__ - Step 812: {'lr': 0.0004999993180761137, 'samples': 38976, 'steps': 811, 'loss/train': 5.125328540802002} +07/25/2024 11:29:02 - INFO - __main__ - Step 813: {'lr': 0.0004999993057338561, 'samples': 39024, 'steps': 812, 'loss/train': 4.733358383178711} +07/25/2024 11:29:02 - INFO - __main__ - Step 814: {'lr': 0.0004999992932809057, 'samples': 39072, 'steps': 813, 'loss/train': 4.889720439910889} +07/25/2024 11:29:03 - INFO - __main__ - Step 815: {'lr': 0.0004999992807172628, 'samples': 39120, 'steps': 814, 'loss/train': 4.520035266876221} +07/25/2024 11:29:03 - INFO - __main__ - Step 816: {'lr': 0.0004999992680429271, 'samples': 39168, 'steps': 815, 'loss/train': 4.674759387969971} +07/25/2024 11:29:03 - INFO - __main__ - Step 817: {'lr': 0.0004999992552578988, 'samples': 39216, 'steps': 816, 'loss/train': 4.6809258460998535} +07/25/2024 11:29:03 - INFO - __main__ - Step 818: {'lr': 0.0004999992423621777, 'samples': 39264, 'steps': 817, 'loss/train': 4.806493759155273} +07/25/2024 11:29:04 - INFO - __main__ - Step 819: {'lr': 0.0004999992293557641, 'samples': 39312, 'steps': 818, 'loss/train': 4.972570419311523} +07/25/2024 11:29:04 - INFO - __main__ - Step 820: {'lr': 0.0004999992162386577, 'samples': 39360, 'steps': 819, 'loss/train': 6.537446022033691} +07/25/2024 11:29:04 - INFO - __main__ - Step 821: {'lr': 0.0004999992030108588, 'samples': 39408, 'steps': 820, 'loss/train': 5.019286632537842} +07/25/2024 11:29:05 - INFO - __main__ - Step 822: {'lr': 0.0004999991896723671, 'samples': 39456, 'steps': 821, 'loss/train': 5.045091152191162} +07/25/2024 11:29:05 - INFO - __main__ - Step 823: {'lr': 0.000499999176223183, 'samples': 39504, 'steps': 822, 'loss/train': 5.251152038574219} +07/25/2024 11:29:05 - INFO - __main__ - Step 824: {'lr': 0.000499999162663306, 'samples': 39552, 'steps': 823, 'loss/train': 4.8887481689453125} +07/25/2024 11:29:05 - INFO - __main__ - Step 825: {'lr': 0.0004999991489927365, 'samples': 39600, 'steps': 824, 'loss/train': 4.871933460235596} +07/25/2024 11:29:06 - INFO - __main__ - Step 826: {'lr': 0.0004999991352114744, 'samples': 39648, 'steps': 825, 'loss/train': 2.44584059715271} +07/25/2024 11:29:06 - INFO - __main__ - Step 827: {'lr': 0.0004999991213195196, 'samples': 39696, 'steps': 826, 'loss/train': 4.480823040008545} +07/25/2024 11:29:06 - INFO - __main__ - Step 828: {'lr': 0.0004999991073168723, 'samples': 39744, 'steps': 827, 'loss/train': 5.1321892738342285} +07/25/2024 11:29:07 - INFO - __main__ - Step 829: {'lr': 0.0004999990932035323, 'samples': 39792, 'steps': 828, 'loss/train': 4.968252182006836} +07/25/2024 11:29:07 - INFO - __main__ - Step 830: {'lr': 0.0004999990789794998, 'samples': 39840, 'steps': 829, 'loss/train': 4.853609085083008} +07/25/2024 11:29:07 - INFO - __main__ - Step 831: {'lr': 0.0004999990646447748, 'samples': 39888, 'steps': 830, 'loss/train': 5.261252403259277} +07/25/2024 11:29:07 - INFO - __main__ - Step 832: {'lr': 0.000499999050199357, 'samples': 39936, 'steps': 831, 'loss/train': 5.180055618286133} +07/25/2024 11:29:08 - INFO - __main__ - Step 833: {'lr': 0.0004999990356432467, 'samples': 39984, 'steps': 832, 'loss/train': 6.649871826171875} +07/25/2024 11:29:08 - INFO - __main__ - Step 834: {'lr': 0.0004999990209764439, 'samples': 40032, 'steps': 833, 'loss/train': 5.021651268005371} +07/25/2024 11:29:08 - INFO - __main__ - Step 835: {'lr': 0.0004999990061989485, 'samples': 40080, 'steps': 834, 'loss/train': 5.512131214141846} +07/25/2024 11:29:09 - INFO - __main__ - Step 836: {'lr': 0.0004999989913107605, 'samples': 40128, 'steps': 835, 'loss/train': 4.969745635986328} +07/25/2024 11:29:09 - INFO - __main__ - Step 837: {'lr': 0.00049999897631188, 'samples': 40176, 'steps': 836, 'loss/train': 4.51662015914917} +07/25/2024 11:29:09 - INFO - __main__ - Step 838: {'lr': 0.000499998961202307, 'samples': 40224, 'steps': 837, 'loss/train': 5.241724014282227} +07/25/2024 11:29:09 - INFO - __main__ - Step 839: {'lr': 0.0004999989459820413, 'samples': 40272, 'steps': 838, 'loss/train': 5.191972732543945} +07/25/2024 11:29:10 - INFO - __main__ - Step 840: {'lr': 0.0004999989306510832, 'samples': 40320, 'steps': 839, 'loss/train': 5.282331466674805} +07/25/2024 11:29:10 - INFO - __main__ - Step 841: {'lr': 0.0004999989152094326, 'samples': 40368, 'steps': 840, 'loss/train': 4.969051837921143} +07/25/2024 11:29:10 - INFO - __main__ - Step 842: {'lr': 0.0004999988996570895, 'samples': 40416, 'steps': 841, 'loss/train': 5.206221580505371} +07/25/2024 11:29:11 - INFO - __main__ - Step 843: {'lr': 0.0004999988839940538, 'samples': 40464, 'steps': 842, 'loss/train': 5.199893951416016} +07/25/2024 11:29:11 - INFO - __main__ - Step 844: {'lr': 0.0004999988682203256, 'samples': 40512, 'steps': 843, 'loss/train': 6.359066963195801} +07/25/2024 11:29:11 - INFO - __main__ - Step 845: {'lr': 0.000499998852335905, 'samples': 40560, 'steps': 844, 'loss/train': 5.181402206420898} +07/25/2024 11:29:11 - INFO - __main__ - Step 846: {'lr': 0.0004999988363407918, 'samples': 40608, 'steps': 845, 'loss/train': 5.159450054168701} +07/25/2024 11:29:12 - INFO - __main__ - Step 847: {'lr': 0.0004999988202349861, 'samples': 40656, 'steps': 846, 'loss/train': 4.7054829597473145} +07/25/2024 11:29:12 - INFO - __main__ - Step 848: {'lr': 0.000499998804018488, 'samples': 40704, 'steps': 847, 'loss/train': 4.868635177612305} +07/25/2024 11:29:12 - INFO - __main__ - Step 849: {'lr': 0.0004999987876912975, 'samples': 40752, 'steps': 848, 'loss/train': 4.959140777587891} +07/25/2024 11:29:13 - INFO - __main__ - Step 850: {'lr': 0.0004999987712534145, 'samples': 40800, 'steps': 849, 'loss/train': 2.199373722076416} +07/25/2024 11:29:13 - INFO - __main__ - Step 851: {'lr': 0.000499998754704839, 'samples': 40848, 'steps': 850, 'loss/train': 5.358442783355713} +07/25/2024 11:29:13 - INFO - __main__ - Step 852: {'lr': 0.0004999987380455711, 'samples': 40896, 'steps': 851, 'loss/train': 4.694375991821289} +07/25/2024 11:29:13 - INFO - __main__ - Step 853: {'lr': 0.0004999987212756107, 'samples': 40944, 'steps': 852, 'loss/train': 4.836912631988525} +07/25/2024 11:29:14 - INFO - __main__ - Step 854: {'lr': 0.000499998704394958, 'samples': 40992, 'steps': 853, 'loss/train': 4.96494722366333} +07/25/2024 11:29:14 - INFO - __main__ - Step 855: {'lr': 0.0004999986874036127, 'samples': 41040, 'steps': 854, 'loss/train': 5.846248626708984} +07/25/2024 11:29:14 - INFO - __main__ - Step 856: {'lr': 0.0004999986703015752, 'samples': 41088, 'steps': 855, 'loss/train': 4.301486968994141} +07/25/2024 11:29:15 - INFO - __main__ - Step 857: {'lr': 0.0004999986530888451, 'samples': 41136, 'steps': 856, 'loss/train': 4.825923442840576} +07/25/2024 11:29:15 - INFO - __main__ - Step 858: {'lr': 0.0004999986357654227, 'samples': 41184, 'steps': 857, 'loss/train': 5.604084491729736} +07/25/2024 11:29:15 - INFO - __main__ - Step 859: {'lr': 0.0004999986183313079, 'samples': 41232, 'steps': 858, 'loss/train': 5.295572757720947} +07/25/2024 11:29:15 - INFO - __main__ - Step 860: {'lr': 0.0004999986007865007, 'samples': 41280, 'steps': 859, 'loss/train': 2.351705312728882} +07/25/2024 11:29:16 - INFO - __main__ - Step 861: {'lr': 0.0004999985831310011, 'samples': 41328, 'steps': 860, 'loss/train': 4.420810699462891} +07/25/2024 11:29:16 - INFO - __main__ - Step 862: {'lr': 0.0004999985653648091, 'samples': 41376, 'steps': 861, 'loss/train': 4.787631988525391} +07/25/2024 11:29:16 - INFO - __main__ - Step 863: {'lr': 0.0004999985474879249, 'samples': 41424, 'steps': 862, 'loss/train': 4.876559734344482} +07/25/2024 11:29:17 - INFO - __main__ - Step 864: {'lr': 0.0004999985295003483, 'samples': 41472, 'steps': 863, 'loss/train': 5.44087028503418} +07/25/2024 11:29:17 - INFO - __main__ - Step 865: {'lr': 0.0004999985114020793, 'samples': 41520, 'steps': 864, 'loss/train': 5.341536998748779} +07/25/2024 11:29:17 - INFO - __main__ - Step 866: {'lr': 0.0004999984931931179, 'samples': 41568, 'steps': 865, 'loss/train': 4.802292823791504} +07/25/2024 11:29:17 - INFO - __main__ - Step 867: {'lr': 0.0004999984748734643, 'samples': 41616, 'steps': 866, 'loss/train': 5.034390449523926} +07/25/2024 11:29:18 - INFO - __main__ - Step 868: {'lr': 0.0004999984564431183, 'samples': 41664, 'steps': 867, 'loss/train': 6.577080726623535} +07/25/2024 11:29:18 - INFO - __main__ - Step 869: {'lr': 0.00049999843790208, 'samples': 41712, 'steps': 868, 'loss/train': 4.65304708480835} +07/25/2024 11:29:18 - INFO - __main__ - Step 870: {'lr': 0.0004999984192503494, 'samples': 41760, 'steps': 869, 'loss/train': 5.646238803863525} +07/25/2024 11:29:19 - INFO - __main__ - Step 871: {'lr': 0.0004999984004879265, 'samples': 41808, 'steps': 870, 'loss/train': 4.363501071929932} +07/25/2024 11:29:19 - INFO - __main__ - Step 872: {'lr': 0.0004999983816148113, 'samples': 41856, 'steps': 871, 'loss/train': 5.473283767700195} +07/25/2024 11:29:19 - INFO - __main__ - Step 873: {'lr': 0.0004999983626310039, 'samples': 41904, 'steps': 872, 'loss/train': 5.163318634033203} +07/25/2024 11:29:19 - INFO - __main__ - Step 874: {'lr': 0.0004999983435365041, 'samples': 41952, 'steps': 873, 'loss/train': 2.540121078491211} +07/25/2024 11:29:20 - INFO - __main__ - Step 875: {'lr': 0.0004999983243313121, 'samples': 42000, 'steps': 874, 'loss/train': 4.659963607788086} +07/25/2024 11:29:20 - INFO - __main__ - Step 876: {'lr': 0.0004999983050154279, 'samples': 42048, 'steps': 875, 'loss/train': 5.098273277282715} +07/25/2024 11:29:20 - INFO - __main__ - Step 877: {'lr': 0.0004999982855888514, 'samples': 42096, 'steps': 876, 'loss/train': 4.962574005126953} +07/25/2024 11:29:21 - INFO - __main__ - Step 878: {'lr': 0.0004999982660515828, 'samples': 42144, 'steps': 877, 'loss/train': 3.9857006072998047} +07/25/2024 11:29:21 - INFO - __main__ - Step 879: {'lr': 0.0004999982464036218, 'samples': 42192, 'steps': 878, 'loss/train': 5.535689353942871} +07/25/2024 11:29:21 - INFO - __main__ - Step 880: {'lr': 0.0004999982266449686, 'samples': 42240, 'steps': 879, 'loss/train': 4.654881477355957} +07/25/2024 11:29:21 - INFO - __main__ - Step 881: {'lr': 0.0004999982067756233, 'samples': 42288, 'steps': 880, 'loss/train': 4.240962028503418} +07/25/2024 11:29:22 - INFO - __main__ - Step 882: {'lr': 0.0004999981867955856, 'samples': 42336, 'steps': 881, 'loss/train': 4.901486873626709} +07/25/2024 11:29:22 - INFO - __main__ - Step 883: {'lr': 0.0004999981667048558, 'samples': 42384, 'steps': 882, 'loss/train': 4.817023277282715} +07/25/2024 11:29:22 - INFO - __main__ - Step 884: {'lr': 0.0004999981465034339, 'samples': 42432, 'steps': 883, 'loss/train': 3.3032774925231934} +07/25/2024 11:29:23 - INFO - __main__ - Step 885: {'lr': 0.0004999981261913197, 'samples': 42480, 'steps': 884, 'loss/train': 4.876931190490723} +07/25/2024 11:29:23 - INFO - __main__ - Step 886: {'lr': 0.0004999981057685135, 'samples': 42528, 'steps': 885, 'loss/train': 5.528351306915283} +07/25/2024 11:29:23 - INFO - __main__ - Step 887: {'lr': 0.0004999980852350151, 'samples': 42576, 'steps': 886, 'loss/train': 4.51015043258667} +07/25/2024 11:29:23 - INFO - __main__ - Step 888: {'lr': 0.0004999980645908244, 'samples': 42624, 'steps': 887, 'loss/train': 4.645712852478027} +07/25/2024 11:29:24 - INFO - __main__ - Step 889: {'lr': 0.0004999980438359417, 'samples': 42672, 'steps': 888, 'loss/train': 5.507139205932617} +07/25/2024 11:29:24 - INFO - __main__ - Step 890: {'lr': 0.0004999980229703669, 'samples': 42720, 'steps': 889, 'loss/train': 4.529921054840088} +07/25/2024 11:29:24 - INFO - __main__ - Step 891: {'lr': 0.0004999980019940999, 'samples': 42768, 'steps': 890, 'loss/train': 4.326233386993408} +07/25/2024 11:29:25 - INFO - __main__ - Step 892: {'lr': 0.0004999979809071407, 'samples': 42816, 'steps': 891, 'loss/train': 5.904787540435791} +07/25/2024 11:29:25 - INFO - __main__ - Step 893: {'lr': 0.0004999979597094896, 'samples': 42864, 'steps': 892, 'loss/train': 4.707917213439941} +07/25/2024 11:29:25 - INFO - __main__ - Step 894: {'lr': 0.0004999979384011462, 'samples': 42912, 'steps': 893, 'loss/train': 5.195908546447754} +07/25/2024 11:29:25 - INFO - __main__ - Step 895: {'lr': 0.000499997916982111, 'samples': 42960, 'steps': 894, 'loss/train': 4.4713006019592285} +07/25/2024 11:29:26 - INFO - __main__ - Step 896: {'lr': 0.0004999978954523835, 'samples': 43008, 'steps': 895, 'loss/train': 4.251871109008789} +07/25/2024 11:29:26 - INFO - __main__ - Step 897: {'lr': 0.0004999978738119639, 'samples': 43056, 'steps': 896, 'loss/train': 5.048295021057129} +07/25/2024 11:29:26 - INFO - __main__ - Step 898: {'lr': 0.0004999978520608525, 'samples': 43104, 'steps': 897, 'loss/train': 2.2602949142456055} +07/25/2024 11:29:27 - INFO - __main__ - Step 899: {'lr': 0.0004999978301990488, 'samples': 43152, 'steps': 898, 'loss/train': 4.597592830657959} +07/25/2024 11:29:27 - INFO - __main__ - Step 900: {'lr': 0.0004999978082265531, 'samples': 43200, 'steps': 899, 'loss/train': 4.239841938018799} +07/25/2024 11:29:27 - INFO - __main__ - Step 901: {'lr': 0.0004999977861433655, 'samples': 43248, 'steps': 900, 'loss/train': 5.053356170654297} +07/25/2024 11:29:27 - INFO - __main__ - Step 902: {'lr': 0.0004999977639494858, 'samples': 43296, 'steps': 901, 'loss/train': 4.41482400894165} +07/25/2024 11:29:28 - INFO - __main__ - Step 903: {'lr': 0.0004999977416449142, 'samples': 43344, 'steps': 902, 'loss/train': 5.219255447387695} +07/25/2024 11:29:28 - INFO - __main__ - Step 904: {'lr': 0.0004999977192296505, 'samples': 43392, 'steps': 903, 'loss/train': 5.063007831573486} +07/25/2024 11:29:28 - INFO - __main__ - Step 905: {'lr': 0.0004999976967036948, 'samples': 43440, 'steps': 904, 'loss/train': 4.537136077880859} +07/25/2024 11:29:29 - INFO - __main__ - Step 906: {'lr': 0.0004999976740670473, 'samples': 43488, 'steps': 905, 'loss/train': 4.797700881958008} +07/25/2024 11:29:29 - INFO - __main__ - Step 907: {'lr': 0.0004999976513197075, 'samples': 43536, 'steps': 906, 'loss/train': 4.408461093902588} +07/25/2024 11:29:29 - INFO - __main__ - Step 908: {'lr': 0.0004999976284616761, 'samples': 43584, 'steps': 907, 'loss/train': 4.240564823150635} +07/25/2024 11:29:29 - INFO - __main__ - Step 909: {'lr': 0.0004999976054929525, 'samples': 43632, 'steps': 908, 'loss/train': 4.677988529205322} +07/25/2024 11:29:30 - INFO - __main__ - Step 910: {'lr': 0.0004999975824135371, 'samples': 43680, 'steps': 909, 'loss/train': 4.5764384269714355} +07/25/2024 11:29:30 - INFO - __main__ - Step 911: {'lr': 0.0004999975592234297, 'samples': 43728, 'steps': 910, 'loss/train': 4.69354248046875} +07/25/2024 11:29:30 - INFO - __main__ - Step 912: {'lr': 0.0004999975359226305, 'samples': 43776, 'steps': 911, 'loss/train': 5.018548011779785} +07/25/2024 11:29:31 - INFO - __main__ - Step 913: {'lr': 0.0004999975125111393, 'samples': 43824, 'steps': 912, 'loss/train': 4.649795055389404} +07/25/2024 11:29:31 - INFO - __main__ - Step 914: {'lr': 0.0004999974889889562, 'samples': 43872, 'steps': 913, 'loss/train': 4.765526294708252} +07/25/2024 11:29:31 - INFO - __main__ - Step 915: {'lr': 0.0004999974653560812, 'samples': 43920, 'steps': 914, 'loss/train': 4.311110496520996} +07/25/2024 11:29:31 - INFO - __main__ - Step 916: {'lr': 0.0004999974416125144, 'samples': 43968, 'steps': 915, 'loss/train': 6.127801895141602} +07/25/2024 11:29:32 - INFO - __main__ - Step 917: {'lr': 0.0004999974177582557, 'samples': 44016, 'steps': 916, 'loss/train': 4.835689544677734} +07/25/2024 11:29:32 - INFO - __main__ - Step 918: {'lr': 0.0004999973937933052, 'samples': 44064, 'steps': 917, 'loss/train': 4.731512546539307} +07/25/2024 11:29:32 - INFO - __main__ - Step 919: {'lr': 0.0004999973697176628, 'samples': 44112, 'steps': 918, 'loss/train': 4.770925045013428} +07/25/2024 11:29:33 - INFO - __main__ - Step 920: {'lr': 0.0004999973455313286, 'samples': 44160, 'steps': 919, 'loss/train': 3.9944865703582764} +07/25/2024 11:29:33 - INFO - __main__ - Step 921: {'lr': 0.0004999973212343026, 'samples': 44208, 'steps': 920, 'loss/train': 4.911899089813232} +07/25/2024 11:29:33 - INFO - __main__ - Step 922: {'lr': 0.0004999972968265847, 'samples': 44256, 'steps': 921, 'loss/train': 1.815787672996521} +07/25/2024 11:29:33 - INFO - __main__ - Step 923: {'lr': 0.000499997272308175, 'samples': 44304, 'steps': 922, 'loss/train': 4.291437149047852} +07/25/2024 11:29:34 - INFO - __main__ - Step 924: {'lr': 0.0004999972476790736, 'samples': 44352, 'steps': 923, 'loss/train': 4.729881763458252} +07/25/2024 11:29:34 - INFO - __main__ - Step 925: {'lr': 0.0004999972229392804, 'samples': 44400, 'steps': 924, 'loss/train': 4.501216888427734} +07/25/2024 11:29:34 - INFO - __main__ - Step 926: {'lr': 0.0004999971980887954, 'samples': 44448, 'steps': 925, 'loss/train': 4.681583881378174} +07/25/2024 11:29:35 - INFO - __main__ - Step 927: {'lr': 0.0004999971731276188, 'samples': 44496, 'steps': 926, 'loss/train': 4.784088134765625} +07/25/2024 11:29:35 - INFO - __main__ - Step 928: {'lr': 0.0004999971480557502, 'samples': 44544, 'steps': 927, 'loss/train': 4.962979316711426} +07/25/2024 11:29:35 - INFO - __main__ - Step 929: {'lr': 0.00049999712287319, 'samples': 44592, 'steps': 928, 'loss/train': 4.463033676147461} +07/25/2024 11:29:35 - INFO - __main__ - Step 930: {'lr': 0.0004999970975799381, 'samples': 44640, 'steps': 929, 'loss/train': 3.6862781047821045} +07/25/2024 11:29:36 - INFO - __main__ - Step 931: {'lr': 0.0004999970721759945, 'samples': 44688, 'steps': 930, 'loss/train': 4.80645751953125} +07/25/2024 11:29:36 - INFO - __main__ - Step 932: {'lr': 0.0004999970466613591, 'samples': 44736, 'steps': 931, 'loss/train': 5.074225902557373} +07/25/2024 11:29:36 - INFO - __main__ - Step 933: {'lr': 0.0004999970210360321, 'samples': 44784, 'steps': 932, 'loss/train': 4.544675350189209} +07/25/2024 11:29:37 - INFO - __main__ - Step 934: {'lr': 0.0004999969953000134, 'samples': 44832, 'steps': 933, 'loss/train': 4.731500625610352} +07/25/2024 11:29:37 - INFO - __main__ - Step 935: {'lr': 0.0004999969694533031, 'samples': 44880, 'steps': 934, 'loss/train': 5.049954891204834} +07/25/2024 11:29:37 - INFO - __main__ - Step 936: {'lr': 0.000499996943495901, 'samples': 44928, 'steps': 935, 'loss/train': 4.552140712738037} +07/25/2024 11:29:37 - INFO - __main__ - Step 937: {'lr': 0.0004999969174278074, 'samples': 44976, 'steps': 936, 'loss/train': 3.897148370742798} +07/25/2024 11:29:38 - INFO - __main__ - Step 938: {'lr': 0.0004999968912490221, 'samples': 45024, 'steps': 937, 'loss/train': 4.634158134460449} +07/25/2024 11:29:38 - INFO - __main__ - Step 939: {'lr': 0.0004999968649595452, 'samples': 45072, 'steps': 938, 'loss/train': 3.9504356384277344} +07/25/2024 11:29:38 - INFO - __main__ - Step 940: {'lr': 0.0004999968385593767, 'samples': 45120, 'steps': 939, 'loss/train': 7.005516529083252} +07/25/2024 11:29:39 - INFO - __main__ - Step 941: {'lr': 0.0004999968120485166, 'samples': 45168, 'steps': 940, 'loss/train': 4.810059070587158} +07/25/2024 11:29:39 - INFO - __main__ - Step 942: {'lr': 0.000499996785426965, 'samples': 45216, 'steps': 941, 'loss/train': 5.022961616516113} +07/25/2024 11:29:39 - INFO - __main__ - Step 943: {'lr': 0.0004999967586947217, 'samples': 45264, 'steps': 942, 'loss/train': 4.597911357879639} +07/25/2024 11:29:39 - INFO - __main__ - Step 944: {'lr': 0.0004999967318517868, 'samples': 45312, 'steps': 943, 'loss/train': 4.141514778137207} +07/25/2024 11:29:40 - INFO - __main__ - Step 945: {'lr': 0.0004999967048981604, 'samples': 45360, 'steps': 944, 'loss/train': 4.032029151916504} +07/25/2024 11:29:40 - INFO - __main__ - Step 946: {'lr': 0.0004999966778338426, 'samples': 45408, 'steps': 945, 'loss/train': 1.830592393875122} +07/25/2024 11:29:40 - INFO - __main__ - Step 947: {'lr': 0.0004999966506588332, 'samples': 45456, 'steps': 946, 'loss/train': 3.6663570404052734} +07/25/2024 11:29:41 - INFO - __main__ - Step 948: {'lr': 0.0004999966233731321, 'samples': 45504, 'steps': 947, 'loss/train': 4.724912166595459} +07/25/2024 11:29:41 - INFO - __main__ - Step 949: {'lr': 0.0004999965959767397, 'samples': 45552, 'steps': 948, 'loss/train': 4.475071907043457} +07/25/2024 11:29:41 - INFO - __main__ - Step 950: {'lr': 0.0004999965684696558, 'samples': 45600, 'steps': 949, 'loss/train': 4.610623836517334} +07/25/2024 11:29:41 - INFO - __main__ - Step 951: {'lr': 0.0004999965408518804, 'samples': 45648, 'steps': 950, 'loss/train': 4.814184665679932} +07/25/2024 11:29:42 - INFO - __main__ - Step 952: {'lr': 0.0004999965131234135, 'samples': 45696, 'steps': 951, 'loss/train': 4.066946983337402} +07/25/2024 11:29:42 - INFO - __main__ - Step 953: {'lr': 0.0004999964852842552, 'samples': 45744, 'steps': 952, 'loss/train': 4.346728801727295} +07/25/2024 11:29:42 - INFO - __main__ - Step 954: {'lr': 0.0004999964573344054, 'samples': 45792, 'steps': 953, 'loss/train': 2.661189079284668} +07/25/2024 11:29:43 - INFO - __main__ - Step 955: {'lr': 0.0004999964292738642, 'samples': 45840, 'steps': 954, 'loss/train': 4.173356533050537} +07/25/2024 11:29:43 - INFO - __main__ - Step 956: {'lr': 0.0004999964011026316, 'samples': 45888, 'steps': 955, 'loss/train': 4.9730730056762695} +07/25/2024 11:29:43 - INFO - __main__ - Step 957: {'lr': 0.0004999963728207076, 'samples': 45936, 'steps': 956, 'loss/train': 3.342686176300049} +07/25/2024 11:29:43 - INFO - __main__ - Step 958: {'lr': 0.0004999963444280922, 'samples': 45984, 'steps': 957, 'loss/train': 4.451864719390869} +07/25/2024 11:29:44 - INFO - __main__ - Step 959: {'lr': 0.0004999963159247855, 'samples': 46032, 'steps': 958, 'loss/train': 4.578034400939941} +07/25/2024 11:29:44 - INFO - __main__ - Step 960: {'lr': 0.0004999962873107874, 'samples': 46080, 'steps': 959, 'loss/train': 4.475720405578613} +07/25/2024 11:29:44 - INFO - __main__ - Step 961: {'lr': 0.0004999962585860979, 'samples': 46128, 'steps': 960, 'loss/train': 5.920584201812744} +07/25/2024 11:29:45 - INFO - __main__ - Step 962: {'lr': 0.0004999962297507171, 'samples': 46176, 'steps': 961, 'loss/train': 4.630947589874268} +07/25/2024 11:29:45 - INFO - __main__ - Step 963: {'lr': 0.0004999962008046449, 'samples': 46224, 'steps': 962, 'loss/train': 4.494876861572266} +07/25/2024 11:29:45 - INFO - __main__ - Step 964: {'lr': 0.0004999961717478815, 'samples': 46272, 'steps': 963, 'loss/train': 6.151782512664795} +07/25/2024 11:29:45 - INFO - __main__ - Step 965: {'lr': 0.0004999961425804267, 'samples': 46320, 'steps': 964, 'loss/train': 4.590292930603027} +07/25/2024 11:29:46 - INFO - __main__ - Step 966: {'lr': 0.0004999961133022807, 'samples': 46368, 'steps': 965, 'loss/train': 5.1051154136657715} +07/25/2024 11:29:46 - INFO - __main__ - Step 967: {'lr': 0.0004999960839134433, 'samples': 46416, 'steps': 966, 'loss/train': 4.871452808380127} +07/25/2024 11:29:46 - INFO - __main__ - Step 968: {'lr': 0.0004999960544139147, 'samples': 46464, 'steps': 967, 'loss/train': 4.81806755065918} +07/25/2024 11:29:47 - INFO - __main__ - Step 969: {'lr': 0.000499996024803695, 'samples': 46512, 'steps': 968, 'loss/train': 4.845391273498535} +07/25/2024 11:29:47 - INFO - __main__ - Step 970: {'lr': 0.0004999959950827839, 'samples': 46560, 'steps': 969, 'loss/train': 2.2122859954833984} +07/25/2024 11:29:47 - INFO - __main__ - Step 971: {'lr': 0.0004999959652511815, 'samples': 46608, 'steps': 970, 'loss/train': 2.9025630950927734} +07/25/2024 11:29:47 - INFO - __main__ - Step 972: {'lr': 0.0004999959353088881, 'samples': 46656, 'steps': 971, 'loss/train': 5.466766357421875} +07/25/2024 11:29:48 - INFO - __main__ - Step 973: {'lr': 0.0004999959052559034, 'samples': 46704, 'steps': 972, 'loss/train': 4.198077201843262} +07/25/2024 11:29:48 - INFO - __main__ - Step 974: {'lr': 0.0004999958750922276, 'samples': 46752, 'steps': 973, 'loss/train': 3.016974449157715} +07/25/2024 11:29:48 - INFO - __main__ - Step 975: {'lr': 0.0004999958448178605, 'samples': 46800, 'steps': 974, 'loss/train': 4.965607643127441} +07/25/2024 11:29:49 - INFO - __main__ - Step 976: {'lr': 0.0004999958144328023, 'samples': 46848, 'steps': 975, 'loss/train': 3.851520538330078} +07/25/2024 11:29:49 - INFO - __main__ - Step 977: {'lr': 0.0004999957839370529, 'samples': 46896, 'steps': 976, 'loss/train': 4.711852550506592} +07/25/2024 11:29:49 - INFO - __main__ - Step 978: {'lr': 0.0004999957533306124, 'samples': 46944, 'steps': 977, 'loss/train': 4.394510746002197} +07/25/2024 11:29:49 - INFO - __main__ - Step 979: {'lr': 0.0004999957226134809, 'samples': 46992, 'steps': 978, 'loss/train': 5.205809116363525} +07/25/2024 11:29:50 - INFO - __main__ - Step 980: {'lr': 0.0004999956917856582, 'samples': 47040, 'steps': 979, 'loss/train': 4.420988082885742} +07/25/2024 11:29:50 - INFO - __main__ - Step 981: {'lr': 0.0004999956608471444, 'samples': 47088, 'steps': 980, 'loss/train': 5.396276473999023} +07/25/2024 11:29:50 - INFO - __main__ - Step 982: {'lr': 0.0004999956297979396, 'samples': 47136, 'steps': 981, 'loss/train': 4.426606178283691} +07/25/2024 11:29:51 - INFO - __main__ - Step 983: {'lr': 0.0004999955986380437, 'samples': 47184, 'steps': 982, 'loss/train': 4.5260138511657715} +07/25/2024 11:29:51 - INFO - __main__ - Step 984: {'lr': 0.0004999955673674568, 'samples': 47232, 'steps': 983, 'loss/train': 4.901034355163574} +07/25/2024 11:29:51 - INFO - __main__ - Step 985: {'lr': 0.0004999955359861788, 'samples': 47280, 'steps': 984, 'loss/train': 4.502119064331055} +07/25/2024 11:29:51 - INFO - __main__ - Step 986: {'lr': 0.0004999955044942097, 'samples': 47328, 'steps': 985, 'loss/train': 4.545851230621338} +07/25/2024 11:29:52 - INFO - __main__ - Step 987: {'lr': 0.0004999954728915497, 'samples': 47376, 'steps': 986, 'loss/train': 4.4481353759765625} +07/25/2024 11:29:52 - INFO - __main__ - Step 988: {'lr': 0.0004999954411781987, 'samples': 47424, 'steps': 987, 'loss/train': 5.959582805633545} +07/25/2024 11:29:52 - INFO - __main__ - Step 989: {'lr': 0.0004999954093541567, 'samples': 47472, 'steps': 988, 'loss/train': 5.109045028686523} +07/25/2024 11:29:53 - INFO - __main__ - Step 990: {'lr': 0.0004999953774194239, 'samples': 47520, 'steps': 989, 'loss/train': 4.202765941619873} +07/25/2024 11:29:53 - INFO - __main__ - Step 991: {'lr': 0.000499995345374, 'samples': 47568, 'steps': 990, 'loss/train': 4.969134330749512} +07/25/2024 11:29:53 - INFO - __main__ - Step 992: {'lr': 0.0004999953132178852, 'samples': 47616, 'steps': 991, 'loss/train': 4.481476306915283} +07/25/2024 11:29:53 - INFO - __main__ - Step 993: {'lr': 0.0004999952809510794, 'samples': 47664, 'steps': 992, 'loss/train': 4.845812797546387} +07/25/2024 11:29:54 - INFO - __main__ - Step 994: {'lr': 0.0004999952485735828, 'samples': 47712, 'steps': 993, 'loss/train': 3.084613084793091} +07/25/2024 11:29:54 - INFO - __main__ - Step 995: {'lr': 0.0004999952160853952, 'samples': 47760, 'steps': 994, 'loss/train': 2.33349347114563} +07/25/2024 11:29:54 - INFO - __main__ - Step 996: {'lr': 0.0004999951834865168, 'samples': 47808, 'steps': 995, 'loss/train': 4.647409915924072} +07/25/2024 11:29:55 - INFO - __main__ - Step 997: {'lr': 0.0004999951507769476, 'samples': 47856, 'steps': 996, 'loss/train': 4.4788923263549805} +07/25/2024 11:29:55 - INFO - __main__ - Step 998: {'lr': 0.0004999951179566874, 'samples': 47904, 'steps': 997, 'loss/train': 4.58307409286499} +07/25/2024 11:29:55 - INFO - __main__ - Step 999: {'lr': 0.0004999950850257364, 'samples': 47952, 'steps': 998, 'loss/train': 4.52963399887085} +07/25/2024 11:29:55 - INFO - __main__ - Step 1000: {'lr': 0.0004999950519840947, 'samples': 48000, 'steps': 999, 'loss/train': 4.559277057647705} +07/25/2024 11:29:56 - INFO - __main__ - Step 1001: {'lr': 0.0004999950188317621, 'samples': 48048, 'steps': 1000, 'loss/train': 4.2972636222839355} +07/25/2024 11:29:56 - INFO - __main__ - Step 1002: {'lr': 0.0004999949855687387, 'samples': 48096, 'steps': 1001, 'loss/train': 4.523578643798828} +07/25/2024 11:29:56 - INFO - __main__ - Step 1003: {'lr': 0.0004999949521950246, 'samples': 48144, 'steps': 1002, 'loss/train': 4.420088291168213} +07/25/2024 11:29:57 - INFO - __main__ - Step 1004: {'lr': 0.0004999949187106196, 'samples': 48192, 'steps': 1003, 'loss/train': 4.253798007965088} +07/25/2024 11:29:57 - INFO - __main__ - Step 1005: {'lr': 0.0004999948851155241, 'samples': 48240, 'steps': 1004, 'loss/train': 5.6254777908325195} +07/25/2024 11:29:57 - INFO - __main__ - Step 1006: {'lr': 0.0004999948514097377, 'samples': 48288, 'steps': 1005, 'loss/train': 4.585422039031982} +07/25/2024 11:29:57 - INFO - __main__ - Step 1007: {'lr': 0.0004999948175932606, 'samples': 48336, 'steps': 1006, 'loss/train': 3.9037489891052246} +07/25/2024 11:29:58 - INFO - __main__ - Step 1008: {'lr': 0.0004999947836660928, 'samples': 48384, 'steps': 1007, 'loss/train': 4.443600654602051} +07/25/2024 11:29:58 - INFO - __main__ - Step 1009: {'lr': 0.0004999947496282343, 'samples': 48432, 'steps': 1008, 'loss/train': 4.928224086761475} +07/25/2024 11:29:58 - INFO - __main__ - Step 1010: {'lr': 0.0004999947154796851, 'samples': 48480, 'steps': 1009, 'loss/train': 4.61187744140625} +07/25/2024 11:29:59 - INFO - __main__ - Step 1011: {'lr': 0.0004999946812204453, 'samples': 48528, 'steps': 1010, 'loss/train': 4.457871437072754} +07/25/2024 11:29:59 - INFO - __main__ - Step 1012: {'lr': 0.0004999946468505149, 'samples': 48576, 'steps': 1011, 'loss/train': 5.9526801109313965} +07/25/2024 11:29:59 - INFO - __main__ - Step 1013: {'lr': 0.0004999946123698939, 'samples': 48624, 'steps': 1012, 'loss/train': 4.075104236602783} +07/25/2024 11:29:59 - INFO - __main__ - Step 1014: {'lr': 0.0004999945777785822, 'samples': 48672, 'steps': 1013, 'loss/train': 4.330913066864014} +07/25/2024 11:30:00 - INFO - __main__ - Step 1015: {'lr': 0.0004999945430765798, 'samples': 48720, 'steps': 1014, 'loss/train': 4.778712749481201} +07/25/2024 11:30:00 - INFO - __main__ - Step 1016: {'lr': 0.0004999945082638871, 'samples': 48768, 'steps': 1015, 'loss/train': 4.039951324462891} +07/25/2024 11:30:00 - INFO - __main__ - Step 1017: {'lr': 0.0004999944733405035, 'samples': 48816, 'steps': 1016, 'loss/train': 4.33430814743042} +07/25/2024 11:30:01 - INFO - __main__ - Step 1018: {'lr': 0.0004999944383064297, 'samples': 48864, 'steps': 1017, 'loss/train': 4.761912822723389} +07/25/2024 11:30:01 - INFO - __main__ - Step 1019: {'lr': 0.0004999944031616651, 'samples': 48912, 'steps': 1018, 'loss/train': 2.329806089401245} +07/25/2024 11:30:01 - INFO - __main__ - Step 1020: {'lr': 0.0004999943679062101, 'samples': 48960, 'steps': 1019, 'loss/train': 4.5794677734375} +07/25/2024 11:30:01 - INFO - __main__ - Step 1021: {'lr': 0.0004999943325400646, 'samples': 49008, 'steps': 1020, 'loss/train': 4.156892776489258} +07/25/2024 11:30:02 - INFO - __main__ - Step 1022: {'lr': 0.0004999942970632285, 'samples': 49056, 'steps': 1021, 'loss/train': 4.595393657684326} +07/25/2024 11:30:02 - INFO - __main__ - Step 1023: {'lr': 0.0004999942614757021, 'samples': 49104, 'steps': 1022, 'loss/train': 5.106929779052734} +07/25/2024 11:30:02 - INFO - __main__ - Step 1024: {'lr': 0.0004999942257774852, 'samples': 49152, 'steps': 1023, 'loss/train': 4.373543739318848} +07/25/2024 11:30:03 - INFO - __main__ - Step 1025: {'lr': 0.0004999941899685778, 'samples': 49200, 'steps': 1024, 'loss/train': 4.484495639801025} +07/25/2024 11:30:03 - INFO - __main__ - Step 1026: {'lr': 0.0004999941540489801, 'samples': 49248, 'steps': 1025, 'loss/train': 4.532932281494141} +07/25/2024 11:30:03 - INFO - __main__ - Step 1027: {'lr': 0.0004999941180186918, 'samples': 49296, 'steps': 1026, 'loss/train': 4.318892478942871} +07/25/2024 11:30:03 - INFO - __main__ - Step 1028: {'lr': 0.0004999940818777132, 'samples': 49344, 'steps': 1027, 'loss/train': 4.374916076660156} +07/25/2024 11:30:04 - INFO - __main__ - Step 1029: {'lr': 0.0004999940456260443, 'samples': 49392, 'steps': 1028, 'loss/train': 5.128415107727051} +07/25/2024 11:30:04 - INFO - __main__ - Step 1030: {'lr': 0.000499994009263685, 'samples': 49440, 'steps': 1029, 'loss/train': 4.162870407104492} +07/25/2024 11:30:04 - INFO - __main__ - Step 1031: {'lr': 0.0004999939727906353, 'samples': 49488, 'steps': 1030, 'loss/train': 4.000319004058838} +07/25/2024 11:30:05 - INFO - __main__ - Step 1032: {'lr': 0.0004999939362068953, 'samples': 49536, 'steps': 1031, 'loss/train': 4.224857330322266} +07/25/2024 11:30:05 - INFO - __main__ - Step 1033: {'lr': 0.0004999938995124651, 'samples': 49584, 'steps': 1032, 'loss/train': 4.956424236297607} +07/25/2024 11:30:05 - INFO - __main__ - Step 1034: {'lr': 0.0004999938627073445, 'samples': 49632, 'steps': 1033, 'loss/train': 4.28972053527832} +07/25/2024 11:30:05 - INFO - __main__ - Step 1035: {'lr': 0.0004999938257915336, 'samples': 49680, 'steps': 1034, 'loss/train': 4.63893461227417} +07/25/2024 11:30:06 - INFO - __main__ - Step 1036: {'lr': 0.0004999937887650325, 'samples': 49728, 'steps': 1035, 'loss/train': 5.789316654205322} +07/25/2024 11:30:06 - INFO - __main__ - Step 1037: {'lr': 0.0004999937516278411, 'samples': 49776, 'steps': 1036, 'loss/train': 4.731266975402832} +07/25/2024 11:30:06 - INFO - __main__ - Step 1038: {'lr': 0.0004999937143799595, 'samples': 49824, 'steps': 1037, 'loss/train': 4.979552745819092} +07/25/2024 11:30:07 - INFO - __main__ - Step 1039: {'lr': 0.0004999936770213876, 'samples': 49872, 'steps': 1038, 'loss/train': 5.390227317810059} +07/25/2024 11:30:07 - INFO - __main__ - Step 1040: {'lr': 0.0004999936395521257, 'samples': 49920, 'steps': 1039, 'loss/train': 4.171738624572754} +07/25/2024 11:30:07 - INFO - __main__ - Step 1041: {'lr': 0.0004999936019721735, 'samples': 49968, 'steps': 1040, 'loss/train': 4.3125} +07/25/2024 11:30:07 - INFO - __main__ - Step 1042: {'lr': 0.0004999935642815311, 'samples': 50016, 'steps': 1041, 'loss/train': 4.624352931976318} +07/25/2024 11:30:08 - INFO - __main__ - Step 1043: {'lr': 0.0004999935264801985, 'samples': 50064, 'steps': 1042, 'loss/train': 2.2687032222747803} +07/25/2024 11:30:08 - INFO - __main__ - Step 1044: {'lr': 0.0004999934885681759, 'samples': 50112, 'steps': 1043, 'loss/train': 4.421309947967529} +07/25/2024 11:30:08 - INFO - __main__ - Step 1045: {'lr': 0.0004999934505454633, 'samples': 50160, 'steps': 1044, 'loss/train': 4.474697113037109} +07/25/2024 11:30:09 - INFO - __main__ - Step 1046: {'lr': 0.0004999934124120605, 'samples': 50208, 'steps': 1045, 'loss/train': 4.07570743560791} +07/25/2024 11:30:09 - INFO - __main__ - Step 1047: {'lr': 0.0004999933741679675, 'samples': 50256, 'steps': 1046, 'loss/train': 4.459044933319092} +07/25/2024 11:30:09 - INFO - __main__ - Step 1048: {'lr': 0.0004999933358131846, 'samples': 50304, 'steps': 1047, 'loss/train': 4.4556684494018555} +07/25/2024 11:30:09 - INFO - __main__ - Step 1049: {'lr': 0.0004999932973477115, 'samples': 50352, 'steps': 1048, 'loss/train': 4.3679351806640625} +07/25/2024 11:30:10 - INFO - __main__ - Step 1050: {'lr': 0.0004999932587715485, 'samples': 50400, 'steps': 1049, 'loss/train': 4.297793865203857} +07/25/2024 11:30:10 - INFO - __main__ - Step 1051: {'lr': 0.0004999932200846954, 'samples': 50448, 'steps': 1050, 'loss/train': 5.087413787841797} +07/25/2024 11:30:10 - INFO - __main__ - Step 1052: {'lr': 0.0004999931812871523, 'samples': 50496, 'steps': 1051, 'loss/train': 4.02859354019165} +07/25/2024 11:30:11 - INFO - __main__ - Step 1053: {'lr': 0.0004999931423789193, 'samples': 50544, 'steps': 1052, 'loss/train': 5.621583938598633} +07/25/2024 11:30:11 - INFO - __main__ - Step 1054: {'lr': 0.0004999931033599964, 'samples': 50592, 'steps': 1053, 'loss/train': 4.628414154052734} +07/25/2024 11:30:11 - INFO - __main__ - Step 1055: {'lr': 0.0004999930642303834, 'samples': 50640, 'steps': 1054, 'loss/train': 4.858904838562012} +07/25/2024 11:30:11 - INFO - __main__ - Step 1056: {'lr': 0.0004999930249900805, 'samples': 50688, 'steps': 1055, 'loss/train': 4.258524417877197} +07/25/2024 11:30:12 - INFO - __main__ - Step 1057: {'lr': 0.0004999929856390878, 'samples': 50736, 'steps': 1056, 'loss/train': 4.098135948181152} +07/25/2024 11:30:12 - INFO - __main__ - Step 1058: {'lr': 0.0004999929461774052, 'samples': 50784, 'steps': 1057, 'loss/train': 4.78945779800415} +07/25/2024 11:30:12 - INFO - __main__ - Step 1059: {'lr': 0.0004999929066050327, 'samples': 50832, 'steps': 1058, 'loss/train': 4.225325107574463} +07/25/2024 11:30:13 - INFO - __main__ - Step 1060: {'lr': 0.0004999928669219703, 'samples': 50880, 'steps': 1059, 'loss/train': 5.863598823547363} +07/25/2024 11:30:13 - INFO - __main__ - Step 1061: {'lr': 0.000499992827128218, 'samples': 50928, 'steps': 1060, 'loss/train': 3.630878210067749} +07/25/2024 11:30:13 - INFO - __main__ - Step 1062: {'lr': 0.0004999927872237761, 'samples': 50976, 'steps': 1061, 'loss/train': 4.736876010894775} +07/25/2024 11:30:13 - INFO - __main__ - Step 1063: {'lr': 0.0004999927472086442, 'samples': 51024, 'steps': 1062, 'loss/train': 4.836633205413818} +07/25/2024 11:30:14 - INFO - __main__ - Step 1064: {'lr': 0.0004999927070828226, 'samples': 51072, 'steps': 1063, 'loss/train': 3.1358907222747803} +07/25/2024 11:30:14 - INFO - __main__ - Step 1065: {'lr': 0.0004999926668463112, 'samples': 51120, 'steps': 1064, 'loss/train': 4.842285633087158} +07/25/2024 11:30:14 - INFO - __main__ - Step 1066: {'lr': 0.0004999926264991101, 'samples': 51168, 'steps': 1065, 'loss/train': 4.450716495513916} +07/25/2024 11:30:15 - INFO - __main__ - Step 1067: {'lr': 0.0004999925860412192, 'samples': 51216, 'steps': 1066, 'loss/train': 2.418735980987549} +07/25/2024 11:30:15 - INFO - __main__ - Step 1068: {'lr': 0.0004999925454726386, 'samples': 51264, 'steps': 1067, 'loss/train': 4.861375331878662} +07/25/2024 11:30:15 - INFO - __main__ - Step 1069: {'lr': 0.0004999925047933684, 'samples': 51312, 'steps': 1068, 'loss/train': 4.817486763000488} +07/25/2024 11:30:15 - INFO - __main__ - Step 1070: {'lr': 0.0004999924640034085, 'samples': 51360, 'steps': 1069, 'loss/train': 4.845327377319336} +07/25/2024 11:30:16 - INFO - __main__ - Step 1071: {'lr': 0.0004999924231027589, 'samples': 51408, 'steps': 1070, 'loss/train': 5.0700883865356445} +07/25/2024 11:30:16 - INFO - __main__ - Step 1072: {'lr': 0.0004999923820914196, 'samples': 51456, 'steps': 1071, 'loss/train': 3.7210195064544678} +07/25/2024 11:30:16 - INFO - __main__ - Step 1073: {'lr': 0.0004999923409693907, 'samples': 51504, 'steps': 1072, 'loss/train': 4.946224689483643} +07/25/2024 11:30:17 - INFO - __main__ - Step 1074: {'lr': 0.0004999922997366722, 'samples': 51552, 'steps': 1073, 'loss/train': 4.449472427368164} +07/25/2024 11:30:17 - INFO - __main__ - Step 1075: {'lr': 0.0004999922583932642, 'samples': 51600, 'steps': 1074, 'loss/train': 4.851611614227295} +07/25/2024 11:30:17 - INFO - __main__ - Step 1076: {'lr': 0.0004999922169391666, 'samples': 51648, 'steps': 1075, 'loss/train': 4.625977993011475} +07/25/2024 11:30:17 - INFO - __main__ - Step 1077: {'lr': 0.0004999921753743795, 'samples': 51696, 'steps': 1076, 'loss/train': 5.327348709106445} +07/25/2024 11:30:18 - INFO - __main__ - Step 1078: {'lr': 0.0004999921336989027, 'samples': 51744, 'steps': 1077, 'loss/train': 4.30959939956665} +07/25/2024 11:30:18 - INFO - __main__ - Step 1079: {'lr': 0.0004999920919127365, 'samples': 51792, 'steps': 1078, 'loss/train': 4.674826145172119} +07/25/2024 11:30:18 - INFO - __main__ - Step 1080: {'lr': 0.0004999920500158809, 'samples': 51840, 'steps': 1079, 'loss/train': 3.9736225605010986} +07/25/2024 11:30:19 - INFO - __main__ - Step 1081: {'lr': 0.0004999920080083356, 'samples': 51888, 'steps': 1080, 'loss/train': 4.547460079193115} +07/25/2024 11:30:19 - INFO - __main__ - Step 1082: {'lr': 0.0004999919658901011, 'samples': 51936, 'steps': 1081, 'loss/train': 4.334077835083008} +07/25/2024 11:30:19 - INFO - __main__ - Step 1083: {'lr': 0.0004999919236611769, 'samples': 51984, 'steps': 1082, 'loss/train': 4.31503963470459} +07/25/2024 11:30:19 - INFO - __main__ - Step 1084: {'lr': 0.0004999918813215634, 'samples': 52032, 'steps': 1083, 'loss/train': 6.337460517883301} +07/25/2024 11:30:20 - INFO - __main__ - Step 1085: {'lr': 0.0004999918388712606, 'samples': 52080, 'steps': 1084, 'loss/train': 4.304469108581543} +07/25/2024 11:30:20 - INFO - __main__ - Step 1086: {'lr': 0.0004999917963102684, 'samples': 52128, 'steps': 1085, 'loss/train': 4.965458393096924} +07/25/2024 11:30:20 - INFO - __main__ - Step 1087: {'lr': 0.0004999917536385866, 'samples': 52176, 'steps': 1086, 'loss/train': 4.942840576171875} +07/25/2024 11:30:21 - INFO - __main__ - Step 1088: {'lr': 0.0004999917108562157, 'samples': 52224, 'steps': 1087, 'loss/train': 4.327383995056152} +07/25/2024 11:30:21 - INFO - __main__ - Step 1089: {'lr': 0.0004999916679631555, 'samples': 52272, 'steps': 1088, 'loss/train': 4.18502140045166} +07/25/2024 11:30:21 - INFO - __main__ - Step 1090: {'lr': 0.0004999916249594057, 'samples': 52320, 'steps': 1089, 'loss/train': 4.5445685386657715} +07/25/2024 11:30:21 - INFO - __main__ - Step 1091: {'lr': 0.0004999915818449669, 'samples': 52368, 'steps': 1090, 'loss/train': 2.109022855758667} +07/25/2024 11:30:22 - INFO - __main__ - Step 1092: {'lr': 0.0004999915386198387, 'samples': 52416, 'steps': 1091, 'loss/train': 4.802774906158447} +07/25/2024 11:30:22 - INFO - __main__ - Step 1093: {'lr': 0.0004999914952840213, 'samples': 52464, 'steps': 1092, 'loss/train': 4.995550155639648} +07/25/2024 11:30:22 - INFO - __main__ - Step 1094: {'lr': 0.0004999914518375146, 'samples': 52512, 'steps': 1093, 'loss/train': 4.977186679840088} +07/25/2024 11:30:23 - INFO - __main__ - Step 1095: {'lr': 0.0004999914082803188, 'samples': 52560, 'steps': 1094, 'loss/train': 4.691105365753174} +07/25/2024 11:30:23 - INFO - __main__ - Step 1096: {'lr': 0.0004999913646124338, 'samples': 52608, 'steps': 1095, 'loss/train': 1.9730228185653687} +07/25/2024 11:30:23 - INFO - __main__ - Step 1097: {'lr': 0.0004999913208338596, 'samples': 52656, 'steps': 1096, 'loss/train': 4.426222801208496} +07/25/2024 11:30:23 - INFO - __main__ - Step 1098: {'lr': 0.0004999912769445961, 'samples': 52704, 'steps': 1097, 'loss/train': 3.8948159217834473} +07/25/2024 11:30:24 - INFO - __main__ - Step 1099: {'lr': 0.0004999912329446437, 'samples': 52752, 'steps': 1098, 'loss/train': 4.883854866027832} +07/25/2024 11:30:24 - INFO - __main__ - Step 1100: {'lr': 0.0004999911888340021, 'samples': 52800, 'steps': 1099, 'loss/train': 4.578806400299072} +07/25/2024 11:30:24 - INFO - __main__ - Step 1101: {'lr': 0.0004999911446126713, 'samples': 52848, 'steps': 1100, 'loss/train': 4.2870192527771} +07/25/2024 11:30:25 - INFO - __main__ - Step 1102: {'lr': 0.0004999911002806516, 'samples': 52896, 'steps': 1101, 'loss/train': 4.167599678039551} +07/25/2024 11:30:25 - INFO - __main__ - Step 1103: {'lr': 0.0004999910558379428, 'samples': 52944, 'steps': 1102, 'loss/train': 4.10032320022583} +07/25/2024 11:30:25 - INFO - __main__ - Step 1104: {'lr': 0.0004999910112845449, 'samples': 52992, 'steps': 1103, 'loss/train': 4.669090270996094} +07/25/2024 11:30:25 - INFO - __main__ - Step 1105: {'lr': 0.000499990966620458, 'samples': 53040, 'steps': 1104, 'loss/train': 4.904987812042236} +07/25/2024 11:30:26 - INFO - __main__ - Step 1106: {'lr': 0.0004999909218456821, 'samples': 53088, 'steps': 1105, 'loss/train': 4.797867774963379} +07/25/2024 11:30:26 - INFO - __main__ - Step 1107: {'lr': 0.0004999908769602173, 'samples': 53136, 'steps': 1106, 'loss/train': 4.5368876457214355} +07/25/2024 11:30:26 - INFO - __main__ - Step 1108: {'lr': 0.0004999908319640635, 'samples': 53184, 'steps': 1107, 'loss/train': 5.956573009490967} +07/25/2024 11:30:27 - INFO - __main__ - Step 1109: {'lr': 0.0004999907868572206, 'samples': 53232, 'steps': 1108, 'loss/train': 3.8393595218658447} +07/25/2024 11:30:27 - INFO - __main__ - Step 1110: {'lr': 0.0004999907416396891, 'samples': 53280, 'steps': 1109, 'loss/train': 4.929397106170654} +07/25/2024 11:30:27 - INFO - __main__ - Step 1111: {'lr': 0.0004999906963114684, 'samples': 53328, 'steps': 1110, 'loss/train': 3.9522764682769775} +07/25/2024 11:30:27 - INFO - __main__ - Step 1112: {'lr': 0.000499990650872559, 'samples': 53376, 'steps': 1111, 'loss/train': 4.942009925842285} +07/25/2024 11:30:28 - INFO - __main__ - Step 1113: {'lr': 0.0004999906053229607, 'samples': 53424, 'steps': 1112, 'loss/train': 4.666757583618164} +07/25/2024 11:30:28 - INFO - __main__ - Step 1114: {'lr': 0.0004999905596626736, 'samples': 53472, 'steps': 1113, 'loss/train': 4.260319709777832} +07/25/2024 11:30:28 - INFO - __main__ - Step 1115: {'lr': 0.0004999905138916977, 'samples': 53520, 'steps': 1114, 'loss/train': 2.2744526863098145} +07/25/2024 11:30:29 - INFO - __main__ - Step 1116: {'lr': 0.0004999904680100329, 'samples': 53568, 'steps': 1115, 'loss/train': 4.17803955078125} +07/25/2024 11:30:29 - INFO - __main__ - Step 1117: {'lr': 0.0004999904220176794, 'samples': 53616, 'steps': 1116, 'loss/train': 4.3346052169799805} +07/25/2024 11:30:29 - INFO - __main__ - Step 1118: {'lr': 0.0004999903759146372, 'samples': 53664, 'steps': 1117, 'loss/train': 4.185057640075684} +07/25/2024 11:30:29 - INFO - __main__ - Step 1119: {'lr': 0.0004999903297009062, 'samples': 53712, 'steps': 1118, 'loss/train': 4.28247594833374} +07/25/2024 11:30:30 - INFO - __main__ - Step 1120: {'lr': 0.0004999902833764864, 'samples': 53760, 'steps': 1119, 'loss/train': 1.8745015859603882} +07/25/2024 11:30:30 - INFO - __main__ - Step 1121: {'lr': 0.000499990236941378, 'samples': 53808, 'steps': 1120, 'loss/train': 4.112489700317383} +07/25/2024 11:30:30 - INFO - __main__ - Step 1122: {'lr': 0.0004999901903955809, 'samples': 53856, 'steps': 1121, 'loss/train': 4.073506832122803} +07/25/2024 11:30:31 - INFO - __main__ - Step 1123: {'lr': 0.0004999901437390952, 'samples': 53904, 'steps': 1122, 'loss/train': 3.8088479042053223} +07/25/2024 11:30:31 - INFO - __main__ - Step 1124: {'lr': 0.0004999900969719209, 'samples': 53952, 'steps': 1123, 'loss/train': 4.513753414154053} +07/25/2024 11:30:31 - INFO - __main__ - Step 1125: {'lr': 0.0004999900500940579, 'samples': 54000, 'steps': 1124, 'loss/train': 3.877835750579834} +07/25/2024 11:30:31 - INFO - __main__ - Step 1126: {'lr': 0.0004999900031055064, 'samples': 54048, 'steps': 1125, 'loss/train': 4.486719131469727} +07/25/2024 11:30:32 - INFO - __main__ - Step 1127: {'lr': 0.0004999899560062662, 'samples': 54096, 'steps': 1126, 'loss/train': 4.369678497314453} +07/25/2024 11:30:32 - INFO - __main__ - Step 1128: {'lr': 0.0004999899087963375, 'samples': 54144, 'steps': 1127, 'loss/train': 4.472923755645752} +07/25/2024 11:30:32 - INFO - __main__ - Step 1129: {'lr': 0.0004999898614757203, 'samples': 54192, 'steps': 1128, 'loss/train': 4.938466548919678} +07/25/2024 11:30:33 - INFO - __main__ - Step 1130: {'lr': 0.0004999898140444146, 'samples': 54240, 'steps': 1129, 'loss/train': 4.6002912521362305} +07/25/2024 11:30:33 - INFO - __main__ - Step 1131: {'lr': 0.0004999897665024204, 'samples': 54288, 'steps': 1130, 'loss/train': 4.226775646209717} +07/25/2024 11:30:33 - INFO - __main__ - Step 1132: {'lr': 0.0004999897188497377, 'samples': 54336, 'steps': 1131, 'loss/train': 4.445532321929932} +07/25/2024 11:30:33 - INFO - __main__ - Step 1133: {'lr': 0.0004999896710863667, 'samples': 54384, 'steps': 1132, 'loss/train': 4.777052402496338} +07/25/2024 11:30:34 - INFO - __main__ - Step 1134: {'lr': 0.0004999896232123071, 'samples': 54432, 'steps': 1133, 'loss/train': 5.120794773101807} +07/25/2024 11:30:34 - INFO - __main__ - Step 1135: {'lr': 0.0004999895752275592, 'samples': 54480, 'steps': 1134, 'loss/train': 4.252773284912109} +07/25/2024 11:30:34 - INFO - __main__ - Step 1136: {'lr': 0.0004999895271321228, 'samples': 54528, 'steps': 1135, 'loss/train': 4.104519844055176} +07/25/2024 11:30:35 - INFO - __main__ - Step 1137: {'lr': 0.0004999894789259982, 'samples': 54576, 'steps': 1136, 'loss/train': 4.295417785644531} +07/25/2024 11:30:35 - INFO - __main__ - Step 1138: {'lr': 0.0004999894306091852, 'samples': 54624, 'steps': 1137, 'loss/train': 4.297276973724365} +07/25/2024 11:30:35 - INFO - __main__ - Step 1139: {'lr': 0.0004999893821816839, 'samples': 54672, 'steps': 1138, 'loss/train': 2.015188694000244} +07/25/2024 11:30:35 - INFO - __main__ - Step 1140: {'lr': 0.0004999893336434943, 'samples': 54720, 'steps': 1139, 'loss/train': 4.769472599029541} +07/25/2024 11:30:36 - INFO - __main__ - Step 1141: {'lr': 0.0004999892849946164, 'samples': 54768, 'steps': 1140, 'loss/train': 4.208677768707275} +07/25/2024 11:30:36 - INFO - __main__ - Step 1142: {'lr': 0.0004999892362350503, 'samples': 54816, 'steps': 1141, 'loss/train': 5.13785457611084} +07/25/2024 11:30:36 - INFO - __main__ - Step 1143: {'lr': 0.0004999891873647959, 'samples': 54864, 'steps': 1142, 'loss/train': 4.178411960601807} +07/25/2024 11:30:37 - INFO - __main__ - Step 1144: {'lr': 0.0004999891383838534, 'samples': 54912, 'steps': 1143, 'loss/train': 1.6398323774337769} +07/25/2024 11:30:37 - INFO - __main__ - Step 1145: {'lr': 0.0004999890892922226, 'samples': 54960, 'steps': 1144, 'loss/train': 4.774133682250977} +07/25/2024 11:30:37 - INFO - __main__ - Step 1146: {'lr': 0.0004999890400899037, 'samples': 55008, 'steps': 1145, 'loss/train': 4.485567092895508} +07/25/2024 11:30:37 - INFO - __main__ - Step 1147: {'lr': 0.0004999889907768967, 'samples': 55056, 'steps': 1146, 'loss/train': 3.446824550628662} +07/25/2024 11:30:38 - INFO - __main__ - Step 1148: {'lr': 0.0004999889413532015, 'samples': 55104, 'steps': 1147, 'loss/train': 3.880856513977051} +07/25/2024 11:30:38 - INFO - __main__ - Step 1149: {'lr': 0.0004999888918188182, 'samples': 55152, 'steps': 1148, 'loss/train': 4.786018371582031} +07/25/2024 11:30:38 - INFO - __main__ - Step 1150: {'lr': 0.0004999888421737469, 'samples': 55200, 'steps': 1149, 'loss/train': 4.025722026824951} +07/25/2024 11:30:39 - INFO - __main__ - Step 1151: {'lr': 0.0004999887924179874, 'samples': 55248, 'steps': 1150, 'loss/train': 4.553150177001953} +07/25/2024 11:30:39 - INFO - __main__ - Step 1152: {'lr': 0.0004999887425515399, 'samples': 55296, 'steps': 1151, 'loss/train': 4.284087181091309} +07/25/2024 11:30:39 - INFO - __main__ - Step 1153: {'lr': 0.0004999886925744044, 'samples': 55344, 'steps': 1152, 'loss/train': 4.362899303436279} +07/25/2024 11:30:39 - INFO - __main__ - Step 1154: {'lr': 0.000499988642486581, 'samples': 55392, 'steps': 1153, 'loss/train': 4.319853782653809} +07/25/2024 11:30:40 - INFO - __main__ - Step 1155: {'lr': 0.0004999885922880696, 'samples': 55440, 'steps': 1154, 'loss/train': 4.290018081665039} +07/25/2024 11:30:40 - INFO - __main__ - Step 1156: {'lr': 0.0004999885419788704, 'samples': 55488, 'steps': 1155, 'loss/train': 4.507620811462402} +07/25/2024 11:30:40 - INFO - __main__ - Step 1157: {'lr': 0.000499988491558983, 'samples': 55536, 'steps': 1156, 'loss/train': 4.28510046005249} +07/25/2024 11:30:41 - INFO - __main__ - Step 1158: {'lr': 0.0004999884410284078, 'samples': 55584, 'steps': 1157, 'loss/train': 4.994575500488281} +07/25/2024 11:30:41 - INFO - __main__ - Step 1159: {'lr': 0.0004999883903871448, 'samples': 55632, 'steps': 1158, 'loss/train': 4.080944061279297} +07/25/2024 11:30:41 - INFO - __main__ - Step 1160: {'lr': 0.0004999883396351938, 'samples': 55680, 'steps': 1159, 'loss/train': 4.619893550872803} +07/25/2024 11:30:41 - INFO - __main__ - Step 1161: {'lr': 0.0004999882887725552, 'samples': 55728, 'steps': 1160, 'loss/train': 4.296975612640381} +07/25/2024 11:30:42 - INFO - __main__ - Step 1162: {'lr': 0.0004999882377992286, 'samples': 55776, 'steps': 1161, 'loss/train': 5.224027156829834} +07/25/2024 11:30:42 - INFO - __main__ - Step 1163: {'lr': 0.0004999881867152143, 'samples': 55824, 'steps': 1162, 'loss/train': 1.7881747484207153} +07/25/2024 11:30:42 - INFO - __main__ - Step 1164: {'lr': 0.0004999881355205122, 'samples': 55872, 'steps': 1163, 'loss/train': 4.69658088684082} +07/25/2024 11:30:43 - INFO - __main__ - Step 1165: {'lr': 0.0004999880842151223, 'samples': 55920, 'steps': 1164, 'loss/train': 3.8317081928253174} +07/25/2024 11:30:43 - INFO - __main__ - Step 1166: {'lr': 0.0004999880327990448, 'samples': 55968, 'steps': 1165, 'loss/train': 4.207746982574463} +07/25/2024 11:30:43 - INFO - __main__ - Step 1167: {'lr': 0.0004999879812722795, 'samples': 56016, 'steps': 1166, 'loss/train': 4.18616247177124} +07/25/2024 11:30:43 - INFO - __main__ - Step 1168: {'lr': 0.0004999879296348266, 'samples': 56064, 'steps': 1167, 'loss/train': 1.6322253942489624} +07/25/2024 11:30:44 - INFO - __main__ - Step 1169: {'lr': 0.000499987877886686, 'samples': 56112, 'steps': 1168, 'loss/train': 4.663905143737793} +07/25/2024 11:30:44 - INFO - __main__ - Step 1170: {'lr': 0.0004999878260278578, 'samples': 56160, 'steps': 1169, 'loss/train': 4.7750115394592285} +07/25/2024 11:30:44 - INFO - __main__ - Step 1171: {'lr': 0.000499987774058342, 'samples': 56208, 'steps': 1170, 'loss/train': 4.4919867515563965} +07/25/2024 11:30:45 - INFO - __main__ - Step 1172: {'lr': 0.0004999877219781385, 'samples': 56256, 'steps': 1171, 'loss/train': 4.451632976531982} +07/25/2024 11:30:45 - INFO - __main__ - Step 1173: {'lr': 0.0004999876697872476, 'samples': 56304, 'steps': 1172, 'loss/train': 4.057921886444092} +07/25/2024 11:30:45 - INFO - __main__ - Step 1174: {'lr': 0.0004999876174856692, 'samples': 56352, 'steps': 1173, 'loss/train': 4.333468914031982} +07/25/2024 11:30:45 - INFO - __main__ - Step 1175: {'lr': 0.0004999875650734031, 'samples': 56400, 'steps': 1174, 'loss/train': 4.311472415924072} +07/25/2024 11:30:46 - INFO - __main__ - Step 1176: {'lr': 0.0004999875125504497, 'samples': 56448, 'steps': 1175, 'loss/train': 4.8041887283325195} +07/25/2024 11:30:46 - INFO - __main__ - Step 1177: {'lr': 0.0004999874599168088, 'samples': 56496, 'steps': 1176, 'loss/train': 4.38114595413208} +07/25/2024 11:30:46 - INFO - __main__ - Step 1178: {'lr': 0.0004999874071724802, 'samples': 56544, 'steps': 1177, 'loss/train': 3.6344070434570312} +07/25/2024 11:30:47 - INFO - __main__ - Step 1179: {'lr': 0.0004999873543174645, 'samples': 56592, 'steps': 1178, 'loss/train': 3.8525047302246094} +07/25/2024 11:30:47 - INFO - __main__ - Step 1180: {'lr': 0.0004999873013517613, 'samples': 56640, 'steps': 1179, 'loss/train': 3.873624324798584} +07/25/2024 11:30:47 - INFO - __main__ - Step 1181: {'lr': 0.0004999872482753707, 'samples': 56688, 'steps': 1180, 'loss/train': 4.304619789123535} +07/25/2024 11:30:47 - INFO - __main__ - Step 1182: {'lr': 0.0004999871950882928, 'samples': 56736, 'steps': 1181, 'loss/train': 4.888845443725586} +07/25/2024 11:30:48 - INFO - __main__ - Step 1183: {'lr': 0.0004999871417905275, 'samples': 56784, 'steps': 1182, 'loss/train': 3.842681884765625} +07/25/2024 11:30:48 - INFO - __main__ - Step 1184: {'lr': 0.0004999870883820749, 'samples': 56832, 'steps': 1183, 'loss/train': 4.196894645690918} +07/25/2024 11:30:48 - INFO - __main__ - Step 1185: {'lr': 0.000499987034862935, 'samples': 56880, 'steps': 1184, 'loss/train': 4.336077690124512} +07/25/2024 11:30:49 - INFO - __main__ - Step 1186: {'lr': 0.0004999869812331079, 'samples': 56928, 'steps': 1185, 'loss/train': 4.193338394165039} +07/25/2024 11:30:49 - INFO - __main__ - Step 1187: {'lr': 0.0004999869274925937, 'samples': 56976, 'steps': 1186, 'loss/train': 1.7524135112762451} +07/25/2024 11:30:49 - INFO - __main__ - Step 1188: {'lr': 0.0004999868736413921, 'samples': 57024, 'steps': 1187, 'loss/train': 3.8675477504730225} +07/25/2024 11:30:49 - INFO - __main__ - Step 1189: {'lr': 0.0004999868196795034, 'samples': 57072, 'steps': 1188, 'loss/train': 4.410436153411865} +07/25/2024 11:30:50 - INFO - __main__ - Step 1190: {'lr': 0.0004999867656069275, 'samples': 57120, 'steps': 1189, 'loss/train': 4.784191608428955} +07/25/2024 11:30:50 - INFO - __main__ - Step 1191: {'lr': 0.0004999867114236645, 'samples': 57168, 'steps': 1190, 'loss/train': 4.8255486488342285} +07/25/2024 11:30:50 - INFO - __main__ - Step 1192: {'lr': 0.0004999866571297144, 'samples': 57216, 'steps': 1191, 'loss/train': 1.5217509269714355} +07/25/2024 11:30:51 - INFO - __main__ - Step 1193: {'lr': 0.0004999866027250772, 'samples': 57264, 'steps': 1192, 'loss/train': 4.283402442932129} +07/25/2024 11:30:51 - INFO - __main__ - Step 1194: {'lr': 0.0004999865482097529, 'samples': 57312, 'steps': 1193, 'loss/train': 4.688429832458496} +07/25/2024 11:30:51 - INFO - __main__ - Step 1195: {'lr': 0.0004999864935837416, 'samples': 57360, 'steps': 1194, 'loss/train': 4.6118364334106445} +07/25/2024 11:30:51 - INFO - __main__ - Step 1196: {'lr': 0.0004999864388470432, 'samples': 57408, 'steps': 1195, 'loss/train': 3.794581174850464} +07/25/2024 11:30:52 - INFO - __main__ - Step 1197: {'lr': 0.0004999863839996579, 'samples': 57456, 'steps': 1196, 'loss/train': 4.079034328460693} +07/25/2024 11:30:52 - INFO - __main__ - Step 1198: {'lr': 0.0004999863290415857, 'samples': 57504, 'steps': 1197, 'loss/train': 4.598700046539307} +07/25/2024 11:30:52 - INFO - __main__ - Step 1199: {'lr': 0.0004999862739728264, 'samples': 57552, 'steps': 1198, 'loss/train': 4.193960189819336} +07/25/2024 11:30:53 - INFO - __main__ - Step 1200: {'lr': 0.0004999862187933802, 'samples': 57600, 'steps': 1199, 'loss/train': 4.78066873550415} +07/25/2024 11:30:53 - INFO - __main__ - Step 1201: {'lr': 0.0004999861635032472, 'samples': 57648, 'steps': 1200, 'loss/train': 4.244267463684082} +07/25/2024 11:30:53 - INFO - __main__ - Step 1202: {'lr': 0.0004999861081024273, 'samples': 57696, 'steps': 1201, 'loss/train': 2.0192534923553467} +07/25/2024 11:30:53 - INFO - __main__ - Step 1203: {'lr': 0.0004999860525909206, 'samples': 57744, 'steps': 1202, 'loss/train': 4.216322898864746} +07/25/2024 11:30:54 - INFO - __main__ - Step 1204: {'lr': 0.0004999859969687271, 'samples': 57792, 'steps': 1203, 'loss/train': 4.183557987213135} +07/25/2024 11:30:54 - INFO - __main__ - Step 1205: {'lr': 0.0004999859412358466, 'samples': 57840, 'steps': 1204, 'loss/train': 3.9855198860168457} +07/25/2024 11:30:54 - INFO - __main__ - Step 1206: {'lr': 0.0004999858853922796, 'samples': 57888, 'steps': 1205, 'loss/train': 4.607059955596924} +07/25/2024 11:30:55 - INFO - __main__ - Step 1207: {'lr': 0.0004999858294380256, 'samples': 57936, 'steps': 1206, 'loss/train': 3.9548540115356445} +07/25/2024 11:30:55 - INFO - __main__ - Step 1208: {'lr': 0.0004999857733730851, 'samples': 57984, 'steps': 1207, 'loss/train': 4.569610118865967} +07/25/2024 11:30:55 - INFO - __main__ - Step 1209: {'lr': 0.0004999857171974578, 'samples': 58032, 'steps': 1208, 'loss/train': 4.076261520385742} +07/25/2024 11:30:55 - INFO - __main__ - Step 1210: {'lr': 0.0004999856609111437, 'samples': 58080, 'steps': 1209, 'loss/train': 4.1263508796691895} +07/25/2024 11:30:56 - INFO - __main__ - Step 1211: {'lr': 0.0004999856045141432, 'samples': 58128, 'steps': 1210, 'loss/train': 1.7455568313598633} +07/25/2024 11:30:56 - INFO - __main__ - Step 1212: {'lr': 0.0004999855480064559, 'samples': 58176, 'steps': 1211, 'loss/train': 4.338027477264404} +07/25/2024 11:30:56 - INFO - __main__ - Step 1213: {'lr': 0.0004999854913880821, 'samples': 58224, 'steps': 1212, 'loss/train': 4.4526519775390625} +07/25/2024 11:30:57 - INFO - __main__ - Step 1214: {'lr': 0.0004999854346590217, 'samples': 58272, 'steps': 1213, 'loss/train': 4.608829498291016} +07/25/2024 11:30:57 - INFO - __main__ - Step 1215: {'lr': 0.0004999853778192748, 'samples': 58320, 'steps': 1214, 'loss/train': 4.073418140411377} +07/25/2024 11:30:57 - INFO - __main__ - Step 1216: {'lr': 0.0004999853208688413, 'samples': 58368, 'steps': 1215, 'loss/train': 1.3233171701431274} +07/25/2024 11:30:57 - INFO - __main__ - Step 1217: {'lr': 0.0004999852638077213, 'samples': 58416, 'steps': 1216, 'loss/train': 4.749626636505127} +07/25/2024 11:30:58 - INFO - __main__ - Step 1218: {'lr': 0.0004999852066359149, 'samples': 58464, 'steps': 1217, 'loss/train': 4.278598308563232} +07/25/2024 11:30:58 - INFO - __main__ - Step 1219: {'lr': 0.000499985149353422, 'samples': 58512, 'steps': 1218, 'loss/train': 4.015615463256836} +07/25/2024 11:30:58 - INFO - __main__ - Step 1220: {'lr': 0.0004999850919602427, 'samples': 58560, 'steps': 1219, 'loss/train': 5.246405124664307} +07/25/2024 11:30:59 - INFO - __main__ - Step 1221: {'lr': 0.000499985034456377, 'samples': 58608, 'steps': 1220, 'loss/train': 4.075469970703125} +07/25/2024 11:30:59 - INFO - __main__ - Step 1222: {'lr': 0.000499984976841825, 'samples': 58656, 'steps': 1221, 'loss/train': 4.663238525390625} +07/25/2024 11:30:59 - INFO - __main__ - Step 1223: {'lr': 0.0004999849191165865, 'samples': 58704, 'steps': 1222, 'loss/train': 4.17381477355957} +07/25/2024 11:30:59 - INFO - __main__ - Step 1224: {'lr': 0.0004999848612806618, 'samples': 58752, 'steps': 1223, 'loss/train': 4.562243938446045} +07/25/2024 11:31:00 - INFO - __main__ - Step 1225: {'lr': 0.0004999848033340507, 'samples': 58800, 'steps': 1224, 'loss/train': 4.3710618019104} +07/25/2024 11:31:00 - INFO - __main__ - Step 1226: {'lr': 0.0004999847452767535, 'samples': 58848, 'steps': 1225, 'loss/train': 4.306656360626221} +07/25/2024 11:31:00 - INFO - __main__ - Step 1227: {'lr': 0.00049998468710877, 'samples': 58896, 'steps': 1226, 'loss/train': 4.376728057861328} +07/25/2024 11:31:01 - INFO - __main__ - Step 1228: {'lr': 0.0004999846288301002, 'samples': 58944, 'steps': 1227, 'loss/train': 4.089417934417725} +07/25/2024 11:31:01 - INFO - __main__ - Step 1229: {'lr': 0.0004999845704407443, 'samples': 58992, 'steps': 1228, 'loss/train': 4.3150224685668945} +07/25/2024 11:31:01 - INFO - __main__ - Step 1230: {'lr': 0.0004999845119407023, 'samples': 59040, 'steps': 1229, 'loss/train': 4.704394817352295} +07/25/2024 11:31:01 - INFO - __main__ - Step 1231: {'lr': 0.000499984453329974, 'samples': 59088, 'steps': 1230, 'loss/train': 3.9258108139038086} +07/25/2024 11:31:02 - INFO - __main__ - Step 1232: {'lr': 0.0004999843946085597, 'samples': 59136, 'steps': 1231, 'loss/train': 4.3009843826293945} +07/25/2024 11:31:02 - INFO - __main__ - Step 1233: {'lr': 0.0004999843357764592, 'samples': 59184, 'steps': 1232, 'loss/train': 4.681562900543213} +07/25/2024 11:31:02 - INFO - __main__ - Step 1234: {'lr': 0.0004999842768336727, 'samples': 59232, 'steps': 1233, 'loss/train': 4.487188339233398} +07/25/2024 11:31:03 - INFO - __main__ - Step 1235: {'lr': 0.0004999842177802002, 'samples': 59280, 'steps': 1234, 'loss/train': 1.922473669052124} +07/25/2024 11:31:03 - INFO - __main__ - Step 1236: {'lr': 0.0004999841586160417, 'samples': 59328, 'steps': 1235, 'loss/train': 4.151848793029785} +07/25/2024 11:31:03 - INFO - __main__ - Step 1237: {'lr': 0.0004999840993411971, 'samples': 59376, 'steps': 1236, 'loss/train': 4.288239479064941} +07/25/2024 11:31:03 - INFO - __main__ - Step 1238: {'lr': 0.0004999840399556668, 'samples': 59424, 'steps': 1237, 'loss/train': 4.208078861236572} +07/25/2024 11:31:04 - INFO - __main__ - Step 1239: {'lr': 0.0004999839804594503, 'samples': 59472, 'steps': 1238, 'loss/train': 3.8526268005371094} +07/25/2024 11:31:04 - INFO - __main__ - Step 1240: {'lr': 0.000499983920852548, 'samples': 59520, 'steps': 1239, 'loss/train': 1.2083255052566528} +07/25/2024 11:31:04 - INFO - __main__ - Step 1241: {'lr': 0.0004999838611349598, 'samples': 59568, 'steps': 1240, 'loss/train': 5.653955936431885} +07/25/2024 11:31:05 - INFO - __main__ - Step 1242: {'lr': 0.0004999838013066859, 'samples': 59616, 'steps': 1241, 'loss/train': 1.925066590309143} +07/25/2024 11:31:05 - INFO - __main__ - Step 1243: {'lr': 0.0004999837413677261, 'samples': 59664, 'steps': 1242, 'loss/train': 4.157515048980713} +07/25/2024 11:31:05 - INFO - __main__ - Step 1244: {'lr': 0.0004999836813180804, 'samples': 59712, 'steps': 1243, 'loss/train': 3.6095645427703857} +07/25/2024 11:31:05 - INFO - __main__ - Step 1245: {'lr': 0.000499983621157749, 'samples': 59760, 'steps': 1244, 'loss/train': 3.8521406650543213} +07/25/2024 11:31:06 - INFO - __main__ - Step 1246: {'lr': 0.0004999835608867318, 'samples': 59808, 'steps': 1245, 'loss/train': 4.449384689331055} +07/25/2024 11:31:06 - INFO - __main__ - Step 1247: {'lr': 0.000499983500505029, 'samples': 59856, 'steps': 1246, 'loss/train': 3.5902786254882812} +07/25/2024 11:31:06 - INFO - __main__ - Step 1248: {'lr': 0.0004999834400126404, 'samples': 59904, 'steps': 1247, 'loss/train': 4.805014133453369} +07/25/2024 11:31:07 - INFO - __main__ - Step 1249: {'lr': 0.0004999833794095662, 'samples': 59952, 'steps': 1248, 'loss/train': 4.274191856384277} +07/25/2024 11:31:07 - INFO - __main__ - Step 1250: {'lr': 0.0004999833186958064, 'samples': 60000, 'steps': 1249, 'loss/train': 4.102481842041016} +07/25/2024 11:31:07 - INFO - __main__ - Step 1251: {'lr': 0.0004999832578713609, 'samples': 60048, 'steps': 1250, 'loss/train': 3.7802553176879883} +07/25/2024 11:31:07 - INFO - __main__ - Step 1252: {'lr': 0.0004999831969362299, 'samples': 60096, 'steps': 1251, 'loss/train': 4.958285331726074} +07/25/2024 11:31:08 - INFO - __main__ - Step 1253: {'lr': 0.0004999831358904132, 'samples': 60144, 'steps': 1252, 'loss/train': 3.8490123748779297} +07/25/2024 11:31:08 - INFO - __main__ - Step 1254: {'lr': 0.0004999830747339112, 'samples': 60192, 'steps': 1253, 'loss/train': 4.257024765014648} +07/25/2024 11:31:08 - INFO - __main__ - Step 1255: {'lr': 0.0004999830134667235, 'samples': 60240, 'steps': 1254, 'loss/train': 4.28190279006958} +07/25/2024 11:31:09 - INFO - __main__ - Step 1256: {'lr': 0.0004999829520888504, 'samples': 60288, 'steps': 1255, 'loss/train': 3.9548869132995605} +07/25/2024 11:31:09 - INFO - __main__ - Step 1257: {'lr': 0.0004999828906002918, 'samples': 60336, 'steps': 1256, 'loss/train': 4.14442777633667} +07/25/2024 11:31:09 - INFO - __main__ - Step 1258: {'lr': 0.0004999828290010479, 'samples': 60384, 'steps': 1257, 'loss/train': 4.141203880310059} +07/25/2024 11:31:09 - INFO - __main__ - Step 1259: {'lr': 0.0004999827672911185, 'samples': 60432, 'steps': 1258, 'loss/train': 2.0353176593780518} +07/25/2024 11:31:10 - INFO - __main__ - Step 1260: {'lr': 0.0004999827054705038, 'samples': 60480, 'steps': 1259, 'loss/train': 3.789705276489258} +07/25/2024 11:31:10 - INFO - __main__ - Step 1261: {'lr': 0.0004999826435392037, 'samples': 60528, 'steps': 1260, 'loss/train': 4.265719413757324} +07/25/2024 11:31:10 - INFO - __main__ - Step 1262: {'lr': 0.0004999825814972183, 'samples': 60576, 'steps': 1261, 'loss/train': 4.050114154815674} +07/25/2024 11:31:11 - INFO - __main__ - Step 1263: {'lr': 0.0004999825193445477, 'samples': 60624, 'steps': 1262, 'loss/train': 3.6806318759918213} +07/25/2024 11:31:11 - INFO - __main__ - Step 1264: {'lr': 0.0004999824570811917, 'samples': 60672, 'steps': 1263, 'loss/train': 1.516629695892334} +07/25/2024 11:31:11 - INFO - __main__ - Step 1265: {'lr': 0.0004999823947071507, 'samples': 60720, 'steps': 1264, 'loss/train': 5.111597537994385} +07/25/2024 11:31:11 - INFO - __main__ - Step 1266: {'lr': 0.0004999823322224243, 'samples': 60768, 'steps': 1265, 'loss/train': 3.1677870750427246} +07/25/2024 11:31:12 - INFO - __main__ - Step 1267: {'lr': 0.0004999822696270127, 'samples': 60816, 'steps': 1266, 'loss/train': 4.088977813720703} +07/25/2024 11:31:12 - INFO - __main__ - Step 1268: {'lr': 0.0004999822069209161, 'samples': 60864, 'steps': 1267, 'loss/train': 4.566497802734375} +07/25/2024 11:31:12 - INFO - __main__ - Step 1269: {'lr': 0.0004999821441041344, 'samples': 60912, 'steps': 1268, 'loss/train': 3.955132007598877} +07/25/2024 11:31:13 - INFO - __main__ - Step 1270: {'lr': 0.0004999820811766675, 'samples': 60960, 'steps': 1269, 'loss/train': 4.183516025543213} +07/25/2024 11:31:13 - INFO - __main__ - Step 1271: {'lr': 0.0004999820181385156, 'samples': 61008, 'steps': 1270, 'loss/train': 4.901875019073486} +07/25/2024 11:31:13 - INFO - __main__ - Step 1272: {'lr': 0.0004999819549896786, 'samples': 61056, 'steps': 1271, 'loss/train': 4.036576747894287} +07/25/2024 11:31:13 - INFO - __main__ - Step 1273: {'lr': 0.0004999818917301567, 'samples': 61104, 'steps': 1272, 'loss/train': 3.6638054847717285} +07/25/2024 11:31:14 - INFO - __main__ - Step 1274: {'lr': 0.0004999818283599498, 'samples': 61152, 'steps': 1273, 'loss/train': 4.472262859344482} +07/25/2024 11:31:14 - INFO - __main__ - Step 1275: {'lr': 0.0004999817648790578, 'samples': 61200, 'steps': 1274, 'loss/train': 3.390198230743408} +07/25/2024 11:31:14 - INFO - __main__ - Step 1276: {'lr': 0.000499981701287481, 'samples': 61248, 'steps': 1275, 'loss/train': 4.409641265869141} +07/25/2024 11:31:15 - INFO - __main__ - Step 1277: {'lr': 0.0004999816375852194, 'samples': 61296, 'steps': 1276, 'loss/train': 4.3258490562438965} +07/25/2024 11:31:15 - INFO - __main__ - Step 1278: {'lr': 0.0004999815737722728, 'samples': 61344, 'steps': 1277, 'loss/train': 4.5562028884887695} +07/25/2024 11:31:15 - INFO - __main__ - Step 1279: {'lr': 0.0004999815098486413, 'samples': 61392, 'steps': 1278, 'loss/train': 3.875267505645752} +07/25/2024 11:31:15 - INFO - __main__ - Step 1280: {'lr': 0.0004999814458143252, 'samples': 61440, 'steps': 1279, 'loss/train': 4.195573329925537} +07/25/2024 11:31:16 - INFO - __main__ - Step 1281: {'lr': 0.0004999813816693241, 'samples': 61488, 'steps': 1280, 'loss/train': 3.370211362838745} +07/25/2024 11:31:16 - INFO - __main__ - Step 1282: {'lr': 0.0004999813174136384, 'samples': 61536, 'steps': 1281, 'loss/train': 4.0314483642578125} +07/25/2024 11:31:16 - INFO - __main__ - Step 1283: {'lr': 0.000499981253047268, 'samples': 61584, 'steps': 1282, 'loss/train': 2.155268430709839} +07/25/2024 11:31:17 - INFO - __main__ - Step 1284: {'lr': 0.0004999811885702128, 'samples': 61632, 'steps': 1283, 'loss/train': 4.372516632080078} +07/25/2024 11:31:17 - INFO - __main__ - Step 1285: {'lr': 0.000499981123982473, 'samples': 61680, 'steps': 1284, 'loss/train': 4.090909481048584} +07/25/2024 11:31:17 - INFO - __main__ - Step 1286: {'lr': 0.0004999810592840485, 'samples': 61728, 'steps': 1285, 'loss/train': 3.861618757247925} +07/25/2024 11:31:17 - INFO - __main__ - Step 1287: {'lr': 0.0004999809944749395, 'samples': 61776, 'steps': 1286, 'loss/train': 4.277287006378174} +07/25/2024 11:31:18 - INFO - __main__ - Step 1288: {'lr': 0.0004999809295551458, 'samples': 61824, 'steps': 1287, 'loss/train': 1.438581109046936} +07/25/2024 11:31:18 - INFO - __main__ - Step 1289: {'lr': 0.0004999808645246676, 'samples': 61872, 'steps': 1288, 'loss/train': 4.298671245574951} +07/25/2024 11:31:18 - INFO - __main__ - Step 1290: {'lr': 0.0004999807993835048, 'samples': 61920, 'steps': 1289, 'loss/train': 3.4564685821533203} +07/25/2024 11:31:19 - INFO - __main__ - Step 1291: {'lr': 0.0004999807341316577, 'samples': 61968, 'steps': 1290, 'loss/train': 4.348170757293701} +07/25/2024 11:31:19 - INFO - __main__ - Step 1292: {'lr': 0.0004999806687691259, 'samples': 62016, 'steps': 1291, 'loss/train': 4.628023147583008} +07/25/2024 11:31:19 - INFO - __main__ - Step 1293: {'lr': 0.0004999806032959098, 'samples': 62064, 'steps': 1292, 'loss/train': 4.4414238929748535} +07/25/2024 11:31:19 - INFO - __main__ - Step 1294: {'lr': 0.0004999805377120094, 'samples': 62112, 'steps': 1293, 'loss/train': 4.6884684562683105} +07/25/2024 11:31:20 - INFO - __main__ - Step 1295: {'lr': 0.0004999804720174244, 'samples': 62160, 'steps': 1294, 'loss/train': 4.688125133514404} +07/25/2024 11:31:20 - INFO - __main__ - Step 1296: {'lr': 0.0004999804062121551, 'samples': 62208, 'steps': 1295, 'loss/train': 3.820019483566284} +07/25/2024 11:31:20 - INFO - __main__ - Step 1297: {'lr': 0.0004999803402962016, 'samples': 62256, 'steps': 1296, 'loss/train': 4.511084079742432} +07/25/2024 11:31:21 - INFO - __main__ - Step 1298: {'lr': 0.0004999802742695637, 'samples': 62304, 'steps': 1297, 'loss/train': 4.51163911819458} +07/25/2024 11:31:21 - INFO - __main__ - Step 1299: {'lr': 0.0004999802081322416, 'samples': 62352, 'steps': 1298, 'loss/train': 4.362824440002441} +07/25/2024 11:31:21 - INFO - __main__ - Step 1300: {'lr': 0.0004999801418842353, 'samples': 62400, 'steps': 1299, 'loss/train': 3.8499886989593506} +07/25/2024 11:31:21 - INFO - __main__ - Step 1301: {'lr': 0.0004999800755255447, 'samples': 62448, 'steps': 1300, 'loss/train': 4.105699062347412} +07/25/2024 11:31:22 - INFO - __main__ - Step 1302: {'lr': 0.00049998000905617, 'samples': 62496, 'steps': 1301, 'loss/train': 4.91643762588501} +07/25/2024 11:31:22 - INFO - __main__ - Step 1303: {'lr': 0.0004999799424761112, 'samples': 62544, 'steps': 1302, 'loss/train': 3.7967910766601562} +07/25/2024 11:31:22 - INFO - __main__ - Step 1304: {'lr': 0.0004999798757853681, 'samples': 62592, 'steps': 1303, 'loss/train': 4.307610511779785} +07/25/2024 11:31:23 - INFO - __main__ - Step 1305: {'lr': 0.000499979808983941, 'samples': 62640, 'steps': 1304, 'loss/train': 3.710989475250244} +07/25/2024 11:31:23 - INFO - __main__ - Step 1306: {'lr': 0.0004999797420718299, 'samples': 62688, 'steps': 1305, 'loss/train': 4.430567264556885} +07/25/2024 11:31:23 - INFO - __main__ - Step 1307: {'lr': 0.0004999796750490348, 'samples': 62736, 'steps': 1306, 'loss/train': 1.6223148107528687} +07/25/2024 11:31:23 - INFO - __main__ - Step 1308: {'lr': 0.0004999796079155557, 'samples': 62784, 'steps': 1307, 'loss/train': 4.152270317077637} +07/25/2024 11:31:24 - INFO - __main__ - Step 1309: {'lr': 0.0004999795406713925, 'samples': 62832, 'steps': 1308, 'loss/train': 4.090967655181885} +07/25/2024 11:31:24 - INFO - __main__ - Step 1310: {'lr': 0.0004999794733165455, 'samples': 62880, 'steps': 1309, 'loss/train': 4.419124603271484} +07/25/2024 11:31:24 - INFO - __main__ - Step 1311: {'lr': 0.0004999794058510145, 'samples': 62928, 'steps': 1310, 'loss/train': 3.7874879837036133} +07/25/2024 11:31:25 - INFO - __main__ - Step 1312: {'lr': 0.0004999793382747997, 'samples': 62976, 'steps': 1311, 'loss/train': 1.849772334098816} +07/25/2024 11:31:25 - INFO - __main__ - Step 1313: {'lr': 0.0004999792705879009, 'samples': 63024, 'steps': 1312, 'loss/train': 3.853640079498291} +07/25/2024 11:31:25 - INFO - __main__ - Step 1314: {'lr': 0.0004999792027903184, 'samples': 63072, 'steps': 1313, 'loss/train': 3.5559568405151367} +07/25/2024 11:31:25 - INFO - __main__ - Step 1315: {'lr': 0.0004999791348820521, 'samples': 63120, 'steps': 1314, 'loss/train': 4.298774242401123} +07/25/2024 11:31:26 - INFO - __main__ - Step 1316: {'lr': 0.0004999790668631021, 'samples': 63168, 'steps': 1315, 'loss/train': 3.77634596824646} +07/25/2024 11:31:26 - INFO - __main__ - Step 1317: {'lr': 0.0004999789987334683, 'samples': 63216, 'steps': 1316, 'loss/train': 3.9574806690216064} +07/25/2024 11:31:26 - INFO - __main__ - Step 1318: {'lr': 0.0004999789304931509, 'samples': 63264, 'steps': 1317, 'loss/train': 3.8872339725494385} +07/25/2024 11:31:27 - INFO - __main__ - Step 1319: {'lr': 0.0004999788621421498, 'samples': 63312, 'steps': 1318, 'loss/train': 4.26023530960083} +07/25/2024 11:31:27 - INFO - __main__ - Step 1320: {'lr': 0.000499978793680465, 'samples': 63360, 'steps': 1319, 'loss/train': 3.8439154624938965} +07/25/2024 11:31:27 - INFO - __main__ - Step 1321: {'lr': 0.0004999787251080966, 'samples': 63408, 'steps': 1320, 'loss/train': 4.028916358947754} +07/25/2024 11:31:27 - INFO - __main__ - Step 1322: {'lr': 0.0004999786564250447, 'samples': 63456, 'steps': 1321, 'loss/train': 3.9328722953796387} +07/25/2024 11:31:28 - INFO - __main__ - Step 1323: {'lr': 0.0004999785876313092, 'samples': 63504, 'steps': 1322, 'loss/train': 4.226010322570801} +07/25/2024 11:31:28 - INFO - __main__ - Step 1324: {'lr': 0.0004999785187268901, 'samples': 63552, 'steps': 1323, 'loss/train': 3.575289249420166} +07/25/2024 11:31:28 - INFO - __main__ - Step 1325: {'lr': 0.0004999784497117877, 'samples': 63600, 'steps': 1324, 'loss/train': 3.5234947204589844} +07/25/2024 11:31:29 - INFO - __main__ - Step 1326: {'lr': 0.0004999783805860017, 'samples': 63648, 'steps': 1325, 'loss/train': 4.686971664428711} +07/25/2024 11:31:29 - INFO - __main__ - Step 1327: {'lr': 0.0004999783113495324, 'samples': 63696, 'steps': 1326, 'loss/train': 4.291503429412842} +07/25/2024 11:31:29 - INFO - __main__ - Step 1328: {'lr': 0.0004999782420023796, 'samples': 63744, 'steps': 1327, 'loss/train': 4.146937847137451} +07/25/2024 11:31:29 - INFO - __main__ - Step 1329: {'lr': 0.0004999781725445434, 'samples': 63792, 'steps': 1328, 'loss/train': 4.494054794311523} +07/25/2024 11:31:30 - INFO - __main__ - Step 1330: {'lr': 0.0004999781029760241, 'samples': 63840, 'steps': 1329, 'loss/train': 4.065125942230225} +07/25/2024 11:31:30 - INFO - __main__ - Step 1331: {'lr': 0.0004999780332968213, 'samples': 63888, 'steps': 1330, 'loss/train': 1.737822413444519} +07/25/2024 11:31:30 - INFO - __main__ - Step 1332: {'lr': 0.0004999779635069353, 'samples': 63936, 'steps': 1331, 'loss/train': 4.277159690856934} +07/25/2024 11:31:31 - INFO - __main__ - Step 1333: {'lr': 0.0004999778936063661, 'samples': 63984, 'steps': 1332, 'loss/train': 4.466935157775879} +07/25/2024 11:31:31 - INFO - __main__ - Step 1334: {'lr': 0.0004999778235951135, 'samples': 64032, 'steps': 1333, 'loss/train': 4.332859039306641} +07/25/2024 11:31:31 - INFO - __main__ - Step 1335: {'lr': 0.000499977753473178, 'samples': 64080, 'steps': 1334, 'loss/train': 4.242156505584717} +07/25/2024 11:31:31 - INFO - __main__ - Step 1336: {'lr': 0.0004999776832405592, 'samples': 64128, 'steps': 1335, 'loss/train': 4.1022562980651855} +07/25/2024 11:31:32 - INFO - __main__ - Step 1337: {'lr': 0.0004999776128972573, 'samples': 64176, 'steps': 1336, 'loss/train': 4.477460861206055} +07/25/2024 11:31:32 - INFO - __main__ - Step 1338: {'lr': 0.0004999775424432723, 'samples': 64224, 'steps': 1337, 'loss/train': 3.6581263542175293} +07/25/2024 11:31:32 - INFO - __main__ - Step 1339: {'lr': 0.0004999774718786042, 'samples': 64272, 'steps': 1338, 'loss/train': 4.490350246429443} +07/25/2024 11:31:33 - INFO - __main__ - Step 1340: {'lr': 0.0004999774012032532, 'samples': 64320, 'steps': 1339, 'loss/train': 3.1741011142730713} +07/25/2024 11:31:33 - INFO - __main__ - Step 1341: {'lr': 0.0004999773304172192, 'samples': 64368, 'steps': 1340, 'loss/train': 3.9760992527008057} +07/25/2024 11:31:33 - INFO - __main__ - Step 1342: {'lr': 0.0004999772595205022, 'samples': 64416, 'steps': 1341, 'loss/train': 4.114334583282471} +07/25/2024 11:31:33 - INFO - __main__ - Step 1343: {'lr': 0.0004999771885131022, 'samples': 64464, 'steps': 1342, 'loss/train': 3.7968685626983643} +07/25/2024 11:31:34 - INFO - __main__ - Step 1344: {'lr': 0.0004999771173950194, 'samples': 64512, 'steps': 1343, 'loss/train': 3.946321725845337} +07/25/2024 11:31:34 - INFO - __main__ - Step 1345: {'lr': 0.0004999770461662537, 'samples': 64560, 'steps': 1344, 'loss/train': 3.85030460357666} +07/25/2024 11:31:34 - INFO - __main__ - Step 1346: {'lr': 0.0004999769748268052, 'samples': 64608, 'steps': 1345, 'loss/train': 4.466180801391602} +07/25/2024 11:31:35 - INFO - __main__ - Step 1347: {'lr': 0.0004999769033766739, 'samples': 64656, 'steps': 1346, 'loss/train': 3.584890604019165} +07/25/2024 11:31:35 - INFO - __main__ - Step 1348: {'lr': 0.0004999768318158599, 'samples': 64704, 'steps': 1347, 'loss/train': 4.898392200469971} +07/25/2024 11:31:35 - INFO - __main__ - Step 1349: {'lr': 0.000499976760144363, 'samples': 64752, 'steps': 1348, 'loss/train': 4.766510009765625} +07/25/2024 11:31:35 - INFO - __main__ - Step 1350: {'lr': 0.0004999766883621835, 'samples': 64800, 'steps': 1349, 'loss/train': 4.1659650802612305} +07/25/2024 11:31:36 - INFO - __main__ - Step 1351: {'lr': 0.0004999766164693213, 'samples': 64848, 'steps': 1350, 'loss/train': 3.4473938941955566} +07/25/2024 11:31:36 - INFO - __main__ - Step 1352: {'lr': 0.0004999765444657765, 'samples': 64896, 'steps': 1351, 'loss/train': 4.601259708404541} +07/25/2024 11:31:36 - INFO - __main__ - Step 1353: {'lr': 0.000499976472351549, 'samples': 64944, 'steps': 1352, 'loss/train': 4.184303283691406} +07/25/2024 11:31:37 - INFO - __main__ - Step 1354: {'lr': 0.0004999764001266389, 'samples': 64992, 'steps': 1353, 'loss/train': 4.729404926300049} +07/25/2024 11:31:37 - INFO - __main__ - Step 1355: {'lr': 0.0004999763277910464, 'samples': 65040, 'steps': 1354, 'loss/train': 1.7463831901550293} +07/25/2024 11:31:37 - INFO - __main__ - Step 1356: {'lr': 0.0004999762553447713, 'samples': 65088, 'steps': 1355, 'loss/train': 1.640607237815857} +07/25/2024 11:31:37 - INFO - __main__ - Step 1357: {'lr': 0.0004999761827878138, 'samples': 65136, 'steps': 1356, 'loss/train': 4.087022304534912} +07/25/2024 11:31:38 - INFO - __main__ - Step 1358: {'lr': 0.0004999761101201738, 'samples': 65184, 'steps': 1357, 'loss/train': 3.949892282485962} +07/25/2024 11:31:38 - INFO - __main__ - Step 1359: {'lr': 0.0004999760373418512, 'samples': 65232, 'steps': 1358, 'loss/train': 4.4999542236328125} +07/25/2024 11:31:38 - INFO - __main__ - Step 1360: {'lr': 0.0004999759644528464, 'samples': 65280, 'steps': 1359, 'loss/train': 4.190948486328125} +07/25/2024 11:31:38 - INFO - __main__ - Step 1361: {'lr': 0.0004999758914531593, 'samples': 65328, 'steps': 1360, 'loss/train': 4.724259853363037} +07/25/2024 11:31:39 - INFO - __main__ - Step 1362: {'lr': 0.0004999758183427897, 'samples': 65376, 'steps': 1361, 'loss/train': 3.1196188926696777} +07/25/2024 11:31:39 - INFO - __main__ - Step 1363: {'lr': 0.000499975745121738, 'samples': 65424, 'steps': 1362, 'loss/train': 4.201485633850098} +07/25/2024 11:31:39 - INFO - __main__ - Step 1364: {'lr': 0.000499975671790004, 'samples': 65472, 'steps': 1363, 'loss/train': 4.346766471862793} +07/25/2024 11:31:40 - INFO - __main__ - Step 1365: {'lr': 0.0004999755983475877, 'samples': 65520, 'steps': 1364, 'loss/train': 4.311817646026611} +07/25/2024 11:31:40 - INFO - __main__ - Step 1366: {'lr': 0.0004999755247944893, 'samples': 65568, 'steps': 1365, 'loss/train': 4.855163097381592} +07/25/2024 11:31:40 - INFO - __main__ - Step 1367: {'lr': 0.0004999754511307087, 'samples': 65616, 'steps': 1366, 'loss/train': 4.199634552001953} +07/25/2024 11:31:40 - INFO - __main__ - Step 1368: {'lr': 0.0004999753773562459, 'samples': 65664, 'steps': 1367, 'loss/train': 4.207736492156982} +07/25/2024 11:31:41 - INFO - __main__ - Step 1369: {'lr': 0.0004999753034711011, 'samples': 65712, 'steps': 1368, 'loss/train': 4.062959671020508} +07/25/2024 11:31:41 - INFO - __main__ - Step 1370: {'lr': 0.0004999752294752742, 'samples': 65760, 'steps': 1369, 'loss/train': 4.4023213386535645} +07/25/2024 11:31:41 - INFO - __main__ - Step 1371: {'lr': 0.0004999751553687652, 'samples': 65808, 'steps': 1370, 'loss/train': 4.318541526794434} +07/25/2024 11:31:42 - INFO - __main__ - Step 1372: {'lr': 0.0004999750811515743, 'samples': 65856, 'steps': 1371, 'loss/train': 4.509751319885254} +07/25/2024 11:31:42 - INFO - __main__ - Step 1373: {'lr': 0.0004999750068237015, 'samples': 65904, 'steps': 1372, 'loss/train': 4.685635089874268} +07/25/2024 11:31:42 - INFO - __main__ - Step 1374: {'lr': 0.0004999749323851467, 'samples': 65952, 'steps': 1373, 'loss/train': 4.115787506103516} +07/25/2024 11:31:42 - INFO - __main__ - Step 1375: {'lr': 0.00049997485783591, 'samples': 66000, 'steps': 1374, 'loss/train': 4.400790214538574} +07/25/2024 11:31:43 - INFO - __main__ - Step 1376: {'lr': 0.0004999747831759914, 'samples': 66048, 'steps': 1375, 'loss/train': 4.211608409881592} +07/25/2024 11:31:43 - INFO - __main__ - Step 1377: {'lr': 0.000499974708405391, 'samples': 66096, 'steps': 1376, 'loss/train': 4.1996870040893555} +07/25/2024 11:31:43 - INFO - __main__ - Step 1378: {'lr': 0.0004999746335241089, 'samples': 66144, 'steps': 1377, 'loss/train': 4.2249226570129395} +07/25/2024 11:31:44 - INFO - __main__ - Step 1379: {'lr': 0.0004999745585321449, 'samples': 66192, 'steps': 1378, 'loss/train': 1.7367351055145264} +07/25/2024 11:31:44 - INFO - __main__ - Step 1380: {'lr': 0.0004999744834294993, 'samples': 66240, 'steps': 1379, 'loss/train': 4.214527606964111} +07/25/2024 11:31:44 - INFO - __main__ - Step 1381: {'lr': 0.0004999744082161718, 'samples': 66288, 'steps': 1380, 'loss/train': 4.682741165161133} +07/25/2024 11:31:44 - INFO - __main__ - Step 1382: {'lr': 0.0004999743328921628, 'samples': 66336, 'steps': 1381, 'loss/train': 3.9636497497558594} +07/25/2024 11:31:45 - INFO - __main__ - Step 1383: {'lr': 0.0004999742574574722, 'samples': 66384, 'steps': 1382, 'loss/train': 3.4336040019989014} +07/25/2024 11:31:45 - INFO - __main__ - Step 1384: {'lr': 0.0004999741819120999, 'samples': 66432, 'steps': 1383, 'loss/train': 3.8941307067871094} +07/25/2024 11:31:45 - INFO - __main__ - Step 1385: {'lr': 0.000499974106256046, 'samples': 66480, 'steps': 1384, 'loss/train': 4.087078094482422} +07/25/2024 11:31:46 - INFO - __main__ - Step 1386: {'lr': 0.0004999740304893107, 'samples': 66528, 'steps': 1385, 'loss/train': 3.282548189163208} +07/25/2024 11:31:46 - INFO - __main__ - Step 1387: {'lr': 0.0004999739546118938, 'samples': 66576, 'steps': 1386, 'loss/train': 4.50581693649292} +07/25/2024 11:31:46 - INFO - __main__ - Step 1388: {'lr': 0.0004999738786237955, 'samples': 66624, 'steps': 1387, 'loss/train': 3.86824893951416} +07/25/2024 11:31:46 - INFO - __main__ - Step 1389: {'lr': 0.0004999738025250158, 'samples': 66672, 'steps': 1388, 'loss/train': 4.530158519744873} +07/25/2024 11:31:47 - INFO - __main__ - Step 1390: {'lr': 0.0004999737263155546, 'samples': 66720, 'steps': 1389, 'loss/train': 4.7715163230896} +07/25/2024 11:31:47 - INFO - __main__ - Step 1391: {'lr': 0.0004999736499954121, 'samples': 66768, 'steps': 1390, 'loss/train': 4.483738422393799} +07/25/2024 11:31:47 - INFO - __main__ - Step 1392: {'lr': 0.0004999735735645883, 'samples': 66816, 'steps': 1391, 'loss/train': 4.047937393188477} +07/25/2024 11:31:48 - INFO - __main__ - Step 1393: {'lr': 0.0004999734970230832, 'samples': 66864, 'steps': 1392, 'loss/train': 4.559358596801758} +07/25/2024 11:31:48 - INFO - __main__ - Step 1394: {'lr': 0.0004999734203708967, 'samples': 66912, 'steps': 1393, 'loss/train': 3.883598566055298} +07/25/2024 11:31:48 - INFO - __main__ - Step 1395: {'lr': 0.0004999733436080292, 'samples': 66960, 'steps': 1394, 'loss/train': 4.581500053405762} +07/25/2024 11:31:48 - INFO - __main__ - Step 1396: {'lr': 0.0004999732667344803, 'samples': 67008, 'steps': 1395, 'loss/train': 4.553986549377441} +07/25/2024 11:31:49 - INFO - __main__ - Step 1397: {'lr': 0.0004999731897502503, 'samples': 67056, 'steps': 1396, 'loss/train': 3.9598658084869385} +07/25/2024 11:31:49 - INFO - __main__ - Step 1398: {'lr': 0.0004999731126553393, 'samples': 67104, 'steps': 1397, 'loss/train': 3.6443445682525635} +07/25/2024 11:31:49 - INFO - __main__ - Step 1399: {'lr': 0.000499973035449747, 'samples': 67152, 'steps': 1398, 'loss/train': 3.192187547683716} +07/25/2024 11:31:50 - INFO - __main__ - Step 1400: {'lr': 0.0004999729581334738, 'samples': 67200, 'steps': 1399, 'loss/train': 3.17726731300354} +07/25/2024 11:31:50 - INFO - __main__ - Step 1401: {'lr': 0.0004999728807065195, 'samples': 67248, 'steps': 1400, 'loss/train': 4.270052433013916} +07/25/2024 11:31:50 - INFO - __main__ - Step 1402: {'lr': 0.0004999728031688844, 'samples': 67296, 'steps': 1401, 'loss/train': 3.972468852996826} +07/25/2024 11:31:50 - INFO - __main__ - Step 1403: {'lr': 0.0004999727255205681, 'samples': 67344, 'steps': 1402, 'loss/train': 1.3392481803894043} +07/25/2024 11:31:51 - INFO - __main__ - Step 1404: {'lr': 0.0004999726477615711, 'samples': 67392, 'steps': 1403, 'loss/train': 4.548022747039795} +07/25/2024 11:31:51 - INFO - __main__ - Step 1405: {'lr': 0.000499972569891893, 'samples': 67440, 'steps': 1404, 'loss/train': 3.9559359550476074} +07/25/2024 11:31:51 - INFO - __main__ - Step 1406: {'lr': 0.0004999724919115342, 'samples': 67488, 'steps': 1405, 'loss/train': 4.390100955963135} +07/25/2024 11:31:52 - INFO - __main__ - Step 1407: {'lr': 0.0004999724138204945, 'samples': 67536, 'steps': 1406, 'loss/train': 3.5757808685302734} +07/25/2024 11:31:52 - INFO - __main__ - Step 1408: {'lr': 0.000499972335618774, 'samples': 67584, 'steps': 1407, 'loss/train': 3.4588284492492676} +07/25/2024 11:31:52 - INFO - __main__ - Step 1409: {'lr': 0.0004999722573063728, 'samples': 67632, 'steps': 1408, 'loss/train': 4.25380277633667} +07/25/2024 11:31:52 - INFO - __main__ - Step 1410: {'lr': 0.0004999721788832909, 'samples': 67680, 'steps': 1409, 'loss/train': 3.3130905628204346} +07/25/2024 11:31:53 - INFO - __main__ - Step 1411: {'lr': 0.0004999721003495284, 'samples': 67728, 'steps': 1410, 'loss/train': 4.624722003936768} +07/25/2024 11:31:53 - INFO - __main__ - Step 1412: {'lr': 0.0004999720217050851, 'samples': 67776, 'steps': 1411, 'loss/train': 4.157478332519531} +07/25/2024 11:31:53 - INFO - __main__ - Step 1413: {'lr': 0.0004999719429499613, 'samples': 67824, 'steps': 1412, 'loss/train': 4.446649551391602} +07/25/2024 11:31:54 - INFO - __main__ - Step 1414: {'lr': 0.0004999718640841569, 'samples': 67872, 'steps': 1413, 'loss/train': 4.3528337478637695} +07/25/2024 11:31:54 - INFO - __main__ - Step 1415: {'lr': 0.0004999717851076719, 'samples': 67920, 'steps': 1414, 'loss/train': 4.955925941467285} +07/25/2024 11:31:54 - INFO - __main__ - Step 1416: {'lr': 0.0004999717060205065, 'samples': 67968, 'steps': 1415, 'loss/train': 3.189103364944458} +07/25/2024 11:31:54 - INFO - __main__ - Step 1417: {'lr': 0.0004999716268226606, 'samples': 68016, 'steps': 1416, 'loss/train': 4.477800369262695} +07/25/2024 11:31:55 - INFO - __main__ - Step 1418: {'lr': 0.0004999715475141342, 'samples': 68064, 'steps': 1417, 'loss/train': 3.970767021179199} +07/25/2024 11:31:55 - INFO - __main__ - Step 1419: {'lr': 0.0004999714680949275, 'samples': 68112, 'steps': 1418, 'loss/train': 4.009771347045898} +07/25/2024 11:31:55 - INFO - __main__ - Step 1420: {'lr': 0.0004999713885650405, 'samples': 68160, 'steps': 1419, 'loss/train': 4.194433212280273} +07/25/2024 11:31:56 - INFO - __main__ - Step 1421: {'lr': 0.0004999713089244729, 'samples': 68208, 'steps': 1420, 'loss/train': 4.310133934020996} +07/25/2024 11:31:56 - INFO - __main__ - Step 1422: {'lr': 0.0004999712291732252, 'samples': 68256, 'steps': 1421, 'loss/train': 2.405320167541504} +07/25/2024 11:31:56 - INFO - __main__ - Step 1423: {'lr': 0.0004999711493112973, 'samples': 68304, 'steps': 1422, 'loss/train': 4.482324600219727} +07/25/2024 11:31:56 - INFO - __main__ - Step 1424: {'lr': 0.0004999710693386891, 'samples': 68352, 'steps': 1423, 'loss/train': 4.156941890716553} +07/25/2024 11:31:57 - INFO - __main__ - Step 1425: {'lr': 0.0004999709892554007, 'samples': 68400, 'steps': 1424, 'loss/train': 4.378364562988281} +07/25/2024 11:31:57 - INFO - __main__ - Step 1426: {'lr': 0.0004999709090614321, 'samples': 68448, 'steps': 1425, 'loss/train': 3.9854142665863037} +07/25/2024 11:31:57 - INFO - __main__ - Step 1427: {'lr': 0.0004999708287567834, 'samples': 68496, 'steps': 1426, 'loss/train': 1.4170074462890625} +07/25/2024 11:31:58 - INFO - __main__ - Step 1428: {'lr': 0.0004999707483414547, 'samples': 68544, 'steps': 1427, 'loss/train': 4.274746417999268} +07/25/2024 11:31:58 - INFO - __main__ - Step 1429: {'lr': 0.0004999706678154459, 'samples': 68592, 'steps': 1428, 'loss/train': 3.669224262237549} +07/25/2024 11:31:58 - INFO - __main__ - Step 1430: {'lr': 0.0004999705871787572, 'samples': 68640, 'steps': 1429, 'loss/train': 4.274938583374023} +07/25/2024 11:31:58 - INFO - __main__ - Step 1431: {'lr': 0.0004999705064313884, 'samples': 68688, 'steps': 1430, 'loss/train': 4.187230587005615} +07/25/2024 11:31:59 - INFO - __main__ - Step 1432: {'lr': 0.0004999704255733398, 'samples': 68736, 'steps': 1431, 'loss/train': 3.4641456604003906} +07/25/2024 11:31:59 - INFO - __main__ - Step 1433: {'lr': 0.0004999703446046112, 'samples': 68784, 'steps': 1432, 'loss/train': 4.058530330657959} +07/25/2024 11:31:59 - INFO - __main__ - Step 1434: {'lr': 0.0004999702635252027, 'samples': 68832, 'steps': 1433, 'loss/train': 4.257251739501953} +07/25/2024 11:32:00 - INFO - __main__ - Step 1435: {'lr': 0.0004999701823351145, 'samples': 68880, 'steps': 1434, 'loss/train': 4.3701395988464355} +07/25/2024 11:32:00 - INFO - __main__ - Step 1436: {'lr': 0.0004999701010343463, 'samples': 68928, 'steps': 1435, 'loss/train': 4.322348594665527} +07/25/2024 11:32:00 - INFO - __main__ - Step 1437: {'lr': 0.0004999700196228985, 'samples': 68976, 'steps': 1436, 'loss/train': 3.772981882095337} +07/25/2024 11:32:00 - INFO - __main__ - Step 1438: {'lr': 0.0004999699381007711, 'samples': 69024, 'steps': 1437, 'loss/train': 4.5250983238220215} +07/25/2024 11:32:01 - INFO - __main__ - Step 1439: {'lr': 0.0004999698564679638, 'samples': 69072, 'steps': 1438, 'loss/train': 3.7300825119018555} +07/25/2024 11:32:01 - INFO - __main__ - Step 1440: {'lr': 0.0004999697747244769, 'samples': 69120, 'steps': 1439, 'loss/train': 2.2643065452575684} +07/25/2024 11:32:01 - INFO - __main__ - Step 1441: {'lr': 0.0004999696928703105, 'samples': 69168, 'steps': 1440, 'loss/train': 2.694937229156494} +07/25/2024 11:32:02 - INFO - __main__ - Step 1442: {'lr': 0.0004999696109054643, 'samples': 69216, 'steps': 1441, 'loss/train': 3.884852170944214} +07/25/2024 11:32:02 - INFO - __main__ - Step 1443: {'lr': 0.0004999695288299388, 'samples': 69264, 'steps': 1442, 'loss/train': 3.878967761993408} +07/25/2024 11:32:02 - INFO - __main__ - Step 1444: {'lr': 0.0004999694466437336, 'samples': 69312, 'steps': 1443, 'loss/train': 4.063049793243408} +07/25/2024 11:32:02 - INFO - __main__ - Step 1445: {'lr': 0.000499969364346849, 'samples': 69360, 'steps': 1444, 'loss/train': 3.987844467163086} +07/25/2024 11:32:03 - INFO - __main__ - Step 1446: {'lr': 0.000499969281939285, 'samples': 69408, 'steps': 1445, 'loss/train': 1.102113127708435} +07/25/2024 11:32:03 - INFO - __main__ - Step 1447: {'lr': 0.0004999691994210416, 'samples': 69456, 'steps': 1446, 'loss/train': 3.9236257076263428} +07/25/2024 11:32:03 - INFO - __main__ - Step 1448: {'lr': 0.0004999691167921189, 'samples': 69504, 'steps': 1447, 'loss/train': 3.890302896499634} +07/25/2024 11:32:04 - INFO - __main__ - Step 1449: {'lr': 0.0004999690340525168, 'samples': 69552, 'steps': 1448, 'loss/train': 3.7219395637512207} +07/25/2024 11:32:04 - INFO - __main__ - Step 1450: {'lr': 0.0004999689512022354, 'samples': 69600, 'steps': 1449, 'loss/train': 3.992295026779175} +07/25/2024 11:32:04 - INFO - __main__ - Step 1451: {'lr': 0.0004999688682412748, 'samples': 69648, 'steps': 1450, 'loss/train': 1.3913110494613647} +07/25/2024 11:32:04 - INFO - __main__ - Step 1452: {'lr': 0.000499968785169635, 'samples': 69696, 'steps': 1451, 'loss/train': 3.8865177631378174} +07/25/2024 11:32:05 - INFO - __main__ - Step 1453: {'lr': 0.000499968701987316, 'samples': 69744, 'steps': 1452, 'loss/train': 4.073939323425293} +07/25/2024 11:32:05 - INFO - __main__ - Step 1454: {'lr': 0.0004999686186943178, 'samples': 69792, 'steps': 1453, 'loss/train': 3.7361464500427246} +07/25/2024 11:32:05 - INFO - __main__ - Step 1455: {'lr': 0.0004999685352906405, 'samples': 69840, 'steps': 1454, 'loss/train': 4.051283836364746} +07/25/2024 11:32:06 - INFO - __main__ - Step 1456: {'lr': 0.0004999684517762843, 'samples': 69888, 'steps': 1455, 'loss/train': 3.4574990272521973} +07/25/2024 11:32:06 - INFO - __main__ - Step 1457: {'lr': 0.000499968368151249, 'samples': 69936, 'steps': 1456, 'loss/train': 3.9281225204467773} +07/25/2024 11:32:06 - INFO - __main__ - Step 1458: {'lr': 0.0004999682844155347, 'samples': 69984, 'steps': 1457, 'loss/train': 3.888486623764038} +07/25/2024 11:32:06 - INFO - __main__ - Step 1459: {'lr': 0.0004999682005691414, 'samples': 70032, 'steps': 1458, 'loss/train': 4.035988807678223} +07/25/2024 11:32:07 - INFO - __main__ - Step 1460: {'lr': 0.0004999681166120692, 'samples': 70080, 'steps': 1459, 'loss/train': 4.239274501800537} +07/25/2024 11:32:07 - INFO - __main__ - Step 1461: {'lr': 0.0004999680325443181, 'samples': 70128, 'steps': 1460, 'loss/train': 4.021892547607422} +07/25/2024 11:32:07 - INFO - __main__ - Step 1462: {'lr': 0.0004999679483658882, 'samples': 70176, 'steps': 1461, 'loss/train': 4.274096965789795} +07/25/2024 11:32:08 - INFO - __main__ - Step 1463: {'lr': 0.0004999678640767796, 'samples': 70224, 'steps': 1462, 'loss/train': 3.8906683921813965} +07/25/2024 11:32:08 - INFO - __main__ - Step 1464: {'lr': 0.0004999677796769921, 'samples': 70272, 'steps': 1463, 'loss/train': 3.586780071258545} +07/25/2024 11:32:08 - INFO - __main__ - Step 1465: {'lr': 0.0004999676951665258, 'samples': 70320, 'steps': 1464, 'loss/train': 3.6978416442871094} +07/25/2024 11:32:08 - INFO - __main__ - Step 1466: {'lr': 0.000499967610545381, 'samples': 70368, 'steps': 1465, 'loss/train': 4.234058856964111} +07/25/2024 11:32:09 - INFO - __main__ - Step 1467: {'lr': 0.0004999675258135574, 'samples': 70416, 'steps': 1466, 'loss/train': 3.9442427158355713} +07/25/2024 11:32:09 - INFO - __main__ - Step 1468: {'lr': 0.0004999674409710553, 'samples': 70464, 'steps': 1467, 'loss/train': 2.4456350803375244} +07/25/2024 11:32:09 - INFO - __main__ - Step 1469: {'lr': 0.0004999673560178745, 'samples': 70512, 'steps': 1468, 'loss/train': 4.382287502288818} +07/25/2024 11:32:10 - INFO - __main__ - Step 1470: {'lr': 0.0004999672709540152, 'samples': 70560, 'steps': 1469, 'loss/train': 0.9246193170547485} +07/25/2024 11:32:10 - INFO - __main__ - Step 1471: {'lr': 0.0004999671857794774, 'samples': 70608, 'steps': 1470, 'loss/train': 3.6420035362243652} +07/25/2024 11:32:10 - INFO - __main__ - Step 1472: {'lr': 0.0004999671004942612, 'samples': 70656, 'steps': 1471, 'loss/train': 4.0850934982299805} +07/25/2024 11:32:10 - INFO - __main__ - Step 1473: {'lr': 0.0004999670150983665, 'samples': 70704, 'steps': 1472, 'loss/train': 4.3606085777282715} +07/25/2024 11:32:11 - INFO - __main__ - Step 1474: {'lr': 0.0004999669295917934, 'samples': 70752, 'steps': 1473, 'loss/train': 4.199680328369141} +07/25/2024 11:32:11 - INFO - __main__ - Step 1475: {'lr': 0.000499966843974542, 'samples': 70800, 'steps': 1474, 'loss/train': 3.7796781063079834} +07/25/2024 11:32:11 - INFO - __main__ - Step 1476: {'lr': 0.0004999667582466123, 'samples': 70848, 'steps': 1475, 'loss/train': 4.128585338592529} +07/25/2024 11:32:11 - INFO - __main__ - Step 1477: {'lr': 0.0004999666724080043, 'samples': 70896, 'steps': 1476, 'loss/train': 5.017418384552002} +07/25/2024 11:32:12 - INFO - __main__ - Step 1478: {'lr': 0.000499966586458718, 'samples': 70944, 'steps': 1477, 'loss/train': 4.042722225189209} +07/25/2024 11:32:12 - INFO - __main__ - Step 1479: {'lr': 0.0004999665003987536, 'samples': 70992, 'steps': 1478, 'loss/train': 4.445557117462158} +07/25/2024 11:32:12 - INFO - __main__ - Step 1480: {'lr': 0.0004999664142281109, 'samples': 71040, 'steps': 1479, 'loss/train': 3.2472188472747803} +07/25/2024 11:32:13 - INFO - __main__ - Step 1481: {'lr': 0.0004999663279467902, 'samples': 71088, 'steps': 1480, 'loss/train': 4.693428993225098} +07/25/2024 11:32:13 - INFO - __main__ - Step 1482: {'lr': 0.0004999662415547914, 'samples': 71136, 'steps': 1481, 'loss/train': 3.9202821254730225} +07/25/2024 11:32:13 - INFO - __main__ - Step 1483: {'lr': 0.0004999661550521145, 'samples': 71184, 'steps': 1482, 'loss/train': 3.864898681640625} +07/25/2024 11:32:13 - INFO - __main__ - Step 1484: {'lr': 0.0004999660684387596, 'samples': 71232, 'steps': 1483, 'loss/train': 4.207211494445801} +07/25/2024 11:32:14 - INFO - __main__ - Step 1485: {'lr': 0.0004999659817147268, 'samples': 71280, 'steps': 1484, 'loss/train': 3.4509027004241943} +07/25/2024 11:32:14 - INFO - __main__ - Step 1486: {'lr': 0.000499965894880016, 'samples': 71328, 'steps': 1485, 'loss/train': 4.071646213531494} +07/25/2024 11:32:14 - INFO - __main__ - Step 1487: {'lr': 0.0004999658079346273, 'samples': 71376, 'steps': 1486, 'loss/train': 3.7366654872894287} +07/25/2024 11:32:15 - INFO - __main__ - Step 1488: {'lr': 0.0004999657208785607, 'samples': 71424, 'steps': 1487, 'loss/train': 3.259117364883423} +07/25/2024 11:32:15 - INFO - __main__ - Step 1489: {'lr': 0.0004999656337118163, 'samples': 71472, 'steps': 1488, 'loss/train': 4.269411087036133} +07/25/2024 11:32:15 - INFO - __main__ - Step 1490: {'lr': 0.0004999655464343943, 'samples': 71520, 'steps': 1489, 'loss/train': 4.223050594329834} +07/25/2024 11:32:15 - INFO - __main__ - Step 1491: {'lr': 0.0004999654590462945, 'samples': 71568, 'steps': 1490, 'loss/train': 3.872130870819092} +07/25/2024 11:32:16 - INFO - __main__ - Step 1492: {'lr': 0.0004999653715475168, 'samples': 71616, 'steps': 1491, 'loss/train': 2.672393560409546} +07/25/2024 11:32:16 - INFO - __main__ - Step 1493: {'lr': 0.0004999652839380615, 'samples': 71664, 'steps': 1492, 'loss/train': 3.794172525405884} +07/25/2024 11:32:16 - INFO - __main__ - Step 1494: {'lr': 0.0004999651962179287, 'samples': 71712, 'steps': 1493, 'loss/train': 1.208966851234436} +07/25/2024 11:32:17 - INFO - __main__ - Step 1495: {'lr': 0.0004999651083871183, 'samples': 71760, 'steps': 1494, 'loss/train': 4.298748970031738} +07/25/2024 11:32:17 - INFO - __main__ - Step 1496: {'lr': 0.0004999650204456303, 'samples': 71808, 'steps': 1495, 'loss/train': 3.8231394290924072} +07/25/2024 11:32:17 - INFO - __main__ - Step 1497: {'lr': 0.0004999649323934647, 'samples': 71856, 'steps': 1496, 'loss/train': 3.8445699214935303} +07/25/2024 11:32:17 - INFO - __main__ - Step 1498: {'lr': 0.0004999648442306218, 'samples': 71904, 'steps': 1497, 'loss/train': 4.696833610534668} +07/25/2024 11:32:18 - INFO - __main__ - Step 1499: {'lr': 0.0004999647559571015, 'samples': 71952, 'steps': 1498, 'loss/train': 4.433712005615234} +07/25/2024 11:32:18 - INFO - __main__ - Step 1500: {'lr': 0.0004999646675729036, 'samples': 72000, 'steps': 1499, 'loss/train': 4.143982887268066} +07/25/2024 11:32:18 - INFO - __main__ - Step 1501: {'lr': 0.0004999645790780285, 'samples': 72048, 'steps': 1500, 'loss/train': 4.402446746826172} +07/25/2024 11:32:19 - INFO - __main__ - Step 1502: {'lr': 0.0004999644904724761, 'samples': 72096, 'steps': 1501, 'loss/train': 3.7183687686920166} +07/25/2024 11:32:19 - INFO - __main__ - Step 1503: {'lr': 0.0004999644017562463, 'samples': 72144, 'steps': 1502, 'loss/train': 3.7970240116119385} +07/25/2024 11:32:19 - INFO - __main__ - Step 1504: {'lr': 0.0004999643129293393, 'samples': 72192, 'steps': 1503, 'loss/train': 3.776841402053833} +07/25/2024 11:32:19 - INFO - __main__ - Step 1505: {'lr': 0.0004999642239917553, 'samples': 72240, 'steps': 1504, 'loss/train': 4.002180099487305} +07/25/2024 11:32:20 - INFO - __main__ - Step 1506: {'lr': 0.0004999641349434939, 'samples': 72288, 'steps': 1505, 'loss/train': 1.9014101028442383} +07/25/2024 11:32:20 - INFO - __main__ - Step 1507: {'lr': 0.0004999640457845555, 'samples': 72336, 'steps': 1506, 'loss/train': 4.136569023132324} +07/25/2024 11:32:20 - INFO - __main__ - Step 1508: {'lr': 0.0004999639565149399, 'samples': 72384, 'steps': 1507, 'loss/train': 3.5612270832061768} +07/25/2024 11:32:21 - INFO - __main__ - Step 1509: {'lr': 0.0004999638671346475, 'samples': 72432, 'steps': 1508, 'loss/train': 3.489406108856201} +07/25/2024 11:32:21 - INFO - __main__ - Step 1510: {'lr': 0.0004999637776436779, 'samples': 72480, 'steps': 1509, 'loss/train': 3.6313750743865967} +07/25/2024 11:32:21 - INFO - __main__ - Step 1511: {'lr': 0.0004999636880420314, 'samples': 72528, 'steps': 1510, 'loss/train': 4.451263427734375} +07/25/2024 11:32:21 - INFO - __main__ - Step 1512: {'lr': 0.000499963598329708, 'samples': 72576, 'steps': 1511, 'loss/train': 3.888744831085205} +07/25/2024 11:32:22 - INFO - __main__ - Step 1513: {'lr': 0.0004999635085067078, 'samples': 72624, 'steps': 1512, 'loss/train': 4.229689598083496} +07/25/2024 11:32:22 - INFO - __main__ - Step 1514: {'lr': 0.0004999634185730306, 'samples': 72672, 'steps': 1513, 'loss/train': 3.9488413333892822} +07/25/2024 11:32:22 - INFO - __main__ - Step 1515: {'lr': 0.0004999633285286768, 'samples': 72720, 'steps': 1514, 'loss/train': 3.9330856800079346} +07/25/2024 11:32:23 - INFO - __main__ - Step 1516: {'lr': 0.0004999632383736461, 'samples': 72768, 'steps': 1515, 'loss/train': 3.405743360519409} +07/25/2024 11:32:23 - INFO - __main__ - Step 1517: {'lr': 0.0004999631481079386, 'samples': 72816, 'steps': 1516, 'loss/train': 3.888577938079834} +07/25/2024 11:32:23 - INFO - __main__ - Step 1518: {'lr': 0.0004999630577315546, 'samples': 72864, 'steps': 1517, 'loss/train': 1.0244977474212646} +07/25/2024 11:32:23 - INFO - __main__ - Step 1519: {'lr': 0.0004999629672444939, 'samples': 72912, 'steps': 1518, 'loss/train': 4.057705402374268} +07/25/2024 11:32:24 - INFO - __main__ - Step 1520: {'lr': 0.0004999628766467566, 'samples': 72960, 'steps': 1519, 'loss/train': 4.503059387207031} +07/25/2024 11:32:24 - INFO - __main__ - Step 1521: {'lr': 0.0004999627859383428, 'samples': 73008, 'steps': 1520, 'loss/train': 3.5277299880981445} +07/25/2024 11:32:24 - INFO - __main__ - Step 1522: {'lr': 0.0004999626951192524, 'samples': 73056, 'steps': 1521, 'loss/train': 3.6730616092681885} +07/25/2024 11:32:25 - INFO - __main__ - Step 1523: {'lr': 0.0004999626041894855, 'samples': 73104, 'steps': 1522, 'loss/train': 4.048335552215576} +07/25/2024 11:32:25 - INFO - __main__ - Step 1524: {'lr': 0.0004999625131490422, 'samples': 73152, 'steps': 1523, 'loss/train': 4.632394790649414} +07/25/2024 11:32:25 - INFO - __main__ - Step 1525: {'lr': 0.0004999624219979226, 'samples': 73200, 'steps': 1524, 'loss/train': 3.7944233417510986} +07/25/2024 11:32:25 - INFO - __main__ - Step 1526: {'lr': 0.0004999623307361265, 'samples': 73248, 'steps': 1525, 'loss/train': 4.053163051605225} +07/25/2024 11:32:26 - INFO - __main__ - Step 1527: {'lr': 0.0004999622393636542, 'samples': 73296, 'steps': 1526, 'loss/train': 3.7443089485168457} +07/25/2024 11:32:26 - INFO - __main__ - Step 1528: {'lr': 0.0004999621478805054, 'samples': 73344, 'steps': 1527, 'loss/train': 3.3269195556640625} +07/25/2024 11:32:26 - INFO - __main__ - Step 1529: {'lr': 0.0004999620562866806, 'samples': 73392, 'steps': 1528, 'loss/train': 3.910510301589966} +07/25/2024 11:32:27 - INFO - __main__ - Step 1530: {'lr': 0.0004999619645821796, 'samples': 73440, 'steps': 1529, 'loss/train': 3.95631742477417} +07/25/2024 11:32:27 - INFO - __main__ - Step 1531: {'lr': 0.0004999618727670024, 'samples': 73488, 'steps': 1530, 'loss/train': 3.6291286945343018} +07/25/2024 11:32:27 - INFO - __main__ - Step 1532: {'lr': 0.000499961780841149, 'samples': 73536, 'steps': 1531, 'loss/train': 4.267503261566162} +07/25/2024 11:32:27 - INFO - __main__ - Step 1533: {'lr': 0.0004999616888046196, 'samples': 73584, 'steps': 1532, 'loss/train': 4.022336959838867} +07/25/2024 11:32:28 - INFO - __main__ - Step 1534: {'lr': 0.0004999615966574142, 'samples': 73632, 'steps': 1533, 'loss/train': 3.6660497188568115} +07/25/2024 11:32:28 - INFO - __main__ - Step 1535: {'lr': 0.0004999615043995327, 'samples': 73680, 'steps': 1534, 'loss/train': 3.4425692558288574} +07/25/2024 11:32:28 - INFO - __main__ - Step 1536: {'lr': 0.0004999614120309754, 'samples': 73728, 'steps': 1535, 'loss/train': 4.431183815002441} +07/25/2024 11:32:29 - INFO - __main__ - Step 1537: {'lr': 0.000499961319551742, 'samples': 73776, 'steps': 1536, 'loss/train': 4.095093727111816} +07/25/2024 11:32:29 - INFO - __main__ - Step 1538: {'lr': 0.0004999612269618328, 'samples': 73824, 'steps': 1537, 'loss/train': 4.12827205657959} +07/25/2024 11:32:29 - INFO - __main__ - Step 1539: {'lr': 0.0004999611342612479, 'samples': 73872, 'steps': 1538, 'loss/train': 3.7766799926757812} +07/25/2024 11:32:29 - INFO - __main__ - Step 1540: {'lr': 0.0004999610414499872, 'samples': 73920, 'steps': 1539, 'loss/train': 3.903590679168701} +07/25/2024 11:32:30 - INFO - __main__ - Step 1541: {'lr': 0.0004999609485280506, 'samples': 73968, 'steps': 1540, 'loss/train': 3.8582890033721924} +07/25/2024 11:32:30 - INFO - __main__ - Step 1542: {'lr': 0.0004999608554954383, 'samples': 74016, 'steps': 1541, 'loss/train': 0.9603809118270874} +07/25/2024 11:32:30 - INFO - __main__ - Step 1543: {'lr': 0.0004999607623521505, 'samples': 74064, 'steps': 1542, 'loss/train': 3.8861796855926514} +07/25/2024 11:32:31 - INFO - __main__ - Step 1544: {'lr': 0.0004999606690981869, 'samples': 74112, 'steps': 1543, 'loss/train': 4.085121154785156} +07/25/2024 11:32:31 - INFO - __main__ - Step 1545: {'lr': 0.0004999605757335479, 'samples': 74160, 'steps': 1544, 'loss/train': 2.173691511154175} +07/25/2024 11:32:31 - INFO - __main__ - Step 1546: {'lr': 0.0004999604822582332, 'samples': 74208, 'steps': 1545, 'loss/train': 4.093739032745361} +07/25/2024 11:32:31 - INFO - __main__ - Step 1547: {'lr': 0.0004999603886722432, 'samples': 74256, 'steps': 1546, 'loss/train': 4.539936542510986} +07/25/2024 11:32:32 - INFO - __main__ - Step 1548: {'lr': 0.0004999602949755775, 'samples': 74304, 'steps': 1547, 'loss/train': 4.189426898956299} +07/25/2024 11:32:32 - INFO - __main__ - Step 1549: {'lr': 0.0004999602011682365, 'samples': 74352, 'steps': 1548, 'loss/train': 3.6450183391571045} +07/25/2024 11:32:32 - INFO - __main__ - Step 1550: {'lr': 0.0004999601072502201, 'samples': 74400, 'steps': 1549, 'loss/train': 4.722991466522217} +07/25/2024 11:32:33 - INFO - __main__ - Step 1551: {'lr': 0.0004999600132215285, 'samples': 74448, 'steps': 1550, 'loss/train': 3.901273012161255} +07/25/2024 11:32:33 - INFO - __main__ - Step 1552: {'lr': 0.0004999599190821614, 'samples': 74496, 'steps': 1551, 'loss/train': 3.0019543170928955} +07/25/2024 11:32:33 - INFO - __main__ - Step 1553: {'lr': 0.0004999598248321192, 'samples': 74544, 'steps': 1552, 'loss/train': 3.688289165496826} +07/25/2024 11:32:33 - INFO - __main__ - Step 1554: {'lr': 0.0004999597304714018, 'samples': 74592, 'steps': 1553, 'loss/train': 4.223228931427002} +07/25/2024 11:32:34 - INFO - __main__ - Step 1555: {'lr': 0.0004999596360000092, 'samples': 74640, 'steps': 1554, 'loss/train': 4.533329010009766} +07/25/2024 11:32:34 - INFO - __main__ - Step 1556: {'lr': 0.0004999595414179414, 'samples': 74688, 'steps': 1555, 'loss/train': 4.205251216888428} +07/25/2024 11:32:34 - INFO - __main__ - Step 1557: {'lr': 0.0004999594467251985, 'samples': 74736, 'steps': 1556, 'loss/train': 3.692619562149048} +07/25/2024 11:32:35 - INFO - __main__ - Step 1558: {'lr': 0.0004999593519217808, 'samples': 74784, 'steps': 1557, 'loss/train': 3.8587698936462402} +07/25/2024 11:32:35 - INFO - __main__ - Step 1559: {'lr': 0.0004999592570076878, 'samples': 74832, 'steps': 1558, 'loss/train': 2.7699522972106934} +07/25/2024 11:32:35 - INFO - __main__ - Step 1560: {'lr': 0.00049995916198292, 'samples': 74880, 'steps': 1559, 'loss/train': 4.478845119476318} +07/25/2024 11:32:35 - INFO - __main__ - Step 1561: {'lr': 0.0004999590668474773, 'samples': 74928, 'steps': 1560, 'loss/train': 3.8760859966278076} +07/25/2024 11:32:36 - INFO - __main__ - Step 1562: {'lr': 0.0004999589716013597, 'samples': 74976, 'steps': 1561, 'loss/train': 4.999029159545898} +07/25/2024 11:32:36 - INFO - __main__ - Step 1563: {'lr': 0.0004999588762445674, 'samples': 75024, 'steps': 1562, 'loss/train': 4.671128749847412} +07/25/2024 11:32:36 - INFO - __main__ - Step 1564: {'lr': 0.0004999587807771001, 'samples': 75072, 'steps': 1563, 'loss/train': 4.207265853881836} +07/25/2024 11:32:37 - INFO - __main__ - Step 1565: {'lr': 0.0004999586851989581, 'samples': 75120, 'steps': 1564, 'loss/train': 4.255036354064941} +07/25/2024 11:32:37 - INFO - __main__ - Step 1566: {'lr': 0.0004999585895101415, 'samples': 75168, 'steps': 1565, 'loss/train': 1.020559549331665} +07/25/2024 11:32:37 - INFO - __main__ - Step 1567: {'lr': 0.0004999584937106503, 'samples': 75216, 'steps': 1566, 'loss/train': 4.159649848937988} +07/25/2024 11:32:37 - INFO - __main__ - Step 1568: {'lr': 0.0004999583978004843, 'samples': 75264, 'steps': 1567, 'loss/train': 4.338928699493408} +07/25/2024 11:32:38 - INFO - __main__ - Step 1569: {'lr': 0.0004999583017796437, 'samples': 75312, 'steps': 1568, 'loss/train': 1.5116093158721924} +07/25/2024 11:32:38 - INFO - __main__ - Step 1570: {'lr': 0.0004999582056481287, 'samples': 75360, 'steps': 1569, 'loss/train': 4.6255998611450195} +07/25/2024 11:32:38 - INFO - __main__ - Step 1571: {'lr': 0.0004999581094059392, 'samples': 75408, 'steps': 1570, 'loss/train': 3.675564765930176} +07/25/2024 11:32:39 - INFO - __main__ - Step 1572: {'lr': 0.0004999580130530752, 'samples': 75456, 'steps': 1571, 'loss/train': 4.568608283996582} +07/25/2024 11:32:39 - INFO - __main__ - Step 1573: {'lr': 0.0004999579165895368, 'samples': 75504, 'steps': 1572, 'loss/train': 3.292952537536621} +07/25/2024 11:32:39 - INFO - __main__ - Step 1574: {'lr': 0.0004999578200153241, 'samples': 75552, 'steps': 1573, 'loss/train': 4.29006290435791} +07/25/2024 11:32:39 - INFO - __main__ - Step 1575: {'lr': 0.0004999577233304371, 'samples': 75600, 'steps': 1574, 'loss/train': 3.830371141433716} +07/25/2024 11:32:40 - INFO - __main__ - Step 1576: {'lr': 0.0004999576265348757, 'samples': 75648, 'steps': 1575, 'loss/train': 3.5302281379699707} +07/25/2024 11:32:40 - INFO - __main__ - Step 1577: {'lr': 0.0004999575296286402, 'samples': 75696, 'steps': 1576, 'loss/train': 3.8645761013031006} +07/25/2024 11:32:40 - INFO - __main__ - Step 1578: {'lr': 0.0004999574326117304, 'samples': 75744, 'steps': 1577, 'loss/train': 4.691342353820801} +07/25/2024 11:32:41 - INFO - __main__ - Step 1579: {'lr': 0.0004999573354841464, 'samples': 75792, 'steps': 1578, 'loss/train': 4.31577205657959} +07/25/2024 11:32:41 - INFO - __main__ - Step 1580: {'lr': 0.0004999572382458885, 'samples': 75840, 'steps': 1579, 'loss/train': 4.461881160736084} +07/25/2024 11:32:41 - INFO - __main__ - Step 1581: {'lr': 0.0004999571408969564, 'samples': 75888, 'steps': 1580, 'loss/train': 4.384960174560547} +07/25/2024 11:32:41 - INFO - __main__ - Step 1582: {'lr': 0.0004999570434373503, 'samples': 75936, 'steps': 1581, 'loss/train': 4.288477420806885} +07/25/2024 11:32:42 - INFO - __main__ - Step 1583: {'lr': 0.0004999569458670702, 'samples': 75984, 'steps': 1582, 'loss/train': 3.555758237838745} +07/25/2024 11:32:42 - INFO - __main__ - Step 1584: {'lr': 0.0004999568481861161, 'samples': 76032, 'steps': 1583, 'loss/train': 4.271367073059082} +07/25/2024 11:32:42 - INFO - __main__ - Step 1585: {'lr': 0.0004999567503944883, 'samples': 76080, 'steps': 1584, 'loss/train': 3.5865719318389893} +07/25/2024 11:32:43 - INFO - __main__ - Step 1586: {'lr': 0.0004999566524921866, 'samples': 76128, 'steps': 1585, 'loss/train': 4.09868049621582} +07/25/2024 11:32:43 - INFO - __main__ - Step 1587: {'lr': 0.0004999565544792111, 'samples': 76176, 'steps': 1586, 'loss/train': 3.866084337234497} +07/25/2024 11:32:43 - INFO - __main__ - Step 1588: {'lr': 0.0004999564563555617, 'samples': 76224, 'steps': 1587, 'loss/train': 3.0452566146850586} +07/25/2024 11:32:43 - INFO - __main__ - Step 1589: {'lr': 0.0004999563581212388, 'samples': 76272, 'steps': 1588, 'loss/train': 3.9067978858947754} +07/25/2024 11:32:44 - INFO - __main__ - Step 1590: {'lr': 0.000499956259776242, 'samples': 76320, 'steps': 1589, 'loss/train': 2.893057346343994} +07/25/2024 11:32:44 - INFO - __main__ - Step 1591: {'lr': 0.0004999561613205718, 'samples': 76368, 'steps': 1590, 'loss/train': 4.177863597869873} +07/25/2024 11:32:44 - INFO - __main__ - Step 1592: {'lr': 0.0004999560627542279, 'samples': 76416, 'steps': 1591, 'loss/train': 4.0368242263793945} +07/25/2024 11:32:45 - INFO - __main__ - Step 1593: {'lr': 0.0004999559640772105, 'samples': 76464, 'steps': 1592, 'loss/train': 1.2555770874023438} +07/25/2024 11:32:45 - INFO - __main__ - Step 1594: {'lr': 0.0004999558652895196, 'samples': 76512, 'steps': 1593, 'loss/train': 4.91914176940918} +07/25/2024 11:32:45 - INFO - __main__ - Step 1595: {'lr': 0.0004999557663911551, 'samples': 76560, 'steps': 1594, 'loss/train': 3.399209976196289} +07/25/2024 11:32:45 - INFO - __main__ - Step 1596: {'lr': 0.0004999556673821173, 'samples': 76608, 'steps': 1595, 'loss/train': 4.228862762451172} +07/25/2024 11:32:46 - INFO - __main__ - Step 1597: {'lr': 0.0004999555682624062, 'samples': 76656, 'steps': 1596, 'loss/train': 3.645184278488159} +07/25/2024 11:32:46 - INFO - __main__ - Step 1598: {'lr': 0.0004999554690320217, 'samples': 76704, 'steps': 1597, 'loss/train': 4.162902355194092} +07/25/2024 11:32:46 - INFO - __main__ - Step 1599: {'lr': 0.0004999553696909639, 'samples': 76752, 'steps': 1598, 'loss/train': 1.2004375457763672} +07/25/2024 11:32:46 - INFO - __main__ - Step 1600: {'lr': 0.000499955270239233, 'samples': 76800, 'steps': 1599, 'loss/train': 4.772523403167725} +07/25/2024 11:32:47 - INFO - __main__ - Step 1601: {'lr': 0.0004999551706768287, 'samples': 76848, 'steps': 1600, 'loss/train': 4.297962188720703} +07/25/2024 11:32:47 - INFO - __main__ - Step 1602: {'lr': 0.0004999550710037513, 'samples': 76896, 'steps': 1601, 'loss/train': 3.913917064666748} +07/25/2024 11:32:47 - INFO - __main__ - Step 1603: {'lr': 0.000499954971220001, 'samples': 76944, 'steps': 1602, 'loss/train': 4.4384002685546875} +07/25/2024 11:32:48 - INFO - __main__ - Step 1604: {'lr': 0.0004999548713255775, 'samples': 76992, 'steps': 1603, 'loss/train': 4.026443004608154} +07/25/2024 11:32:48 - INFO - __main__ - Step 1605: {'lr': 0.000499954771320481, 'samples': 77040, 'steps': 1604, 'loss/train': 4.500652313232422} +07/25/2024 11:32:48 - INFO - __main__ - Step 1606: {'lr': 0.0004999546712047115, 'samples': 77088, 'steps': 1605, 'loss/train': 4.038680553436279} +07/25/2024 11:32:48 - INFO - __main__ - Step 1607: {'lr': 0.0004999545709782692, 'samples': 77136, 'steps': 1606, 'loss/train': 3.5654003620147705} +07/25/2024 11:32:49 - INFO - __main__ - Step 1608: {'lr': 0.0004999544706411538, 'samples': 77184, 'steps': 1607, 'loss/train': 3.332026720046997} +07/25/2024 11:32:49 - INFO - __main__ - Step 1609: {'lr': 0.0004999543701933658, 'samples': 77232, 'steps': 1608, 'loss/train': 4.331302642822266} +07/25/2024 11:32:49 - INFO - __main__ - Step 1610: {'lr': 0.0004999542696349048, 'samples': 77280, 'steps': 1609, 'loss/train': 3.0789198875427246} +07/25/2024 11:32:50 - INFO - __main__ - Step 1611: {'lr': 0.0004999541689657712, 'samples': 77328, 'steps': 1610, 'loss/train': 4.6140875816345215} +07/25/2024 11:32:50 - INFO - __main__ - Step 1612: {'lr': 0.0004999540681859647, 'samples': 77376, 'steps': 1611, 'loss/train': 4.218678951263428} +07/25/2024 11:32:50 - INFO - __main__ - Step 1613: {'lr': 0.0004999539672954858, 'samples': 77424, 'steps': 1612, 'loss/train': 3.9346444606781006} +07/25/2024 11:32:50 - INFO - __main__ - Step 1614: {'lr': 0.0004999538662943341, 'samples': 77472, 'steps': 1613, 'loss/train': 3.596839666366577} +07/25/2024 11:32:51 - INFO - __main__ - Step 1615: {'lr': 0.00049995376518251, 'samples': 77520, 'steps': 1614, 'loss/train': 4.349587440490723} +07/25/2024 11:32:51 - INFO - __main__ - Step 1616: {'lr': 0.0004999536639600133, 'samples': 77568, 'steps': 1615, 'loss/train': 4.739392280578613} +07/25/2024 11:32:51 - INFO - __main__ - Step 1617: {'lr': 0.000499953562626844, 'samples': 77616, 'steps': 1616, 'loss/train': 1.010838270187378} +07/25/2024 11:32:52 - INFO - __main__ - Step 1618: {'lr': 0.0004999534611830024, 'samples': 77664, 'steps': 1617, 'loss/train': 5.193946361541748} +07/25/2024 11:32:52 - INFO - __main__ - Step 1619: {'lr': 0.0004999533596284885, 'samples': 77712, 'steps': 1618, 'loss/train': 3.4452552795410156} +07/25/2024 11:32:52 - INFO - __main__ - Step 1620: {'lr': 0.000499953257963302, 'samples': 77760, 'steps': 1619, 'loss/train': 4.09573221206665} +07/25/2024 11:32:52 - INFO - __main__ - Step 1621: {'lr': 0.0004999531561874434, 'samples': 77808, 'steps': 1620, 'loss/train': 3.8263723850250244} +07/25/2024 11:32:53 - INFO - __main__ - Step 1622: {'lr': 0.0004999530543009124, 'samples': 77856, 'steps': 1621, 'loss/train': 2.8017823696136475} +07/25/2024 11:32:53 - INFO - __main__ - Step 1623: {'lr': 0.0004999529523037093, 'samples': 77904, 'steps': 1622, 'loss/train': 0.83091139793396} +07/25/2024 11:32:53 - INFO - __main__ - Step 1624: {'lr': 0.000499952850195834, 'samples': 77952, 'steps': 1623, 'loss/train': 3.781026840209961} +07/25/2024 11:32:54 - INFO - __main__ - Step 1625: {'lr': 0.0004999527479772867, 'samples': 78000, 'steps': 1624, 'loss/train': 4.0014238357543945} +07/25/2024 11:32:54 - INFO - __main__ - Step 1626: {'lr': 0.0004999526456480671, 'samples': 78048, 'steps': 1625, 'loss/train': 3.603201150894165} +07/25/2024 11:32:54 - INFO - __main__ - Step 1627: {'lr': 0.0004999525432081756, 'samples': 78096, 'steps': 1626, 'loss/train': 4.4280781745910645} +07/25/2024 11:32:54 - INFO - __main__ - Step 1628: {'lr': 0.0004999524406576122, 'samples': 78144, 'steps': 1627, 'loss/train': 3.489168405532837} +07/25/2024 11:32:55 - INFO - __main__ - Step 1629: {'lr': 0.0004999523379963768, 'samples': 78192, 'steps': 1628, 'loss/train': 4.097573757171631} +07/25/2024 11:32:55 - INFO - __main__ - Step 1630: {'lr': 0.0004999522352244695, 'samples': 78240, 'steps': 1629, 'loss/train': 3.8141214847564697} +07/25/2024 11:32:55 - INFO - __main__ - Step 1631: {'lr': 0.0004999521323418903, 'samples': 78288, 'steps': 1630, 'loss/train': 3.8712966442108154} +07/25/2024 11:32:56 - INFO - __main__ - Step 1632: {'lr': 0.0004999520293486393, 'samples': 78336, 'steps': 1631, 'loss/train': 4.036991596221924} +07/25/2024 11:32:56 - INFO - __main__ - Step 1633: {'lr': 0.0004999519262447166, 'samples': 78384, 'steps': 1632, 'loss/train': 4.131730556488037} +07/25/2024 11:32:56 - INFO - __main__ - Step 1634: {'lr': 0.0004999518230301221, 'samples': 78432, 'steps': 1633, 'loss/train': 2.2752130031585693} +07/25/2024 11:32:56 - INFO - __main__ - Step 1635: {'lr': 0.000499951719704856, 'samples': 78480, 'steps': 1634, 'loss/train': 3.9001102447509766} +07/25/2024 11:32:57 - INFO - __main__ - Step 1636: {'lr': 0.0004999516162689184, 'samples': 78528, 'steps': 1635, 'loss/train': 3.8511343002319336} +07/25/2024 11:32:57 - INFO - __main__ - Step 1637: {'lr': 0.000499951512722309, 'samples': 78576, 'steps': 1636, 'loss/train': 3.6756503582000732} +07/25/2024 11:32:57 - INFO - __main__ - Step 1638: {'lr': 0.0004999514090650283, 'samples': 78624, 'steps': 1637, 'loss/train': 3.80741024017334} +07/25/2024 11:32:58 - INFO - __main__ - Step 1639: {'lr': 0.0004999513052970759, 'samples': 78672, 'steps': 1638, 'loss/train': 4.080711841583252} +07/25/2024 11:32:58 - INFO - __main__ - Step 1640: {'lr': 0.0004999512014184522, 'samples': 78720, 'steps': 1639, 'loss/train': 4.8657331466674805} +07/25/2024 11:32:58 - INFO - __main__ - Step 1641: {'lr': 0.0004999510974291571, 'samples': 78768, 'steps': 1640, 'loss/train': 1.0989995002746582} +07/25/2024 11:32:58 - INFO - __main__ - Step 1642: {'lr': 0.0004999509933291907, 'samples': 78816, 'steps': 1641, 'loss/train': 4.305228233337402} +07/25/2024 11:32:59 - INFO - __main__ - Step 1643: {'lr': 0.000499950889118553, 'samples': 78864, 'steps': 1642, 'loss/train': 2.7107937335968018} +07/25/2024 11:32:59 - INFO - __main__ - Step 1644: {'lr': 0.0004999507847972439, 'samples': 78912, 'steps': 1643, 'loss/train': 3.7542922496795654} +07/25/2024 11:32:59 - INFO - __main__ - Step 1645: {'lr': 0.0004999506803652637, 'samples': 78960, 'steps': 1644, 'loss/train': 3.552199363708496} +07/25/2024 11:33:00 - INFO - __main__ - Step 1646: {'lr': 0.0004999505758226122, 'samples': 79008, 'steps': 1645, 'loss/train': 3.2763359546661377} +07/25/2024 11:33:00 - INFO - __main__ - Step 1647: {'lr': 0.0004999504711692899, 'samples': 79056, 'steps': 1646, 'loss/train': 0.9021226763725281} +07/25/2024 11:33:00 - INFO - __main__ - Step 1648: {'lr': 0.0004999503664052963, 'samples': 79104, 'steps': 1647, 'loss/train': 3.520296573638916} +07/25/2024 11:33:00 - INFO - __main__ - Step 1649: {'lr': 0.0004999502615306318, 'samples': 79152, 'steps': 1648, 'loss/train': 3.935046672821045} +07/25/2024 11:33:01 - INFO - __main__ - Step 1650: {'lr': 0.0004999501565452962, 'samples': 79200, 'steps': 1649, 'loss/train': 3.830601215362549} +07/25/2024 11:33:01 - INFO - __main__ - Step 1651: {'lr': 0.0004999500514492897, 'samples': 79248, 'steps': 1650, 'loss/train': 3.7798221111297607} +07/25/2024 11:33:01 - INFO - __main__ - Step 1652: {'lr': 0.0004999499462426123, 'samples': 79296, 'steps': 1651, 'loss/train': 3.5952308177948} +07/25/2024 11:33:02 - INFO - __main__ - Step 1653: {'lr': 0.0004999498409252643, 'samples': 79344, 'steps': 1652, 'loss/train': 3.687241315841675} +07/25/2024 11:33:02 - INFO - __main__ - Step 1654: {'lr': 0.0004999497354972452, 'samples': 79392, 'steps': 1653, 'loss/train': 3.6451728343963623} +07/25/2024 11:33:02 - INFO - __main__ - Step 1655: {'lr': 0.0004999496299585556, 'samples': 79440, 'steps': 1654, 'loss/train': 3.3898539543151855} +07/25/2024 11:33:02 - INFO - __main__ - Step 1656: {'lr': 0.0004999495243091952, 'samples': 79488, 'steps': 1655, 'loss/train': 3.8540120124816895} +07/25/2024 11:33:03 - INFO - __main__ - Step 1657: {'lr': 0.0004999494185491642, 'samples': 79536, 'steps': 1656, 'loss/train': 3.9089059829711914} +07/25/2024 11:33:03 - INFO - __main__ - Step 1658: {'lr': 0.0004999493126784625, 'samples': 79584, 'steps': 1657, 'loss/train': 3.58225154876709} +07/25/2024 11:33:03 - INFO - __main__ - Step 1659: {'lr': 0.0004999492066970903, 'samples': 79632, 'steps': 1658, 'loss/train': 3.954732656478882} +07/25/2024 11:33:04 - INFO - __main__ - Step 1660: {'lr': 0.0004999491006050476, 'samples': 79680, 'steps': 1659, 'loss/train': 3.787705898284912} +07/25/2024 11:33:04 - INFO - __main__ - Step 1661: {'lr': 0.0004999489944023345, 'samples': 79728, 'steps': 1660, 'loss/train': 3.5305709838867188} +07/25/2024 11:33:04 - INFO - __main__ - Step 1662: {'lr': 0.0004999488880889509, 'samples': 79776, 'steps': 1661, 'loss/train': 3.225555419921875} +07/25/2024 11:33:04 - INFO - __main__ - Step 1663: {'lr': 0.000499948781664897, 'samples': 79824, 'steps': 1662, 'loss/train': 4.243683815002441} +07/25/2024 11:33:05 - INFO - __main__ - Step 1664: {'lr': 0.0004999486751301727, 'samples': 79872, 'steps': 1663, 'loss/train': 4.600740432739258} +07/25/2024 11:33:05 - INFO - __main__ - Step 1665: {'lr': 0.0004999485684847783, 'samples': 79920, 'steps': 1664, 'loss/train': 1.0741873979568481} +07/25/2024 11:33:05 - INFO - __main__ - Step 1666: {'lr': 0.0004999484617287136, 'samples': 79968, 'steps': 1665, 'loss/train': 4.837983131408691} +07/25/2024 11:33:06 - INFO - __main__ - Step 1667: {'lr': 0.0004999483548619786, 'samples': 80016, 'steps': 1666, 'loss/train': 2.634784698486328} +07/25/2024 11:33:06 - INFO - __main__ - Step 1668: {'lr': 0.0004999482478845736, 'samples': 80064, 'steps': 1667, 'loss/train': 4.218677997589111} +07/25/2024 11:33:06 - INFO - __main__ - Step 1669: {'lr': 0.0004999481407964984, 'samples': 80112, 'steps': 1668, 'loss/train': 4.0262908935546875} +07/25/2024 11:33:06 - INFO - __main__ - Step 1670: {'lr': 0.0004999480335977533, 'samples': 80160, 'steps': 1669, 'loss/train': 4.127504825592041} +07/25/2024 11:33:07 - INFO - __main__ - Step 1671: {'lr': 0.0004999479262883382, 'samples': 80208, 'steps': 1670, 'loss/train': 0.8168514966964722} +07/25/2024 11:33:07 - INFO - __main__ - Step 1672: {'lr': 0.0004999478188682531, 'samples': 80256, 'steps': 1671, 'loss/train': 3.800539970397949} +07/25/2024 11:33:07 - INFO - __main__ - Step 1673: {'lr': 0.0004999477113374981, 'samples': 80304, 'steps': 1672, 'loss/train': 4.094118118286133} +07/25/2024 11:33:08 - INFO - __main__ - Step 1674: {'lr': 0.0004999476036960733, 'samples': 80352, 'steps': 1673, 'loss/train': 3.773815631866455} +07/25/2024 11:33:08 - INFO - __main__ - Step 1675: {'lr': 0.0004999474959439787, 'samples': 80400, 'steps': 1674, 'loss/train': 4.062127590179443} +07/25/2024 11:33:08 - INFO - __main__ - Step 1676: {'lr': 0.0004999473880812143, 'samples': 80448, 'steps': 1675, 'loss/train': 3.9099628925323486} +07/25/2024 11:33:08 - INFO - __main__ - Step 1677: {'lr': 0.0004999472801077803, 'samples': 80496, 'steps': 1676, 'loss/train': 3.628716230392456} +07/25/2024 11:33:09 - INFO - __main__ - Step 1678: {'lr': 0.0004999471720236766, 'samples': 80544, 'steps': 1677, 'loss/train': 4.007556438446045} +07/25/2024 11:33:09 - INFO - __main__ - Step 1679: {'lr': 0.0004999470638289034, 'samples': 80592, 'steps': 1678, 'loss/train': 3.2708098888397217} +07/25/2024 11:33:09 - INFO - __main__ - Step 1680: {'lr': 0.0004999469555234604, 'samples': 80640, 'steps': 1679, 'loss/train': 3.9650065898895264} +07/25/2024 11:33:10 - INFO - __main__ - Step 1681: {'lr': 0.000499946847107348, 'samples': 80688, 'steps': 1680, 'loss/train': 3.638195276260376} +07/25/2024 11:33:10 - INFO - __main__ - Step 1682: {'lr': 0.0004999467385805662, 'samples': 80736, 'steps': 1681, 'loss/train': 3.5746607780456543} +07/25/2024 11:33:10 - INFO - __main__ - Step 1683: {'lr': 0.000499946629943115, 'samples': 80784, 'steps': 1682, 'loss/train': 3.8782882690429688} +07/25/2024 11:33:10 - INFO - __main__ - Step 1684: {'lr': 0.0004999465211949944, 'samples': 80832, 'steps': 1683, 'loss/train': 3.834268808364868} +07/25/2024 11:33:11 - INFO - __main__ - Step 1685: {'lr': 0.0004999464123362044, 'samples': 80880, 'steps': 1684, 'loss/train': 3.44153094291687} +07/25/2024 11:33:11 - INFO - __main__ - Step 1686: {'lr': 0.0004999463033667453, 'samples': 80928, 'steps': 1685, 'loss/train': 4.076027870178223} +07/25/2024 11:33:11 - INFO - __main__ - Step 1687: {'lr': 0.0004999461942866168, 'samples': 80976, 'steps': 1686, 'loss/train': 4.0355987548828125} +07/25/2024 11:33:12 - INFO - __main__ - Step 1688: {'lr': 0.0004999460850958192, 'samples': 81024, 'steps': 1687, 'loss/train': 3.6524274349212646} +07/25/2024 11:33:12 - INFO - __main__ - Step 1689: {'lr': 0.0004999459757943526, 'samples': 81072, 'steps': 1688, 'loss/train': 1.866129994392395} +07/25/2024 11:33:12 - INFO - __main__ - Step 1690: {'lr': 0.0004999458663822167, 'samples': 81120, 'steps': 1689, 'loss/train': 4.130566120147705} +07/25/2024 11:33:12 - INFO - __main__ - Step 1691: {'lr': 0.0004999457568594119, 'samples': 81168, 'steps': 1690, 'loss/train': 2.567136287689209} +07/25/2024 11:33:13 - INFO - __main__ - Step 1692: {'lr': 0.0004999456472259381, 'samples': 81216, 'steps': 1691, 'loss/train': 3.2561519145965576} +07/25/2024 11:33:13 - INFO - __main__ - Step 1693: {'lr': 0.0004999455374817954, 'samples': 81264, 'steps': 1692, 'loss/train': 3.8401870727539062} +07/25/2024 11:33:13 - INFO - __main__ - Step 1694: {'lr': 0.0004999454276269839, 'samples': 81312, 'steps': 1693, 'loss/train': 3.8708980083465576} +07/25/2024 11:33:13 - INFO - __main__ - Step 1695: {'lr': 0.0004999453176615035, 'samples': 81360, 'steps': 1694, 'loss/train': 0.7484913468360901} +07/25/2024 11:33:14 - INFO - __main__ - Step 1696: {'lr': 0.0004999452075853542, 'samples': 81408, 'steps': 1695, 'loss/train': 3.759845733642578} +07/25/2024 11:33:14 - INFO - __main__ - Step 1697: {'lr': 0.0004999450973985362, 'samples': 81456, 'steps': 1696, 'loss/train': 3.856627941131592} +07/25/2024 11:33:14 - INFO - __main__ - Step 1698: {'lr': 0.0004999449871010497, 'samples': 81504, 'steps': 1697, 'loss/train': 3.8374502658843994} +07/25/2024 11:33:15 - INFO - __main__ - Step 1699: {'lr': 0.0004999448766928944, 'samples': 81552, 'steps': 1698, 'loss/train': 3.714590072631836} +07/25/2024 11:33:15 - INFO - __main__ - Step 1700: {'lr': 0.0004999447661740705, 'samples': 81600, 'steps': 1699, 'loss/train': 3.921002149581909} +07/25/2024 11:33:15 - INFO - __main__ - Step 1701: {'lr': 0.000499944655544578, 'samples': 81648, 'steps': 1700, 'loss/train': 4.948413372039795} +07/25/2024 11:33:15 - INFO - __main__ - Step 1702: {'lr': 0.0004999445448044172, 'samples': 81696, 'steps': 1701, 'loss/train': 3.6888349056243896} +07/25/2024 11:33:16 - INFO - __main__ - Step 1703: {'lr': 0.0004999444339535879, 'samples': 81744, 'steps': 1702, 'loss/train': 3.7960641384124756} +07/25/2024 11:33:16 - INFO - __main__ - Step 1704: {'lr': 0.0004999443229920901, 'samples': 81792, 'steps': 1703, 'loss/train': 3.37654972076416} +07/25/2024 11:33:16 - INFO - __main__ - Step 1705: {'lr': 0.0004999442119199241, 'samples': 81840, 'steps': 1704, 'loss/train': 4.01531982421875} +07/25/2024 11:33:17 - INFO - __main__ - Step 1706: {'lr': 0.0004999441007370898, 'samples': 81888, 'steps': 1705, 'loss/train': 3.809922456741333} +07/25/2024 11:33:17 - INFO - __main__ - Step 1707: {'lr': 0.000499943989443587, 'samples': 81936, 'steps': 1706, 'loss/train': 4.763818740844727} +07/25/2024 11:33:17 - INFO - __main__ - Step 1708: {'lr': 0.0004999438780394163, 'samples': 81984, 'steps': 1707, 'loss/train': 4.009382724761963} +07/25/2024 11:33:17 - INFO - __main__ - Step 1709: {'lr': 0.0004999437665245773, 'samples': 82032, 'steps': 1708, 'loss/train': 4.255838871002197} +07/25/2024 11:33:18 - INFO - __main__ - Step 1710: {'lr': 0.0004999436548990704, 'samples': 82080, 'steps': 1709, 'loss/train': 3.936135768890381} +07/25/2024 11:33:18 - INFO - __main__ - Step 1711: {'lr': 0.0004999435431628953, 'samples': 82128, 'steps': 1710, 'loss/train': 4.156647682189941} +07/25/2024 11:33:18 - INFO - __main__ - Step 1712: {'lr': 0.0004999434313160521, 'samples': 82176, 'steps': 1711, 'loss/train': 3.7259504795074463} +07/25/2024 11:33:19 - INFO - __main__ - Step 1713: {'lr': 0.0004999433193585411, 'samples': 82224, 'steps': 1712, 'loss/train': 3.823317050933838} +07/25/2024 11:33:19 - INFO - __main__ - Step 1714: {'lr': 0.0004999432072903622, 'samples': 82272, 'steps': 1713, 'loss/train': 5.092266082763672} +07/25/2024 11:33:19 - INFO - __main__ - Step 1715: {'lr': 0.0004999430951115153, 'samples': 82320, 'steps': 1714, 'loss/train': 2.8324546813964844} +07/25/2024 11:33:19 - INFO - __main__ - Step 1716: {'lr': 0.0004999429828220008, 'samples': 82368, 'steps': 1715, 'loss/train': 4.235113143920898} +07/25/2024 11:33:20 - INFO - __main__ - Step 1717: {'lr': 0.0004999428704218186, 'samples': 82416, 'steps': 1716, 'loss/train': 2.8201003074645996} +07/25/2024 11:33:20 - INFO - __main__ - Step 1718: {'lr': 0.0004999427579109684, 'samples': 82464, 'steps': 1717, 'loss/train': 3.4697487354278564} +07/25/2024 11:33:20 - INFO - __main__ - Step 1719: {'lr': 0.0004999426452894507, 'samples': 82512, 'steps': 1718, 'loss/train': 1.8428771495819092} +07/25/2024 11:33:21 - INFO - __main__ - Step 1720: {'lr': 0.0004999425325572655, 'samples': 82560, 'steps': 1719, 'loss/train': 4.4850335121154785} +07/25/2024 11:33:21 - INFO - __main__ - Step 1721: {'lr': 0.0004999424197144127, 'samples': 82608, 'steps': 1720, 'loss/train': 3.8264665603637695} +07/25/2024 11:33:21 - INFO - __main__ - Step 1722: {'lr': 0.0004999423067608924, 'samples': 82656, 'steps': 1721, 'loss/train': 3.829085350036621} +07/25/2024 11:33:21 - INFO - __main__ - Step 1723: {'lr': 0.0004999421936967046, 'samples': 82704, 'steps': 1722, 'loss/train': 3.528710126876831} +07/25/2024 11:33:22 - INFO - __main__ - Step 1724: {'lr': 0.0004999420805218495, 'samples': 82752, 'steps': 1723, 'loss/train': 3.800262928009033} +07/25/2024 11:33:22 - INFO - __main__ - Step 1725: {'lr': 0.0004999419672363269, 'samples': 82800, 'steps': 1724, 'loss/train': 6.612559795379639} +07/25/2024 11:33:22 - INFO - __main__ - Step 1726: {'lr': 0.000499941853840137, 'samples': 82848, 'steps': 1725, 'loss/train': 4.772795677185059} +07/25/2024 11:33:23 - INFO - __main__ - Step 1727: {'lr': 0.00049994174033328, 'samples': 82896, 'steps': 1726, 'loss/train': 4.301530361175537} +07/25/2024 11:33:23 - INFO - __main__ - Step 1728: {'lr': 0.0004999416267157557, 'samples': 82944, 'steps': 1727, 'loss/train': 3.8070220947265625} +07/25/2024 11:33:23 - INFO - __main__ - Step 1729: {'lr': 0.0004999415129875643, 'samples': 82992, 'steps': 1728, 'loss/train': 3.807265043258667} +07/25/2024 11:33:23 - INFO - __main__ - Step 1730: {'lr': 0.0004999413991487057, 'samples': 83040, 'steps': 1729, 'loss/train': 4.041309833526611} +07/25/2024 11:33:24 - INFO - __main__ - Step 1731: {'lr': 0.0004999412851991802, 'samples': 83088, 'steps': 1730, 'loss/train': 3.9976048469543457} +07/25/2024 11:33:24 - INFO - __main__ - Step 1732: {'lr': 0.0004999411711389876, 'samples': 83136, 'steps': 1731, 'loss/train': 4.453208923339844} +07/25/2024 11:33:24 - INFO - __main__ - Step 1733: {'lr': 0.0004999410569681281, 'samples': 83184, 'steps': 1732, 'loss/train': 4.16531229019165} +07/25/2024 11:33:25 - INFO - __main__ - Step 1734: {'lr': 0.0004999409426866017, 'samples': 83232, 'steps': 1733, 'loss/train': 4.697585582733154} +07/25/2024 11:33:25 - INFO - __main__ - Step 1735: {'lr': 0.0004999408282944084, 'samples': 83280, 'steps': 1734, 'loss/train': 3.9883484840393066} +07/25/2024 11:33:25 - INFO - __main__ - Step 1736: {'lr': 0.0004999407137915484, 'samples': 83328, 'steps': 1735, 'loss/train': 3.119013547897339} +07/25/2024 11:33:25 - INFO - __main__ - Step 1737: {'lr': 0.0004999405991780216, 'samples': 83376, 'steps': 1736, 'loss/train': 4.56573486328125} +07/25/2024 11:33:26 - INFO - __main__ - Step 1738: {'lr': 0.000499940484453828, 'samples': 83424, 'steps': 1737, 'loss/train': 5.323535919189453} +07/25/2024 11:33:26 - INFO - __main__ - Step 1739: {'lr': 0.0004999403696189679, 'samples': 83472, 'steps': 1738, 'loss/train': 3.598529815673828} +07/25/2024 11:33:26 - INFO - __main__ - Step 1740: {'lr': 0.0004999402546734411, 'samples': 83520, 'steps': 1739, 'loss/train': 4.558940887451172} +07/25/2024 11:33:27 - INFO - __main__ - Step 1741: {'lr': 0.0004999401396172479, 'samples': 83568, 'steps': 1740, 'loss/train': 3.485189199447632} +07/25/2024 11:33:27 - INFO - __main__ - Step 1742: {'lr': 0.0004999400244503882, 'samples': 83616, 'steps': 1741, 'loss/train': 3.6207733154296875} +07/25/2024 11:33:27 - INFO - __main__ - Step 1743: {'lr': 0.0004999399091728619, 'samples': 83664, 'steps': 1742, 'loss/train': 3.512312412261963} +07/25/2024 11:33:27 - INFO - __main__ - Step 1744: {'lr': 0.0004999397937846693, 'samples': 83712, 'steps': 1743, 'loss/train': 3.6331450939178467} +07/25/2024 11:33:28 - INFO - __main__ - Step 1745: {'lr': 0.0004999396782858105, 'samples': 83760, 'steps': 1744, 'loss/train': 3.981555700302124} +07/25/2024 11:33:28 - INFO - __main__ - Step 1746: {'lr': 0.0004999395626762851, 'samples': 83808, 'steps': 1745, 'loss/train': 3.7002930641174316} +07/25/2024 11:33:28 - INFO - __main__ - Step 1747: {'lr': 0.0004999394469560937, 'samples': 83856, 'steps': 1746, 'loss/train': 3.8368618488311768} +07/25/2024 11:33:29 - INFO - __main__ - Step 1748: {'lr': 0.0004999393311252361, 'samples': 83904, 'steps': 1747, 'loss/train': 4.207673072814941} +07/25/2024 11:33:29 - INFO - __main__ - Step 1749: {'lr': 0.0004999392151837123, 'samples': 83952, 'steps': 1748, 'loss/train': 6.687884330749512} +07/25/2024 11:33:29 - INFO - __main__ - Step 1750: {'lr': 0.0004999390991315225, 'samples': 84000, 'steps': 1749, 'loss/train': 3.8266847133636475} +07/25/2024 11:33:29 - INFO - __main__ - Step 1751: {'lr': 0.0004999389829686665, 'samples': 84048, 'steps': 1750, 'loss/train': 3.219277858734131} +07/25/2024 11:33:30 - INFO - __main__ - Step 1752: {'lr': 0.0004999388666951448, 'samples': 84096, 'steps': 1751, 'loss/train': 3.824073076248169} +07/25/2024 11:33:30 - INFO - __main__ - Step 1753: {'lr': 0.0004999387503109569, 'samples': 84144, 'steps': 1752, 'loss/train': 3.7395975589752197} +07/25/2024 11:33:30 - INFO - __main__ - Step 1754: {'lr': 0.0004999386338161032, 'samples': 84192, 'steps': 1753, 'loss/train': 3.5957114696502686} +07/25/2024 11:33:31 - INFO - __main__ - Step 1755: {'lr': 0.0004999385172105838, 'samples': 84240, 'steps': 1754, 'loss/train': 3.917100429534912} +07/25/2024 11:33:31 - INFO - __main__ - Step 1756: {'lr': 0.0004999384004943985, 'samples': 84288, 'steps': 1755, 'loss/train': 4.1089701652526855} +07/25/2024 11:33:31 - INFO - __main__ - Step 1757: {'lr': 0.0004999382836675476, 'samples': 84336, 'steps': 1756, 'loss/train': 4.286787986755371} +07/25/2024 11:33:31 - INFO - __main__ - Step 1758: {'lr': 0.0004999381667300309, 'samples': 84384, 'steps': 1757, 'loss/train': 3.9267921447753906} +07/25/2024 11:33:32 - INFO - __main__ - Step 1759: {'lr': 0.0004999380496818486, 'samples': 84432, 'steps': 1758, 'loss/train': 4.260997295379639} +07/25/2024 11:33:32 - INFO - __main__ - Step 1760: {'lr': 0.0004999379325230009, 'samples': 84480, 'steps': 1759, 'loss/train': 3.521761894226074} +07/25/2024 11:33:32 - INFO - __main__ - Step 1761: {'lr': 0.0004999378152534874, 'samples': 84528, 'steps': 1760, 'loss/train': 3.937525510787964} +07/25/2024 11:33:33 - INFO - __main__ - Step 1762: {'lr': 0.0004999376978733087, 'samples': 84576, 'steps': 1761, 'loss/train': 4.547691822052002} +07/25/2024 11:33:33 - INFO - __main__ - Step 1763: {'lr': 0.0004999375803824645, 'samples': 84624, 'steps': 1762, 'loss/train': 1.9417058229446411} +07/25/2024 11:33:33 - INFO - __main__ - Step 1764: {'lr': 0.0004999374627809549, 'samples': 84672, 'steps': 1763, 'loss/train': 4.4680986404418945} +07/25/2024 11:33:33 - INFO - __main__ - Step 1765: {'lr': 0.0004999373450687801, 'samples': 84720, 'steps': 1764, 'loss/train': 3.5785470008850098} +07/25/2024 11:33:34 - INFO - __main__ - Step 1766: {'lr': 0.0004999372272459399, 'samples': 84768, 'steps': 1765, 'loss/train': 3.379610776901245} +07/25/2024 11:33:34 - INFO - __main__ - Step 1767: {'lr': 0.0004999371093124346, 'samples': 84816, 'steps': 1766, 'loss/train': 3.3728137016296387} +07/25/2024 11:33:34 - INFO - __main__ - Step 1768: {'lr': 0.0004999369912682641, 'samples': 84864, 'steps': 1767, 'loss/train': 3.235597848892212} +07/25/2024 11:33:35 - INFO - __main__ - Step 1769: {'lr': 0.0004999368731134285, 'samples': 84912, 'steps': 1768, 'loss/train': 3.41898775100708} +07/25/2024 11:33:35 - INFO - __main__ - Step 1770: {'lr': 0.0004999367548479278, 'samples': 84960, 'steps': 1769, 'loss/train': 3.460465669631958} +07/25/2024 11:33:35 - INFO - __main__ - Step 1771: {'lr': 0.0004999366364717623, 'samples': 85008, 'steps': 1770, 'loss/train': 1.4478120803833008} +07/25/2024 11:33:35 - INFO - __main__ - Step 1772: {'lr': 0.0004999365179849318, 'samples': 85056, 'steps': 1771, 'loss/train': 4.535628795623779} +07/25/2024 11:33:36 - INFO - __main__ - Step 1773: {'lr': 0.0004999363993874363, 'samples': 85104, 'steps': 1772, 'loss/train': 6.271758556365967} +07/25/2024 11:33:36 - INFO - __main__ - Step 1774: {'lr': 0.0004999362806792761, 'samples': 85152, 'steps': 1773, 'loss/train': 3.5365257263183594} +07/25/2024 11:33:36 - INFO - __main__ - Step 1775: {'lr': 0.000499936161860451, 'samples': 85200, 'steps': 1774, 'loss/train': 3.8609461784362793} +07/25/2024 11:33:37 - INFO - __main__ - Step 1776: {'lr': 0.0004999360429309613, 'samples': 85248, 'steps': 1775, 'loss/train': 3.768311023712158} +07/25/2024 11:33:37 - INFO - __main__ - Step 1777: {'lr': 0.0004999359238908068, 'samples': 85296, 'steps': 1776, 'loss/train': 3.6082637310028076} +07/25/2024 11:33:37 - INFO - __main__ - Step 1778: {'lr': 0.0004999358047399877, 'samples': 85344, 'steps': 1777, 'loss/train': 3.8596231937408447} +07/25/2024 11:33:37 - INFO - __main__ - Step 1779: {'lr': 0.000499935685478504, 'samples': 85392, 'steps': 1778, 'loss/train': 3.772376537322998} +07/25/2024 11:33:38 - INFO - __main__ - Step 1780: {'lr': 0.0004999355661063559, 'samples': 85440, 'steps': 1779, 'loss/train': 3.9731829166412354} +07/25/2024 11:33:38 - INFO - __main__ - Step 1781: {'lr': 0.0004999354466235431, 'samples': 85488, 'steps': 1780, 'loss/train': 4.223360538482666} +07/25/2024 11:33:38 - INFO - __main__ - Step 1782: {'lr': 0.0004999353270300662, 'samples': 85536, 'steps': 1781, 'loss/train': 3.95180606842041} +07/25/2024 11:33:39 - INFO - __main__ - Step 1783: {'lr': 0.0004999352073259248, 'samples': 85584, 'steps': 1782, 'loss/train': 4.227056980133057} +07/25/2024 11:33:39 - INFO - __main__ - Step 1784: {'lr': 0.0004999350875111191, 'samples': 85632, 'steps': 1783, 'loss/train': 3.7156057357788086} +07/25/2024 11:33:39 - INFO - __main__ - Step 1785: {'lr': 0.000499934967585649, 'samples': 85680, 'steps': 1784, 'loss/train': 3.817558526992798} +07/25/2024 11:33:39 - INFO - __main__ - Step 1786: {'lr': 0.0004999348475495149, 'samples': 85728, 'steps': 1785, 'loss/train': 3.317204713821411} +07/25/2024 11:33:40 - INFO - __main__ - Step 1787: {'lr': 0.0004999347274027166, 'samples': 85776, 'steps': 1786, 'loss/train': 1.1843246221542358} +07/25/2024 11:33:40 - INFO - __main__ - Step 1788: {'lr': 0.0004999346071452542, 'samples': 85824, 'steps': 1787, 'loss/train': 4.255121231079102} +07/25/2024 11:33:40 - INFO - __main__ - Step 1789: {'lr': 0.0004999344867771277, 'samples': 85872, 'steps': 1788, 'loss/train': 3.988492965698242} +07/25/2024 11:33:41 - INFO - __main__ - Step 1790: {'lr': 0.0004999343662983373, 'samples': 85920, 'steps': 1789, 'loss/train': 4.108956336975098} +07/25/2024 11:33:41 - INFO - __main__ - Step 1791: {'lr': 0.0004999342457088829, 'samples': 85968, 'steps': 1790, 'loss/train': 3.3686022758483887} +07/25/2024 11:33:41 - INFO - __main__ - Step 1792: {'lr': 0.0004999341250087646, 'samples': 86016, 'steps': 1791, 'loss/train': 3.0112948417663574} +07/25/2024 11:33:41 - INFO - __main__ - Step 1793: {'lr': 0.0004999340041979825, 'samples': 86064, 'steps': 1792, 'loss/train': 3.873676061630249} +07/25/2024 11:33:42 - INFO - __main__ - Step 1794: {'lr': 0.0004999338832765367, 'samples': 86112, 'steps': 1793, 'loss/train': 3.0263044834136963} +07/25/2024 11:33:42 - INFO - __main__ - Step 1795: {'lr': 0.0004999337622444271, 'samples': 86160, 'steps': 1794, 'loss/train': 2.5993807315826416} +07/25/2024 11:33:42 - INFO - __main__ - Step 1796: {'lr': 0.0004999336411016539, 'samples': 86208, 'steps': 1795, 'loss/train': 3.9383604526519775} +07/25/2024 11:33:43 - INFO - __main__ - Step 1797: {'lr': 0.000499933519848217, 'samples': 86256, 'steps': 1796, 'loss/train': 3.6343722343444824} +07/25/2024 11:33:43 - INFO - __main__ - Step 1798: {'lr': 0.0004999333984841165, 'samples': 86304, 'steps': 1797, 'loss/train': 3.5925121307373047} +07/25/2024 11:33:43 - INFO - __main__ - Step 1799: {'lr': 0.0004999332770093527, 'samples': 86352, 'steps': 1798, 'loss/train': 3.2161967754364014} +07/25/2024 11:33:43 - INFO - __main__ - Step 1800: {'lr': 0.0004999331554239253, 'samples': 86400, 'steps': 1799, 'loss/train': 3.945577621459961} +07/25/2024 11:33:44 - INFO - __main__ - Step 1801: {'lr': 0.0004999330337278345, 'samples': 86448, 'steps': 1800, 'loss/train': 3.5599334239959717} +07/25/2024 11:33:44 - INFO - __main__ - Step 1802: {'lr': 0.0004999329119210803, 'samples': 86496, 'steps': 1801, 'loss/train': 3.960554599761963} +07/25/2024 11:33:44 - INFO - __main__ - Step 1803: {'lr': 0.000499932790003663, 'samples': 86544, 'steps': 1802, 'loss/train': 3.7337350845336914} +07/25/2024 11:33:44 - INFO - __main__ - Step 1804: {'lr': 0.0004999326679755823, 'samples': 86592, 'steps': 1803, 'loss/train': 3.354907751083374} +07/25/2024 11:33:45 - INFO - __main__ - Step 1805: {'lr': 0.0004999325458368385, 'samples': 86640, 'steps': 1804, 'loss/train': 3.7541861534118652} +07/25/2024 11:33:45 - INFO - __main__ - Step 1806: {'lr': 0.0004999324235874315, 'samples': 86688, 'steps': 1805, 'loss/train': 4.036715030670166} +07/25/2024 11:33:45 - INFO - __main__ - Step 1807: {'lr': 0.0004999323012273615, 'samples': 86736, 'steps': 1806, 'loss/train': 4.321915149688721} +07/25/2024 11:33:46 - INFO - __main__ - Step 1808: {'lr': 0.0004999321787566284, 'samples': 86784, 'steps': 1807, 'loss/train': 4.085292339324951} +07/25/2024 11:33:46 - INFO - __main__ - Step 1809: {'lr': 0.0004999320561752325, 'samples': 86832, 'steps': 1808, 'loss/train': 3.7050764560699463} +07/25/2024 11:33:46 - INFO - __main__ - Step 1810: {'lr': 0.0004999319334831735, 'samples': 86880, 'steps': 1809, 'loss/train': 4.309046745300293} +07/25/2024 11:33:46 - INFO - __main__ - Step 1811: {'lr': 0.0004999318106804517, 'samples': 86928, 'steps': 1810, 'loss/train': 0.9345848560333252} +07/25/2024 11:33:47 - INFO - __main__ - Step 1812: {'lr': 0.000499931687767067, 'samples': 86976, 'steps': 1811, 'loss/train': 3.4949681758880615} +07/25/2024 11:33:47 - INFO - __main__ - Step 1813: {'lr': 0.0004999315647430198, 'samples': 87024, 'steps': 1812, 'loss/train': 4.1827239990234375} +07/25/2024 11:33:47 - INFO - __main__ - Step 1814: {'lr': 0.0004999314416083098, 'samples': 87072, 'steps': 1813, 'loss/train': 3.8778364658355713} +07/25/2024 11:33:48 - INFO - __main__ - Step 1815: {'lr': 0.000499931318362937, 'samples': 87120, 'steps': 1814, 'loss/train': 4.071133613586426} +07/25/2024 11:33:48 - INFO - __main__ - Step 1816: {'lr': 0.0004999311950069019, 'samples': 87168, 'steps': 1815, 'loss/train': 3.055159330368042} +07/25/2024 11:33:48 - INFO - __main__ - Step 1817: {'lr': 0.0004999310715402041, 'samples': 87216, 'steps': 1816, 'loss/train': 3.630788803100586} +07/25/2024 11:33:48 - INFO - __main__ - Step 1818: {'lr': 0.0004999309479628437, 'samples': 87264, 'steps': 1817, 'loss/train': 2.4632701873779297} +07/25/2024 11:33:49 - INFO - __main__ - Step 1819: {'lr': 0.0004999308242748211, 'samples': 87312, 'steps': 1818, 'loss/train': 4.1531291007995605} +07/25/2024 11:33:49 - INFO - __main__ - Step 1820: {'lr': 0.0004999307004761361, 'samples': 87360, 'steps': 1819, 'loss/train': 3.6753427982330322} +07/25/2024 11:33:49 - INFO - __main__ - Step 1821: {'lr': 0.0004999305765667888, 'samples': 87408, 'steps': 1820, 'loss/train': 4.492952823638916} +07/25/2024 11:33:50 - INFO - __main__ - Step 1822: {'lr': 0.0004999304525467791, 'samples': 87456, 'steps': 1821, 'loss/train': 3.457455635070801} +07/25/2024 11:33:50 - INFO - __main__ - Step 1823: {'lr': 0.0004999303284161073, 'samples': 87504, 'steps': 1822, 'loss/train': 3.7861976623535156} +07/25/2024 11:33:50 - INFO - __main__ - Step 1824: {'lr': 0.0004999302041747733, 'samples': 87552, 'steps': 1823, 'loss/train': 2.9245851039886475} +07/25/2024 11:33:50 - INFO - __main__ - Step 1825: {'lr': 0.0004999300798227773, 'samples': 87600, 'steps': 1824, 'loss/train': 3.454167366027832} +07/25/2024 11:33:51 - INFO - __main__ - Step 1826: {'lr': 0.0004999299553601192, 'samples': 87648, 'steps': 1825, 'loss/train': 3.8506784439086914} +07/25/2024 11:33:51 - INFO - __main__ - Step 1827: {'lr': 0.0004999298307867991, 'samples': 87696, 'steps': 1826, 'loss/train': 3.494570255279541} +07/25/2024 11:33:51 - INFO - __main__ - Step 1828: {'lr': 0.0004999297061028171, 'samples': 87744, 'steps': 1827, 'loss/train': 4.109969615936279} +07/25/2024 11:33:52 - INFO - __main__ - Step 1829: {'lr': 0.0004999295813081732, 'samples': 87792, 'steps': 1828, 'loss/train': 4.037136077880859} +07/25/2024 11:33:52 - INFO - __main__ - Step 1830: {'lr': 0.0004999294564028676, 'samples': 87840, 'steps': 1829, 'loss/train': 3.753833770751953} +07/25/2024 11:33:52 - INFO - __main__ - Step 1831: {'lr': 0.0004999293313869001, 'samples': 87888, 'steps': 1830, 'loss/train': 4.1162896156311035} +07/25/2024 11:33:52 - INFO - __main__ - Step 1832: {'lr': 0.000499929206260271, 'samples': 87936, 'steps': 1831, 'loss/train': 3.6657986640930176} +07/25/2024 11:33:53 - INFO - __main__ - Step 1833: {'lr': 0.0004999290810229802, 'samples': 87984, 'steps': 1832, 'loss/train': 4.500197887420654} +07/25/2024 11:33:53 - INFO - __main__ - Step 1834: {'lr': 0.0004999289556750278, 'samples': 88032, 'steps': 1833, 'loss/train': 3.983546495437622} +07/25/2024 11:33:53 - INFO - __main__ - Step 1835: {'lr': 0.0004999288302164138, 'samples': 88080, 'steps': 1834, 'loss/train': 1.358883023262024} +07/25/2024 11:33:54 - INFO - __main__ - Step 1836: {'lr': 0.0004999287046471385, 'samples': 88128, 'steps': 1835, 'loss/train': 3.0404558181762695} +07/25/2024 11:33:54 - INFO - __main__ - Step 1837: {'lr': 0.0004999285789672016, 'samples': 88176, 'steps': 1836, 'loss/train': 3.7440056800842285} +07/25/2024 11:33:54 - INFO - __main__ - Step 1838: {'lr': 0.0004999284531766034, 'samples': 88224, 'steps': 1837, 'loss/train': 4.008229732513428} +07/25/2024 11:33:54 - INFO - __main__ - Step 1839: {'lr': 0.0004999283272753438, 'samples': 88272, 'steps': 1838, 'loss/train': 3.7805206775665283} +07/25/2024 11:33:55 - INFO - __main__ - Step 1840: {'lr': 0.0004999282012634231, 'samples': 88320, 'steps': 1839, 'loss/train': 3.4210715293884277} +07/25/2024 11:33:55 - INFO - __main__ - Step 1841: {'lr': 0.000499928075140841, 'samples': 88368, 'steps': 1840, 'loss/train': 3.878589391708374} +07/25/2024 11:33:55 - INFO - __main__ - Step 1842: {'lr': 0.000499927948907598, 'samples': 88416, 'steps': 1841, 'loss/train': 3.7621641159057617} +07/25/2024 11:33:56 - INFO - __main__ - Step 1843: {'lr': 0.0004999278225636936, 'samples': 88464, 'steps': 1842, 'loss/train': 3.9038681983947754} +07/25/2024 11:33:56 - INFO - __main__ - Step 1844: {'lr': 0.0004999276961091284, 'samples': 88512, 'steps': 1843, 'loss/train': 4.281815052032471} +07/25/2024 11:33:56 - INFO - __main__ - Step 1845: {'lr': 0.0004999275695439022, 'samples': 88560, 'steps': 1844, 'loss/train': 3.7512757778167725} +07/25/2024 11:33:56 - INFO - __main__ - Step 1846: {'lr': 0.000499927442868015, 'samples': 88608, 'steps': 1845, 'loss/train': 3.7783613204956055} +07/25/2024 11:33:57 - INFO - __main__ - Step 1847: {'lr': 0.0004999273160814669, 'samples': 88656, 'steps': 1846, 'loss/train': 3.80804705619812} +07/25/2024 11:33:57 - INFO - __main__ - Step 1848: {'lr': 0.000499927189184258, 'samples': 88704, 'steps': 1847, 'loss/train': 1.6370625495910645} +07/25/2024 11:33:57 - INFO - __main__ - Step 1849: {'lr': 0.0004999270621763884, 'samples': 88752, 'steps': 1848, 'loss/train': 3.854491949081421} +07/25/2024 11:33:58 - INFO - __main__ - Step 1850: {'lr': 0.0004999269350578581, 'samples': 88800, 'steps': 1849, 'loss/train': 3.702065944671631} +07/25/2024 11:33:58 - INFO - __main__ - Step 1851: {'lr': 0.0004999268078286672, 'samples': 88848, 'steps': 1850, 'loss/train': 3.8735454082489014} +07/25/2024 11:33:58 - INFO - __main__ - Step 1852: {'lr': 0.0004999266804888156, 'samples': 88896, 'steps': 1851, 'loss/train': 4.408028602600098} +07/25/2024 11:33:58 - INFO - __main__ - Step 1853: {'lr': 0.0004999265530383036, 'samples': 88944, 'steps': 1852, 'loss/train': 4.354888916015625} +07/25/2024 11:33:59 - INFO - __main__ - Step 1854: {'lr': 0.000499926425477131, 'samples': 88992, 'steps': 1853, 'loss/train': 3.6876893043518066} +07/25/2024 11:33:59 - INFO - __main__ - Step 1855: {'lr': 0.0004999262978052979, 'samples': 89040, 'steps': 1854, 'loss/train': 3.909576416015625} +07/25/2024 11:33:59 - INFO - __main__ - Step 1856: {'lr': 0.0004999261700228047, 'samples': 89088, 'steps': 1855, 'loss/train': 4.2885260581970215} +07/25/2024 11:34:00 - INFO - __main__ - Step 1857: {'lr': 0.0004999260421296509, 'samples': 89136, 'steps': 1856, 'loss/train': 3.5569071769714355} +07/25/2024 11:34:00 - INFO - __main__ - Step 1858: {'lr': 0.0004999259141258371, 'samples': 89184, 'steps': 1857, 'loss/train': 2.4482169151306152} +07/25/2024 11:34:00 - INFO - __main__ - Step 1859: {'lr': 0.0004999257860113631, 'samples': 89232, 'steps': 1858, 'loss/train': 1.1926506757736206} +07/25/2024 11:34:00 - INFO - __main__ - Step 1860: {'lr': 0.0004999256577862288, 'samples': 89280, 'steps': 1859, 'loss/train': 3.426959753036499} +07/25/2024 11:34:01 - INFO - __main__ - Step 1861: {'lr': 0.0004999255294504345, 'samples': 89328, 'steps': 1860, 'loss/train': 3.927938938140869} +07/25/2024 11:34:01 - INFO - __main__ - Step 1862: {'lr': 0.0004999254010039803, 'samples': 89376, 'steps': 1861, 'loss/train': 3.870553731918335} +07/25/2024 11:34:01 - INFO - __main__ - Step 1863: {'lr': 0.0004999252724468659, 'samples': 89424, 'steps': 1862, 'loss/train': 3.574364423751831} +07/25/2024 11:34:02 - INFO - __main__ - Step 1864: {'lr': 0.0004999251437790917, 'samples': 89472, 'steps': 1863, 'loss/train': 3.995661735534668} +07/25/2024 11:34:02 - INFO - __main__ - Step 1865: {'lr': 0.0004999250150006576, 'samples': 89520, 'steps': 1864, 'loss/train': 3.9588890075683594} +07/25/2024 11:34:02 - INFO - __main__ - Step 1866: {'lr': 0.0004999248861115638, 'samples': 89568, 'steps': 1865, 'loss/train': 3.838151216506958} +07/25/2024 11:34:02 - INFO - __main__ - Step 1867: {'lr': 0.0004999247571118103, 'samples': 89616, 'steps': 1866, 'loss/train': 4.250670909881592} +07/25/2024 11:34:03 - INFO - __main__ - Step 1868: {'lr': 0.0004999246280013971, 'samples': 89664, 'steps': 1867, 'loss/train': 4.325772762298584} +07/25/2024 11:34:03 - INFO - __main__ - Step 1869: {'lr': 0.0004999244987803242, 'samples': 89712, 'steps': 1868, 'loss/train': 3.7147696018218994} +07/25/2024 11:34:03 - INFO - __main__ - Step 1870: {'lr': 0.0004999243694485917, 'samples': 89760, 'steps': 1869, 'loss/train': 4.084348201751709} +07/25/2024 11:34:04 - INFO - __main__ - Step 1871: {'lr': 0.0004999242400061997, 'samples': 89808, 'steps': 1870, 'loss/train': 4.876096248626709} +07/25/2024 11:34:04 - INFO - __main__ - Step 1872: {'lr': 0.0004999241104531484, 'samples': 89856, 'steps': 1871, 'loss/train': 4.513338088989258} +07/25/2024 11:34:04 - INFO - __main__ - Step 1873: {'lr': 0.0004999239807894376, 'samples': 89904, 'steps': 1872, 'loss/train': 4.260583877563477} +07/25/2024 11:34:04 - INFO - __main__ - Step 1874: {'lr': 0.0004999238510150674, 'samples': 89952, 'steps': 1873, 'loss/train': 3.648165702819824} +07/25/2024 11:34:05 - INFO - __main__ - Step 1875: {'lr': 0.000499923721130038, 'samples': 90000, 'steps': 1874, 'loss/train': 3.701159715652466} +07/25/2024 11:34:05 - INFO - __main__ - Step 1876: {'lr': 0.0004999235911343495, 'samples': 90048, 'steps': 1875, 'loss/train': 3.9182846546173096} +07/25/2024 11:34:05 - INFO - __main__ - Step 1877: {'lr': 0.0004999234610280017, 'samples': 90096, 'steps': 1876, 'loss/train': 4.0105485916137695} +07/25/2024 11:34:06 - INFO - __main__ - Step 1878: {'lr': 0.0004999233308109947, 'samples': 90144, 'steps': 1877, 'loss/train': 2.283745765686035} +07/25/2024 11:34:06 - INFO - __main__ - Step 1879: {'lr': 0.0004999232004833288, 'samples': 90192, 'steps': 1878, 'loss/train': 3.7173867225646973} +07/25/2024 11:34:06 - INFO - __main__ - Step 1880: {'lr': 0.0004999230700450039, 'samples': 90240, 'steps': 1879, 'loss/train': 4.537574768066406} +07/25/2024 11:34:06 - INFO - __main__ - Step 1881: {'lr': 0.00049992293949602, 'samples': 90288, 'steps': 1880, 'loss/train': 4.144777774810791} +07/25/2024 11:34:07 - INFO - __main__ - Step 1882: {'lr': 0.0004999228088363773, 'samples': 90336, 'steps': 1881, 'loss/train': 3.212167978286743} +07/25/2024 11:34:07 - INFO - __main__ - Step 1883: {'lr': 0.0004999226780660757, 'samples': 90384, 'steps': 1882, 'loss/train': 1.1260368824005127} +07/25/2024 11:34:07 - INFO - __main__ - Step 1884: {'lr': 0.0004999225471851153, 'samples': 90432, 'steps': 1883, 'loss/train': 4.262405872344971} +07/25/2024 11:34:08 - INFO - __main__ - Step 1885: {'lr': 0.0004999224161934964, 'samples': 90480, 'steps': 1884, 'loss/train': 3.671220064163208} +07/25/2024 11:34:08 - INFO - __main__ - Step 1886: {'lr': 0.0004999222850912187, 'samples': 90528, 'steps': 1885, 'loss/train': 3.9424986839294434} +07/25/2024 11:34:08 - INFO - __main__ - Step 1887: {'lr': 0.0004999221538782825, 'samples': 90576, 'steps': 1886, 'loss/train': 4.218197822570801} +07/25/2024 11:34:08 - INFO - __main__ - Step 1888: {'lr': 0.0004999220225546877, 'samples': 90624, 'steps': 1887, 'loss/train': 3.6347291469573975} +07/25/2024 11:34:09 - INFO - __main__ - Step 1889: {'lr': 0.0004999218911204345, 'samples': 90672, 'steps': 1888, 'loss/train': 3.698291778564453} +07/25/2024 11:34:09 - INFO - __main__ - Step 1890: {'lr': 0.0004999217595755228, 'samples': 90720, 'steps': 1889, 'loss/train': 3.290173292160034} +07/25/2024 11:34:09 - INFO - __main__ - Step 1891: {'lr': 0.0004999216279199528, 'samples': 90768, 'steps': 1890, 'loss/train': 4.341283321380615} +07/25/2024 11:34:10 - INFO - __main__ - Step 1892: {'lr': 0.0004999214961537245, 'samples': 90816, 'steps': 1891, 'loss/train': 3.8315465450286865} +07/25/2024 11:34:10 - INFO - __main__ - Step 1893: {'lr': 0.0004999213642768379, 'samples': 90864, 'steps': 1892, 'loss/train': 3.312495231628418} +07/25/2024 11:34:10 - INFO - __main__ - Step 1894: {'lr': 0.0004999212322892932, 'samples': 90912, 'steps': 1893, 'loss/train': 3.8931751251220703} +07/25/2024 11:34:10 - INFO - __main__ - Step 1895: {'lr': 0.0004999211001910904, 'samples': 90960, 'steps': 1894, 'loss/train': 4.320217609405518} +07/25/2024 11:34:11 - INFO - __main__ - Step 1896: {'lr': 0.0004999209679822295, 'samples': 91008, 'steps': 1895, 'loss/train': 3.768256902694702} +07/25/2024 11:34:11 - INFO - __main__ - Step 1897: {'lr': 0.0004999208356627106, 'samples': 91056, 'steps': 1896, 'loss/train': 3.809368848800659} +07/25/2024 11:34:11 - INFO - __main__ - Step 1898: {'lr': 0.0004999207032325338, 'samples': 91104, 'steps': 1897, 'loss/train': 3.410857677459717} +07/25/2024 11:34:12 - INFO - __main__ - Step 1899: {'lr': 0.000499920570691699, 'samples': 91152, 'steps': 1898, 'loss/train': 3.524233102798462} +07/25/2024 11:34:12 - INFO - __main__ - Step 1900: {'lr': 0.0004999204380402065, 'samples': 91200, 'steps': 1899, 'loss/train': 3.7678909301757812} +07/25/2024 11:34:12 - INFO - __main__ - Step 1901: {'lr': 0.0004999203052780562, 'samples': 91248, 'steps': 1900, 'loss/train': 3.727400541305542} +07/25/2024 11:34:12 - INFO - __main__ - Step 1902: {'lr': 0.0004999201724052482, 'samples': 91296, 'steps': 1901, 'loss/train': 4.0646772384643555} +07/25/2024 11:34:13 - INFO - __main__ - Step 1903: {'lr': 0.0004999200394217825, 'samples': 91344, 'steps': 1902, 'loss/train': 4.206108093261719} +07/25/2024 11:34:13 - INFO - __main__ - Step 1904: {'lr': 0.0004999199063276592, 'samples': 91392, 'steps': 1903, 'loss/train': 3.8898203372955322} +07/25/2024 11:34:13 - INFO - __main__ - Step 1905: {'lr': 0.0004999197731228784, 'samples': 91440, 'steps': 1904, 'loss/train': 3.6279542446136475} +07/25/2024 11:34:14 - INFO - __main__ - Step 1906: {'lr': 0.0004999196398074401, 'samples': 91488, 'steps': 1905, 'loss/train': 4.242966175079346} +07/25/2024 11:34:14 - INFO - __main__ - Step 1907: {'lr': 0.0004999195063813444, 'samples': 91536, 'steps': 1906, 'loss/train': 1.0362778902053833} +07/25/2024 11:34:14 - INFO - __main__ - Step 1908: {'lr': 0.0004999193728445914, 'samples': 91584, 'steps': 1907, 'loss/train': 4.951849460601807} +07/25/2024 11:34:14 - INFO - __main__ - Step 1909: {'lr': 0.0004999192391971811, 'samples': 91632, 'steps': 1908, 'loss/train': 3.4239566326141357} +07/25/2024 11:34:15 - INFO - __main__ - Step 1910: {'lr': 0.0004999191054391135, 'samples': 91680, 'steps': 1909, 'loss/train': 3.54115891456604} +07/25/2024 11:34:15 - INFO - __main__ - Step 1911: {'lr': 0.0004999189715703888, 'samples': 91728, 'steps': 1910, 'loss/train': 3.886373281478882} +07/25/2024 11:34:15 - INFO - __main__ - Step 1912: {'lr': 0.0004999188375910069, 'samples': 91776, 'steps': 1911, 'loss/train': 3.7118282318115234} +07/25/2024 11:34:16 - INFO - __main__ - Step 1913: {'lr': 0.000499918703500968, 'samples': 91824, 'steps': 1912, 'loss/train': 3.5732903480529785} +07/25/2024 11:34:16 - INFO - __main__ - Step 1914: {'lr': 0.0004999185693002721, 'samples': 91872, 'steps': 1913, 'loss/train': 3.872448682785034} +07/25/2024 11:34:16 - INFO - __main__ - Step 1915: {'lr': 0.0004999184349889193, 'samples': 91920, 'steps': 1914, 'loss/train': 5.015003204345703} +07/25/2024 11:34:16 - INFO - __main__ - Step 1916: {'lr': 0.0004999183005669095, 'samples': 91968, 'steps': 1915, 'loss/train': 4.565908432006836} +07/25/2024 11:34:17 - INFO - __main__ - Step 1917: {'lr': 0.0004999181660342429, 'samples': 92016, 'steps': 1916, 'loss/train': 3.798624038696289} +07/25/2024 11:34:17 - INFO - __main__ - Step 1918: {'lr': 0.0004999180313909196, 'samples': 92064, 'steps': 1917, 'loss/train': 4.126178741455078} +07/25/2024 11:34:17 - INFO - __main__ - Step 1919: {'lr': 0.0004999178966369395, 'samples': 92112, 'steps': 1918, 'loss/train': 4.165830135345459} +07/25/2024 11:34:18 - INFO - __main__ - Step 1920: {'lr': 0.0004999177617723029, 'samples': 92160, 'steps': 1919, 'loss/train': 3.4902150630950928} +07/25/2024 11:34:18 - INFO - __main__ - Step 1921: {'lr': 0.0004999176267970096, 'samples': 92208, 'steps': 1920, 'loss/train': 3.794621229171753} +07/25/2024 11:34:18 - INFO - __main__ - Step 1922: {'lr': 0.0004999174917110598, 'samples': 92256, 'steps': 1921, 'loss/train': 4.057922840118408} +07/25/2024 11:34:18 - INFO - __main__ - Step 1923: {'lr': 0.0004999173565144536, 'samples': 92304, 'steps': 1922, 'loss/train': 3.7085564136505127} +07/25/2024 11:34:19 - INFO - __main__ - Step 1924: {'lr': 0.0004999172212071909, 'samples': 92352, 'steps': 1923, 'loss/train': 3.721944808959961} +07/25/2024 11:34:19 - INFO - __main__ - Step 1925: {'lr': 0.000499917085789272, 'samples': 92400, 'steps': 1924, 'loss/train': 3.8861567974090576} +07/25/2024 11:34:19 - INFO - __main__ - Step 1926: {'lr': 0.0004999169502606967, 'samples': 92448, 'steps': 1925, 'loss/train': 3.8450777530670166} +07/25/2024 11:34:20 - INFO - __main__ - Step 1927: {'lr': 0.0004999168146214652, 'samples': 92496, 'steps': 1926, 'loss/train': 3.5661401748657227} +07/25/2024 11:34:20 - INFO - __main__ - Step 1928: {'lr': 0.0004999166788715775, 'samples': 92544, 'steps': 1927, 'loss/train': 3.30275559425354} +07/25/2024 11:34:20 - INFO - __main__ - Step 1929: {'lr': 0.0004999165430110338, 'samples': 92592, 'steps': 1928, 'loss/train': 3.621886730194092} +07/25/2024 11:34:20 - INFO - __main__ - Step 1930: {'lr': 0.000499916407039834, 'samples': 92640, 'steps': 1929, 'loss/train': 4.0623979568481445} +07/25/2024 11:34:21 - INFO - __main__ - Step 1931: {'lr': 0.0004999162709579783, 'samples': 92688, 'steps': 1930, 'loss/train': 3.5357487201690674} +07/25/2024 11:34:21 - INFO - __main__ - Step 1932: {'lr': 0.0004999161347654666, 'samples': 92736, 'steps': 1931, 'loss/train': 3.9121692180633545} +07/25/2024 11:34:21 - INFO - __main__ - Step 1933: {'lr': 0.0004999159984622989, 'samples': 92784, 'steps': 1932, 'loss/train': 3.6628708839416504} +07/25/2024 11:34:22 - INFO - __main__ - Step 1934: {'lr': 0.0004999158620484757, 'samples': 92832, 'steps': 1933, 'loss/train': 3.725064992904663} +07/25/2024 11:34:22 - INFO - __main__ - Step 1935: {'lr': 0.0004999157255239965, 'samples': 92880, 'steps': 1934, 'loss/train': 3.903564929962158} +07/25/2024 11:34:22 - INFO - __main__ - Step 1936: {'lr': 0.0004999155888888617, 'samples': 92928, 'steps': 1935, 'loss/train': 2.830479145050049} +07/25/2024 11:34:22 - INFO - __main__ - Step 1937: {'lr': 0.0004999154521430713, 'samples': 92976, 'steps': 1936, 'loss/train': 3.6097962856292725} +07/25/2024 11:34:23 - INFO - __main__ - Step 1938: {'lr': 0.0004999153152866253, 'samples': 93024, 'steps': 1937, 'loss/train': 3.774661064147949} +07/25/2024 11:34:23 - INFO - __main__ - Step 1939: {'lr': 0.0004999151783195238, 'samples': 93072, 'steps': 1938, 'loss/train': 3.1739249229431152} +07/25/2024 11:34:23 - INFO - __main__ - Step 1940: {'lr': 0.000499915041241767, 'samples': 93120, 'steps': 1939, 'loss/train': 4.643953800201416} +07/25/2024 11:34:24 - INFO - __main__ - Step 1941: {'lr': 0.0004999149040533547, 'samples': 93168, 'steps': 1940, 'loss/train': 3.920475482940674} +07/25/2024 11:34:24 - INFO - __main__ - Step 1942: {'lr': 0.0004999147667542871, 'samples': 93216, 'steps': 1941, 'loss/train': 3.3620262145996094} +07/25/2024 11:34:24 - INFO - __main__ - Step 1943: {'lr': 0.0004999146293445643, 'samples': 93264, 'steps': 1942, 'loss/train': 3.8377130031585693} +07/25/2024 11:34:24 - INFO - __main__ - Step 1944: {'lr': 0.0004999144918241862, 'samples': 93312, 'steps': 1943, 'loss/train': 3.725566864013672} +07/25/2024 11:34:25 - INFO - __main__ - Step 1945: {'lr': 0.0004999143541931531, 'samples': 93360, 'steps': 1944, 'loss/train': 4.065221309661865} +07/25/2024 11:34:25 - INFO - __main__ - Step 1946: {'lr': 0.0004999142164514648, 'samples': 93408, 'steps': 1945, 'loss/train': 3.8663651943206787} +07/25/2024 11:34:25 - INFO - __main__ - Step 1947: {'lr': 0.0004999140785991214, 'samples': 93456, 'steps': 1946, 'loss/train': 3.4081270694732666} +07/25/2024 11:34:26 - INFO - __main__ - Step 1948: {'lr': 0.0004999139406361233, 'samples': 93504, 'steps': 1947, 'loss/train': 3.7960705757141113} +07/25/2024 11:34:26 - INFO - __main__ - Step 1949: {'lr': 0.0004999138025624701, 'samples': 93552, 'steps': 1948, 'loss/train': 3.8998734951019287} +07/25/2024 11:34:26 - INFO - __main__ - Step 1950: {'lr': 0.0004999136643781622, 'samples': 93600, 'steps': 1949, 'loss/train': 3.6157960891723633} +07/25/2024 11:34:26 - INFO - __main__ - Step 1951: {'lr': 0.0004999135260831995, 'samples': 93648, 'steps': 1950, 'loss/train': 4.39229154586792} +07/25/2024 11:34:27 - INFO - __main__ - Step 1952: {'lr': 0.000499913387677582, 'samples': 93696, 'steps': 1951, 'loss/train': 3.352597236633301} +07/25/2024 11:34:27 - INFO - __main__ - Step 1953: {'lr': 0.00049991324916131, 'samples': 93744, 'steps': 1952, 'loss/train': 1.8482224941253662} +07/25/2024 11:34:27 - INFO - __main__ - Step 1954: {'lr': 0.0004999131105343833, 'samples': 93792, 'steps': 1953, 'loss/train': 4.886741638183594} +07/25/2024 11:34:28 - INFO - __main__ - Step 1955: {'lr': 0.0004999129717968021, 'samples': 93840, 'steps': 1954, 'loss/train': 4.907382488250732} +07/25/2024 11:34:28 - INFO - __main__ - Step 1956: {'lr': 0.0004999128329485665, 'samples': 93888, 'steps': 1955, 'loss/train': 3.0469188690185547} +07/25/2024 11:34:28 - INFO - __main__ - Step 1957: {'lr': 0.0004999126939896765, 'samples': 93936, 'steps': 1956, 'loss/train': 3.718168258666992} +07/25/2024 11:34:28 - INFO - __main__ - Step 1958: {'lr': 0.0004999125549201321, 'samples': 93984, 'steps': 1957, 'loss/train': 3.5857415199279785} +07/25/2024 11:34:29 - INFO - __main__ - Step 1959: {'lr': 0.0004999124157399335, 'samples': 94032, 'steps': 1958, 'loss/train': 4.051205635070801} +07/25/2024 11:34:29 - INFO - __main__ - Step 1960: {'lr': 0.0004999122764490807, 'samples': 94080, 'steps': 1959, 'loss/train': 3.6290714740753174} +07/25/2024 11:34:29 - INFO - __main__ - Step 1961: {'lr': 0.0004999121370475736, 'samples': 94128, 'steps': 1960, 'loss/train': 3.777845859527588} +07/25/2024 11:34:30 - INFO - __main__ - Step 1962: {'lr': 0.0004999119975354125, 'samples': 94176, 'steps': 1961, 'loss/train': 3.609647035598755} +07/25/2024 11:34:30 - INFO - __main__ - Step 1963: {'lr': 0.0004999118579125973, 'samples': 94224, 'steps': 1962, 'loss/train': 2.664808511734009} +07/25/2024 11:34:30 - INFO - __main__ - Step 1964: {'lr': 0.0004999117181791283, 'samples': 94272, 'steps': 1963, 'loss/train': 3.7527658939361572} +07/25/2024 11:34:30 - INFO - __main__ - Step 1965: {'lr': 0.0004999115783350052, 'samples': 94320, 'steps': 1964, 'loss/train': 3.646890163421631} +07/25/2024 11:34:31 - INFO - __main__ - Step 1966: {'lr': 0.0004999114383802284, 'samples': 94368, 'steps': 1965, 'loss/train': 3.8397257328033447} +07/25/2024 11:34:31 - INFO - __main__ - Step 1967: {'lr': 0.0004999112983147978, 'samples': 94416, 'steps': 1966, 'loss/train': 3.736668586730957} +07/25/2024 11:34:31 - INFO - __main__ - Step 1968: {'lr': 0.0004999111581387133, 'samples': 94464, 'steps': 1967, 'loss/train': 3.950467348098755} +07/25/2024 11:34:32 - INFO - __main__ - Step 1969: {'lr': 0.0004999110178519753, 'samples': 94512, 'steps': 1968, 'loss/train': 3.88277006149292} +07/25/2024 11:34:32 - INFO - __main__ - Step 1970: {'lr': 0.0004999108774545837, 'samples': 94560, 'steps': 1969, 'loss/train': 3.0247385501861572} +07/25/2024 11:34:32 - INFO - __main__ - Step 1971: {'lr': 0.0004999107369465386, 'samples': 94608, 'steps': 1970, 'loss/train': 3.7679710388183594} +07/25/2024 11:34:32 - INFO - __main__ - Step 1972: {'lr': 0.00049991059632784, 'samples': 94656, 'steps': 1971, 'loss/train': 3.77113938331604} +07/25/2024 11:34:33 - INFO - __main__ - Step 1973: {'lr': 0.0004999104555984879, 'samples': 94704, 'steps': 1972, 'loss/train': 3.258164167404175} +07/25/2024 11:34:33 - INFO - __main__ - Step 1974: {'lr': 0.0004999103147584826, 'samples': 94752, 'steps': 1973, 'loss/train': 3.3205983638763428} +07/25/2024 11:34:33 - INFO - __main__ - Step 1975: {'lr': 0.0004999101738078239, 'samples': 94800, 'steps': 1974, 'loss/train': 4.104626178741455} +07/25/2024 11:34:34 - INFO - __main__ - Step 1976: {'lr': 0.000499910032746512, 'samples': 94848, 'steps': 1975, 'loss/train': 4.056273937225342} +07/25/2024 11:34:34 - INFO - __main__ - Step 1977: {'lr': 0.0004999098915745469, 'samples': 94896, 'steps': 1976, 'loss/train': 1.137073278427124} +07/25/2024 11:34:34 - INFO - __main__ - Step 1978: {'lr': 0.0004999097502919288, 'samples': 94944, 'steps': 1977, 'loss/train': 3.553925037384033} +07/25/2024 11:34:34 - INFO - __main__ - Step 1979: {'lr': 0.0004999096088986576, 'samples': 94992, 'steps': 1978, 'loss/train': 3.9794578552246094} +07/25/2024 11:34:35 - INFO - __main__ - Step 1980: {'lr': 0.0004999094673947336, 'samples': 95040, 'steps': 1979, 'loss/train': 3.5545921325683594} +07/25/2024 11:34:35 - INFO - __main__ - Step 1981: {'lr': 0.0004999093257801564, 'samples': 95088, 'steps': 1980, 'loss/train': 4.085680961608887} +07/25/2024 11:34:35 - INFO - __main__ - Step 1982: {'lr': 0.0004999091840549266, 'samples': 95136, 'steps': 1981, 'loss/train': 3.7346060276031494} +07/25/2024 11:34:36 - INFO - __main__ - Step 1983: {'lr': 0.0004999090422190439, 'samples': 95184, 'steps': 1982, 'loss/train': 4.1375555992126465} +07/25/2024 11:34:36 - INFO - __main__ - Step 1984: {'lr': 0.0004999089002725084, 'samples': 95232, 'steps': 1983, 'loss/train': 3.8425509929656982} +07/25/2024 11:34:36 - INFO - __main__ - Step 1985: {'lr': 0.0004999087582153204, 'samples': 95280, 'steps': 1984, 'loss/train': 3.86224365234375} +07/25/2024 11:34:36 - INFO - __main__ - Step 1986: {'lr': 0.0004999086160474797, 'samples': 95328, 'steps': 1985, 'loss/train': 3.8867874145507812} +07/25/2024 11:34:37 - INFO - __main__ - Step 1987: {'lr': 0.0004999084737689865, 'samples': 95376, 'steps': 1986, 'loss/train': 2.6210217475891113} +07/25/2024 11:34:37 - INFO - __main__ - Step 1988: {'lr': 0.0004999083313798409, 'samples': 95424, 'steps': 1987, 'loss/train': 3.7085952758789062} +07/25/2024 11:34:37 - INFO - __main__ - Step 1989: {'lr': 0.0004999081888800428, 'samples': 95472, 'steps': 1988, 'loss/train': 3.596893787384033} +07/25/2024 11:34:38 - INFO - __main__ - Step 1990: {'lr': 0.0004999080462695923, 'samples': 95520, 'steps': 1989, 'loss/train': 3.8826372623443604} +07/25/2024 11:34:38 - INFO - __main__ - Step 1991: {'lr': 0.0004999079035484896, 'samples': 95568, 'steps': 1990, 'loss/train': 3.869541883468628} +07/25/2024 11:34:38 - INFO - __main__ - Step 1992: {'lr': 0.0004999077607167347, 'samples': 95616, 'steps': 1991, 'loss/train': 3.79087233543396} +07/25/2024 11:34:38 - INFO - __main__ - Step 1993: {'lr': 0.0004999076177743276, 'samples': 95664, 'steps': 1992, 'loss/train': 4.172741889953613} +07/25/2024 11:34:39 - INFO - __main__ - Step 1994: {'lr': 0.0004999074747212684, 'samples': 95712, 'steps': 1993, 'loss/train': 2.011597156524658} +07/25/2024 11:34:39 - INFO - __main__ - Step 1995: {'lr': 0.0004999073315575573, 'samples': 95760, 'steps': 1994, 'loss/train': 3.869655132293701} +07/25/2024 11:34:39 - INFO - __main__ - Step 1996: {'lr': 0.0004999071882831941, 'samples': 95808, 'steps': 1995, 'loss/train': 3.974590301513672} +07/25/2024 11:34:39 - INFO - __main__ - Step 1997: {'lr': 0.0004999070448981791, 'samples': 95856, 'steps': 1996, 'loss/train': 3.2798871994018555} +07/25/2024 11:34:40 - INFO - __main__ - Step 1998: {'lr': 0.0004999069014025121, 'samples': 95904, 'steps': 1997, 'loss/train': 3.1956748962402344} +07/25/2024 11:34:40 - INFO - __main__ - Step 1999: {'lr': 0.0004999067577961936, 'samples': 95952, 'steps': 1998, 'loss/train': 3.609062433242798} +07/25/2024 11:34:40 - INFO - __main__ - Step 2000: {'lr': 0.0004999066140792231, 'samples': 96000, 'steps': 1999, 'loss/train': 2.6600680351257324} +07/25/2024 11:34:41 - INFO - __main__ - Step 2001: {'lr': 0.000499906470251601, 'samples': 96048, 'steps': 2000, 'loss/train': 0.8637572526931763} +07/25/2024 11:34:41 - INFO - __main__ - Step 2002: {'lr': 0.0004999063263133275, 'samples': 96096, 'steps': 2001, 'loss/train': 2.602491617202759} +07/25/2024 11:34:41 - INFO - __main__ - Step 2003: {'lr': 0.0004999061822644024, 'samples': 96144, 'steps': 2002, 'loss/train': 3.7685201168060303} +07/25/2024 11:34:41 - INFO - __main__ - Step 2004: {'lr': 0.0004999060381048259, 'samples': 96192, 'steps': 2003, 'loss/train': 4.718381881713867} +07/25/2024 11:34:42 - INFO - __main__ - Step 2005: {'lr': 0.0004999058938345978, 'samples': 96240, 'steps': 2004, 'loss/train': 3.6185035705566406} +07/25/2024 11:34:42 - INFO - __main__ - Step 2006: {'lr': 0.0004999057494537185, 'samples': 96288, 'steps': 2005, 'loss/train': 3.6369006633758545} +07/25/2024 11:34:42 - INFO - __main__ - Step 2007: {'lr': 0.0004999056049621879, 'samples': 96336, 'steps': 2006, 'loss/train': 4.1324076652526855} +07/25/2024 11:34:43 - INFO - __main__ - Step 2008: {'lr': 0.0004999054603600061, 'samples': 96384, 'steps': 2007, 'loss/train': 3.7475554943084717} +07/25/2024 11:34:43 - INFO - __main__ - Step 2009: {'lr': 0.0004999053156471733, 'samples': 96432, 'steps': 2008, 'loss/train': 3.5755109786987305} +07/25/2024 11:34:43 - INFO - __main__ - Step 2010: {'lr': 0.0004999051708236893, 'samples': 96480, 'steps': 2009, 'loss/train': 3.889533281326294} +07/25/2024 11:34:43 - INFO - __main__ - Step 2011: {'lr': 0.0004999050258895543, 'samples': 96528, 'steps': 2010, 'loss/train': 2.6299960613250732} +07/25/2024 11:34:44 - INFO - __main__ - Step 2012: {'lr': 0.0004999048808447685, 'samples': 96576, 'steps': 2011, 'loss/train': 3.9671335220336914} +07/25/2024 11:34:44 - INFO - __main__ - Step 2013: {'lr': 0.0004999047356893316, 'samples': 96624, 'steps': 2012, 'loss/train': 3.4806857109069824} +07/25/2024 11:34:44 - INFO - __main__ - Step 2014: {'lr': 0.000499904590423244, 'samples': 96672, 'steps': 2013, 'loss/train': 4.101968288421631} +07/25/2024 11:34:45 - INFO - __main__ - Step 2015: {'lr': 0.0004999044450465056, 'samples': 96720, 'steps': 2014, 'loss/train': 3.175248861312866} +07/25/2024 11:34:45 - INFO - __main__ - Step 2016: {'lr': 0.0004999042995591166, 'samples': 96768, 'steps': 2015, 'loss/train': 3.5456395149230957} +07/25/2024 11:34:45 - INFO - __main__ - Step 2017: {'lr': 0.0004999041539610769, 'samples': 96816, 'steps': 2016, 'loss/train': 4.088836669921875} +07/25/2024 11:34:45 - INFO - __main__ - Step 2018: {'lr': 0.0004999040082523866, 'samples': 96864, 'steps': 2017, 'loss/train': 3.259402275085449} +07/25/2024 11:34:46 - INFO - __main__ - Step 2019: {'lr': 0.000499903862433046, 'samples': 96912, 'steps': 2018, 'loss/train': 3.617086887359619} +07/25/2024 11:34:46 - INFO - __main__ - Step 2020: {'lr': 0.0004999037165030548, 'samples': 96960, 'steps': 2019, 'loss/train': 3.599346876144409} +07/25/2024 11:34:46 - INFO - __main__ - Step 2021: {'lr': 0.0004999035704624133, 'samples': 97008, 'steps': 2020, 'loss/train': 3.4293212890625} +07/25/2024 11:34:47 - INFO - __main__ - Step 2022: {'lr': 0.0004999034243111215, 'samples': 97056, 'steps': 2021, 'loss/train': 3.380282402038574} +07/25/2024 11:34:47 - INFO - __main__ - Step 2023: {'lr': 0.0004999032780491795, 'samples': 97104, 'steps': 2022, 'loss/train': 3.9994640350341797} +07/25/2024 11:34:47 - INFO - __main__ - Step 2024: {'lr': 0.0004999031316765873, 'samples': 97152, 'steps': 2023, 'loss/train': 3.453049659729004} +07/25/2024 11:34:47 - INFO - __main__ - Step 2025: {'lr': 0.000499902985193345, 'samples': 97200, 'steps': 2024, 'loss/train': 0.8950731754302979} +07/25/2024 11:34:48 - INFO - __main__ - Step 2026: {'lr': 0.0004999028385994526, 'samples': 97248, 'steps': 2025, 'loss/train': 3.327817678451538} +07/25/2024 11:34:48 - INFO - __main__ - Step 2027: {'lr': 0.0004999026918949103, 'samples': 97296, 'steps': 2026, 'loss/train': 3.69390606880188} +07/25/2024 11:34:48 - INFO - __main__ - Step 2028: {'lr': 0.000499902545079718, 'samples': 97344, 'steps': 2027, 'loss/train': 3.4952924251556396} +07/25/2024 11:34:49 - INFO - __main__ - Step 2029: {'lr': 0.0004999023981538758, 'samples': 97392, 'steps': 2028, 'loss/train': 3.570594310760498} +07/25/2024 11:34:49 - INFO - __main__ - Step 2030: {'lr': 0.000499902251117384, 'samples': 97440, 'steps': 2029, 'loss/train': 5.313755512237549} +07/25/2024 11:34:49 - INFO - __main__ - Step 2031: {'lr': 0.0004999021039702424, 'samples': 97488, 'steps': 2030, 'loss/train': 3.783790111541748} +07/25/2024 11:34:49 - INFO - __main__ - Step 2032: {'lr': 0.0004999019567124512, 'samples': 97536, 'steps': 2031, 'loss/train': 4.2274603843688965} +07/25/2024 11:34:50 - INFO - __main__ - Step 2033: {'lr': 0.0004999018093440104, 'samples': 97584, 'steps': 2032, 'loss/train': 3.119685649871826} +07/25/2024 11:34:50 - INFO - __main__ - Step 2034: {'lr': 0.00049990166186492, 'samples': 97632, 'steps': 2033, 'loss/train': 3.5261154174804688} +07/25/2024 11:34:50 - INFO - __main__ - Step 2035: {'lr': 0.0004999015142751802, 'samples': 97680, 'steps': 2034, 'loss/train': 2.555929660797119} +07/25/2024 11:34:51 - INFO - __main__ - Step 2036: {'lr': 0.0004999013665747911, 'samples': 97728, 'steps': 2035, 'loss/train': 3.296757936477661} +07/25/2024 11:34:51 - INFO - __main__ - Step 2037: {'lr': 0.0004999012187637526, 'samples': 97776, 'steps': 2036, 'loss/train': 4.193873405456543} +07/25/2024 11:34:51 - INFO - __main__ - Step 2038: {'lr': 0.0004999010708420648, 'samples': 97824, 'steps': 2037, 'loss/train': 3.897965669631958} +07/25/2024 11:34:51 - INFO - __main__ - Step 2039: {'lr': 0.0004999009228097279, 'samples': 97872, 'steps': 2038, 'loss/train': 3.9612977504730225} +07/25/2024 11:34:52 - INFO - __main__ - Step 2040: {'lr': 0.0004999007746667417, 'samples': 97920, 'steps': 2039, 'loss/train': 3.4291181564331055} +07/25/2024 11:34:52 - INFO - __main__ - Step 2041: {'lr': 0.0004999006264131067, 'samples': 97968, 'steps': 2040, 'loss/train': 3.6525893211364746} +07/25/2024 11:34:52 - INFO - __main__ - Step 2042: {'lr': 0.0004999004780488226, 'samples': 98016, 'steps': 2041, 'loss/train': 4.212245464324951} +07/25/2024 11:34:53 - INFO - __main__ - Step 2043: {'lr': 0.0004999003295738894, 'samples': 98064, 'steps': 2042, 'loss/train': 3.1420962810516357} +07/25/2024 11:34:53 - INFO - __main__ - Step 2044: {'lr': 0.0004999001809883075, 'samples': 98112, 'steps': 2043, 'loss/train': 3.1215808391571045} +07/25/2024 11:34:53 - INFO - __main__ - Step 2045: {'lr': 0.0004999000322920768, 'samples': 98160, 'steps': 2044, 'loss/train': 3.0600879192352295} +07/25/2024 11:34:53 - INFO - __main__ - Step 2046: {'lr': 0.0004998998834851974, 'samples': 98208, 'steps': 2045, 'loss/train': 3.997462034225464} +07/25/2024 11:34:54 - INFO - __main__ - Step 2047: {'lr': 0.0004998997345676692, 'samples': 98256, 'steps': 2046, 'loss/train': 3.879967451095581} +07/25/2024 11:34:54 - INFO - __main__ - Step 2048: {'lr': 0.0004998995855394925, 'samples': 98304, 'steps': 2047, 'loss/train': 3.854522943496704} +07/25/2024 11:34:54 - INFO - __main__ - Step 2049: {'lr': 0.0004998994364006673, 'samples': 98352, 'steps': 2048, 'loss/train': 1.4558027982711792} +07/25/2024 11:34:55 - INFO - __main__ - Step 2050: {'lr': 0.0004998992871511935, 'samples': 98400, 'steps': 2049, 'loss/train': 3.718463897705078} +07/25/2024 11:34:55 - INFO - __main__ - Step 2051: {'lr': 0.0004998991377910714, 'samples': 98448, 'steps': 2050, 'loss/train': 4.084470272064209} +07/25/2024 11:34:55 - INFO - __main__ - Step 2052: {'lr': 0.0004998989883203008, 'samples': 98496, 'steps': 2051, 'loss/train': 3.29062557220459} +07/25/2024 11:34:55 - INFO - __main__ - Step 2053: {'lr': 0.0004998988387388822, 'samples': 98544, 'steps': 2052, 'loss/train': 3.1935575008392334} +07/25/2024 11:34:56 - INFO - __main__ - Step 2054: {'lr': 0.0004998986890468152, 'samples': 98592, 'steps': 2053, 'loss/train': 3.457812786102295} +07/25/2024 11:34:56 - INFO - __main__ - Step 2055: {'lr': 0.0004998985392441001, 'samples': 98640, 'steps': 2054, 'loss/train': 5.1015706062316895} +07/25/2024 11:34:56 - INFO - __main__ - Step 2056: {'lr': 0.0004998983893307371, 'samples': 98688, 'steps': 2055, 'loss/train': 3.4137017726898193} +07/25/2024 11:34:57 - INFO - __main__ - Step 2057: {'lr': 0.0004998982393067258, 'samples': 98736, 'steps': 2056, 'loss/train': 3.887620210647583} +07/25/2024 11:34:57 - INFO - __main__ - Step 2058: {'lr': 0.0004998980891720668, 'samples': 98784, 'steps': 2057, 'loss/train': 2.9627575874328613} +07/25/2024 11:34:57 - INFO - __main__ - Step 2059: {'lr': 0.0004998979389267598, 'samples': 98832, 'steps': 2058, 'loss/train': 2.6557629108428955} +07/25/2024 11:34:57 - INFO - __main__ - Step 2060: {'lr': 0.000499897788570805, 'samples': 98880, 'steps': 2059, 'loss/train': 2.8275952339172363} +07/25/2024 11:34:58 - INFO - __main__ - Step 2061: {'lr': 0.0004998976381042025, 'samples': 98928, 'steps': 2060, 'loss/train': 4.219286918640137} +07/25/2024 11:34:58 - INFO - __main__ - Step 2062: {'lr': 0.0004998974875269524, 'samples': 98976, 'steps': 2061, 'loss/train': 3.974642753601074} +07/25/2024 11:34:58 - INFO - __main__ - Step 2063: {'lr': 0.0004998973368390546, 'samples': 99024, 'steps': 2062, 'loss/train': 4.209718704223633} +07/25/2024 11:34:59 - INFO - __main__ - Step 2064: {'lr': 0.0004998971860405093, 'samples': 99072, 'steps': 2063, 'loss/train': 3.692730665206909} +07/25/2024 11:34:59 - INFO - __main__ - Step 2065: {'lr': 0.0004998970351313165, 'samples': 99120, 'steps': 2064, 'loss/train': 3.7789254188537598} +07/25/2024 11:34:59 - INFO - __main__ - Step 2066: {'lr': 0.0004998968841114763, 'samples': 99168, 'steps': 2065, 'loss/train': 3.565744638442993} +07/25/2024 11:34:59 - INFO - __main__ - Step 2067: {'lr': 0.0004998967329809887, 'samples': 99216, 'steps': 2066, 'loss/train': 3.5814521312713623} +07/25/2024 11:35:00 - INFO - __main__ - Step 2068: {'lr': 0.0004998965817398539, 'samples': 99264, 'steps': 2067, 'loss/train': 3.442387819290161} +07/25/2024 11:35:00 - INFO - __main__ - Step 2069: {'lr': 0.000499896430388072, 'samples': 99312, 'steps': 2068, 'loss/train': 3.2417328357696533} +07/25/2024 11:35:00 - INFO - __main__ - Step 2070: {'lr': 0.0004998962789256428, 'samples': 99360, 'steps': 2069, 'loss/train': 3.769089698791504} +07/25/2024 11:35:01 - INFO - __main__ - Step 2071: {'lr': 0.0004998961273525665, 'samples': 99408, 'steps': 2070, 'loss/train': 3.6496200561523438} +07/25/2024 11:35:01 - INFO - __main__ - Step 2072: {'lr': 0.0004998959756688433, 'samples': 99456, 'steps': 2071, 'loss/train': 3.800590991973877} +07/25/2024 11:35:01 - INFO - __main__ - Step 2073: {'lr': 0.0004998958238744732, 'samples': 99504, 'steps': 2072, 'loss/train': 1.0204880237579346} +07/25/2024 11:35:01 - INFO - __main__ - Step 2074: {'lr': 0.0004998956719694561, 'samples': 99552, 'steps': 2073, 'loss/train': 2.3979201316833496} +07/25/2024 11:35:02 - INFO - __main__ - Step 2075: {'lr': 0.0004998955199537922, 'samples': 99600, 'steps': 2074, 'loss/train': 3.3089025020599365} +07/25/2024 11:35:02 - INFO - __main__ - Step 2076: {'lr': 0.0004998953678274818, 'samples': 99648, 'steps': 2075, 'loss/train': 3.768810510635376} +07/25/2024 11:35:02 - INFO - __main__ - Step 2077: {'lr': 0.0004998952155905246, 'samples': 99696, 'steps': 2076, 'loss/train': 3.426759958267212} +07/25/2024 11:35:03 - INFO - __main__ - Step 2078: {'lr': 0.0004998950632429206, 'samples': 99744, 'steps': 2077, 'loss/train': 3.8543429374694824} +07/25/2024 11:35:03 - INFO - __main__ - Step 2079: {'lr': 0.0004998949107846703, 'samples': 99792, 'steps': 2078, 'loss/train': 4.30996561050415} +07/25/2024 11:35:03 - INFO - __main__ - Step 2080: {'lr': 0.0004998947582157734, 'samples': 99840, 'steps': 2079, 'loss/train': 2.9532153606414795} +07/25/2024 11:35:03 - INFO - __main__ - Step 2081: {'lr': 0.0004998946055362302, 'samples': 99888, 'steps': 2080, 'loss/train': 4.3216962814331055} +07/25/2024 11:35:04 - INFO - __main__ - Step 2082: {'lr': 0.0004998944527460407, 'samples': 99936, 'steps': 2081, 'loss/train': 2.930724859237671} +07/25/2024 11:35:04 - INFO - __main__ - Step 2083: {'lr': 0.0004998942998452048, 'samples': 99984, 'steps': 2082, 'loss/train': 3.4522523880004883} +07/25/2024 11:35:04 - INFO - __main__ - Step 2084: {'lr': 0.0004998941468337229, 'samples': 100032, 'steps': 2083, 'loss/train': 2.7158079147338867} +07/25/2024 11:35:05 - INFO - __main__ - Step 2085: {'lr': 0.0004998939937115947, 'samples': 100080, 'steps': 2084, 'loss/train': 3.5071187019348145} +07/25/2024 11:35:05 - INFO - __main__ - Step 2086: {'lr': 0.0004998938404788205, 'samples': 100128, 'steps': 2085, 'loss/train': 3.874145984649658} +07/25/2024 11:35:05 - INFO - __main__ - Step 2087: {'lr': 0.0004998936871354004, 'samples': 100176, 'steps': 2086, 'loss/train': 3.797868490219116} +07/25/2024 11:35:05 - INFO - __main__ - Step 2088: {'lr': 0.0004998935336813343, 'samples': 100224, 'steps': 2087, 'loss/train': 3.324153184890747} +07/25/2024 11:35:06 - INFO - __main__ - Step 2089: {'lr': 0.0004998933801166223, 'samples': 100272, 'steps': 2088, 'loss/train': 3.94594407081604} +07/25/2024 11:35:06 - INFO - __main__ - Step 2090: {'lr': 0.0004998932264412646, 'samples': 100320, 'steps': 2089, 'loss/train': 3.0591378211975098} +07/25/2024 11:35:06 - INFO - __main__ - Step 2091: {'lr': 0.0004998930726552611, 'samples': 100368, 'steps': 2090, 'loss/train': 3.957811117172241} +07/25/2024 11:35:07 - INFO - __main__ - Step 2092: {'lr': 0.000499892918758612, 'samples': 100416, 'steps': 2091, 'loss/train': 1.4333109855651855} +07/25/2024 11:35:07 - INFO - __main__ - Step 2093: {'lr': 0.0004998927647513172, 'samples': 100464, 'steps': 2092, 'loss/train': 3.020918607711792} +07/25/2024 11:35:07 - INFO - __main__ - Step 2094: {'lr': 0.000499892610633377, 'samples': 100512, 'steps': 2093, 'loss/train': 3.3568267822265625} +07/25/2024 11:35:07 - INFO - __main__ - Step 2095: {'lr': 0.0004998924564047914, 'samples': 100560, 'steps': 2094, 'loss/train': 3.927950382232666} +07/25/2024 11:35:08 - INFO - __main__ - Step 2096: {'lr': 0.0004998923020655603, 'samples': 100608, 'steps': 2095, 'loss/train': 3.336735963821411} +07/25/2024 11:35:08 - INFO - __main__ - Step 2097: {'lr': 0.0004998921476156839, 'samples': 100656, 'steps': 2096, 'loss/train': 1.0573985576629639} +07/25/2024 11:35:08 - INFO - __main__ - Step 2098: {'lr': 0.0004998919930551624, 'samples': 100704, 'steps': 2097, 'loss/train': 2.2309627532958984} +07/25/2024 11:35:09 - INFO - __main__ - Step 2099: {'lr': 0.0004998918383839956, 'samples': 100752, 'steps': 2098, 'loss/train': 3.846550464630127} +07/25/2024 11:35:09 - INFO - __main__ - Step 2100: {'lr': 0.0004998916836021838, 'samples': 100800, 'steps': 2099, 'loss/train': 3.3447823524475098} +07/25/2024 11:35:09 - INFO - __main__ - Step 2101: {'lr': 0.0004998915287097269, 'samples': 100848, 'steps': 2100, 'loss/train': 3.29022479057312} +07/25/2024 11:35:09 - INFO - __main__ - Step 2102: {'lr': 0.0004998913737066249, 'samples': 100896, 'steps': 2101, 'loss/train': 3.5587449073791504} +07/25/2024 11:35:10 - INFO - __main__ - Step 2103: {'lr': 0.0004998912185928782, 'samples': 100944, 'steps': 2102, 'loss/train': 3.567246913909912} +07/25/2024 11:35:10 - INFO - __main__ - Step 2104: {'lr': 0.0004998910633684866, 'samples': 100992, 'steps': 2103, 'loss/train': 3.7553369998931885} +07/25/2024 11:35:10 - INFO - __main__ - Step 2105: {'lr': 0.0004998909080334503, 'samples': 101040, 'steps': 2104, 'loss/train': 3.7273073196411133} +07/25/2024 11:35:11 - INFO - __main__ - Step 2106: {'lr': 0.0004998907525877691, 'samples': 101088, 'steps': 2105, 'loss/train': 3.9325170516967773} +07/25/2024 11:35:11 - INFO - __main__ - Step 2107: {'lr': 0.0004998905970314435, 'samples': 101136, 'steps': 2106, 'loss/train': 4.023120403289795} +07/25/2024 11:35:11 - INFO - __main__ - Step 2108: {'lr': 0.0004998904413644733, 'samples': 101184, 'steps': 2107, 'loss/train': 2.388300895690918} +07/25/2024 11:35:11 - INFO - __main__ - Step 2109: {'lr': 0.0004998902855868585, 'samples': 101232, 'steps': 2108, 'loss/train': 3.324606418609619} +07/25/2024 11:35:12 - INFO - __main__ - Step 2110: {'lr': 0.0004998901296985995, 'samples': 101280, 'steps': 2109, 'loss/train': 3.3073227405548096} +07/25/2024 11:35:12 - INFO - __main__ - Step 2111: {'lr': 0.000499889973699696, 'samples': 101328, 'steps': 2110, 'loss/train': 4.214605331420898} +07/25/2024 11:35:12 - INFO - __main__ - Step 2112: {'lr': 0.0004998898175901481, 'samples': 101376, 'steps': 2111, 'loss/train': 3.1528561115264893} +07/25/2024 11:35:12 - INFO - __main__ - Step 2113: {'lr': 0.0004998896613699563, 'samples': 101424, 'steps': 2112, 'loss/train': 4.081486225128174} +07/25/2024 11:35:13 - INFO - __main__ - Step 2114: {'lr': 0.0004998895050391201, 'samples': 101472, 'steps': 2113, 'loss/train': 2.934687376022339} +07/25/2024 11:35:13 - INFO - __main__ - Step 2115: {'lr': 0.00049988934859764, 'samples': 101520, 'steps': 2114, 'loss/train': 3.853386402130127} +07/25/2024 11:35:13 - INFO - __main__ - Step 2116: {'lr': 0.0004998891920455158, 'samples': 101568, 'steps': 2115, 'loss/train': 3.801435708999634} +07/25/2024 11:35:14 - INFO - __main__ - Step 2117: {'lr': 0.0004998890353827476, 'samples': 101616, 'steps': 2116, 'loss/train': 2.7578132152557373} +07/25/2024 11:35:14 - INFO - __main__ - Step 2118: {'lr': 0.0004998888786093357, 'samples': 101664, 'steps': 2117, 'loss/train': 3.5421316623687744} +07/25/2024 11:35:14 - INFO - __main__ - Step 2119: {'lr': 0.0004998887217252799, 'samples': 101712, 'steps': 2118, 'loss/train': 4.38891077041626} +07/25/2024 11:35:14 - INFO - __main__ - Step 2120: {'lr': 0.0004998885647305804, 'samples': 101760, 'steps': 2119, 'loss/train': 3.1860926151275635} +07/25/2024 11:35:15 - INFO - __main__ - Step 2121: {'lr': 0.0004998884076252373, 'samples': 101808, 'steps': 2120, 'loss/train': 0.8411234617233276} +07/25/2024 11:35:15 - INFO - __main__ - Step 2122: {'lr': 0.0004998882504092506, 'samples': 101856, 'steps': 2121, 'loss/train': 3.745351791381836} +07/25/2024 11:35:15 - INFO - __main__ - Step 2123: {'lr': 0.0004998880930826203, 'samples': 101904, 'steps': 2122, 'loss/train': 3.2076170444488525} +07/25/2024 11:35:16 - INFO - __main__ - Step 2124: {'lr': 0.0004998879356453466, 'samples': 101952, 'steps': 2123, 'loss/train': 3.4744882583618164} +07/25/2024 11:35:16 - INFO - __main__ - Step 2125: {'lr': 0.0004998877780974296, 'samples': 102000, 'steps': 2124, 'loss/train': 3.612835168838501} +07/25/2024 11:35:16 - INFO - __main__ - Step 2126: {'lr': 0.0004998876204388692, 'samples': 102048, 'steps': 2125, 'loss/train': 2.857327938079834} +07/25/2024 11:35:16 - INFO - __main__ - Step 2127: {'lr': 0.0004998874626696656, 'samples': 102096, 'steps': 2126, 'loss/train': 3.242377996444702} +07/25/2024 11:35:17 - INFO - __main__ - Step 2128: {'lr': 0.0004998873047898188, 'samples': 102144, 'steps': 2127, 'loss/train': 3.686047315597534} +07/25/2024 11:35:17 - INFO - __main__ - Step 2129: {'lr': 0.0004998871467993288, 'samples': 102192, 'steps': 2128, 'loss/train': 3.545987606048584} +07/25/2024 11:35:17 - INFO - __main__ - Step 2130: {'lr': 0.000499886988698196, 'samples': 102240, 'steps': 2129, 'loss/train': 3.186488151550293} +07/25/2024 11:35:18 - INFO - __main__ - Step 2131: {'lr': 0.0004998868304864201, 'samples': 102288, 'steps': 2130, 'loss/train': 3.649097442626953} +07/25/2024 11:35:18 - INFO - __main__ - Step 2132: {'lr': 0.0004998866721640014, 'samples': 102336, 'steps': 2131, 'loss/train': 2.3548669815063477} +07/25/2024 11:35:18 - INFO - __main__ - Step 2133: {'lr': 0.0004998865137309399, 'samples': 102384, 'steps': 2132, 'loss/train': 2.6709070205688477} +07/25/2024 11:35:18 - INFO - __main__ - Step 2134: {'lr': 0.0004998863551872355, 'samples': 102432, 'steps': 2133, 'loss/train': 3.1160693168640137} +07/25/2024 11:35:19 - INFO - __main__ - Step 2135: {'lr': 0.0004998861965328885, 'samples': 102480, 'steps': 2134, 'loss/train': 2.9876620769500732} +07/25/2024 11:35:19 - INFO - __main__ - Step 2136: {'lr': 0.0004998860377678989, 'samples': 102528, 'steps': 2135, 'loss/train': 1.987502932548523} +07/25/2024 11:35:19 - INFO - __main__ - Step 2137: {'lr': 0.0004998858788922667, 'samples': 102576, 'steps': 2136, 'loss/train': 3.8860220909118652} +07/25/2024 11:35:20 - INFO - __main__ - Step 2138: {'lr': 0.0004998857199059922, 'samples': 102624, 'steps': 2137, 'loss/train': 3.019782066345215} +07/25/2024 11:35:20 - INFO - __main__ - Step 2139: {'lr': 0.0004998855608090752, 'samples': 102672, 'steps': 2138, 'loss/train': 3.5565428733825684} +07/25/2024 11:35:20 - INFO - __main__ - Step 2140: {'lr': 0.0004998854016015159, 'samples': 102720, 'steps': 2139, 'loss/train': 3.71260929107666} +07/25/2024 11:35:20 - INFO - __main__ - Step 2141: {'lr': 0.0004998852422833143, 'samples': 102768, 'steps': 2140, 'loss/train': 2.9357712268829346} +07/25/2024 11:35:21 - INFO - __main__ - Step 2142: {'lr': 0.0004998850828544705, 'samples': 102816, 'steps': 2141, 'loss/train': 3.770533323287964} +07/25/2024 11:35:21 - INFO - __main__ - Step 2143: {'lr': 0.0004998849233149847, 'samples': 102864, 'steps': 2142, 'loss/train': 3.176511287689209} +07/25/2024 11:35:21 - INFO - __main__ - Step 2144: {'lr': 0.0004998847636648567, 'samples': 102912, 'steps': 2143, 'loss/train': 3.1923916339874268} +07/25/2024 11:35:22 - INFO - __main__ - Step 2145: {'lr': 0.0004998846039040869, 'samples': 102960, 'steps': 2144, 'loss/train': 1.0004762411117554} +07/25/2024 11:35:22 - INFO - __main__ - Step 2146: {'lr': 0.0004998844440326751, 'samples': 103008, 'steps': 2145, 'loss/train': 2.8494837284088135} +07/25/2024 11:35:22 - INFO - __main__ - Step 2147: {'lr': 0.0004998842840506215, 'samples': 103056, 'steps': 2146, 'loss/train': 4.274225234985352} +07/25/2024 11:35:22 - INFO - __main__ - Step 2148: {'lr': 0.0004998841239579262, 'samples': 103104, 'steps': 2147, 'loss/train': 3.165332555770874} +07/25/2024 11:35:23 - INFO - __main__ - Step 2149: {'lr': 0.0004998839637545891, 'samples': 103152, 'steps': 2148, 'loss/train': 4.040173053741455} +07/25/2024 11:35:23 - INFO - __main__ - Step 2150: {'lr': 0.0004998838034406105, 'samples': 103200, 'steps': 2149, 'loss/train': 3.2372164726257324} +07/25/2024 11:35:23 - INFO - __main__ - Step 2151: {'lr': 0.0004998836430159902, 'samples': 103248, 'steps': 2150, 'loss/train': 3.856032371520996} +07/25/2024 11:35:24 - INFO - __main__ - Step 2152: {'lr': 0.0004998834824807286, 'samples': 103296, 'steps': 2151, 'loss/train': 3.5035855770111084} +07/25/2024 11:35:24 - INFO - __main__ - Step 2153: {'lr': 0.0004998833218348254, 'samples': 103344, 'steps': 2152, 'loss/train': 2.5412938594818115} +07/25/2024 11:35:24 - INFO - __main__ - Step 2154: {'lr': 0.0004998831610782809, 'samples': 103392, 'steps': 2153, 'loss/train': 3.224691390991211} +07/25/2024 11:35:24 - INFO - __main__ - Step 2155: {'lr': 0.0004998830002110953, 'samples': 103440, 'steps': 2154, 'loss/train': 3.02286434173584} +07/25/2024 11:35:25 - INFO - __main__ - Step 2156: {'lr': 0.0004998828392332684, 'samples': 103488, 'steps': 2155, 'loss/train': 2.355377197265625} +07/25/2024 11:35:25 - INFO - __main__ - Step 2157: {'lr': 0.0004998826781448004, 'samples': 103536, 'steps': 2156, 'loss/train': 2.8206140995025635} +07/25/2024 11:35:25 - INFO - __main__ - Step 2158: {'lr': 0.0004998825169456913, 'samples': 103584, 'steps': 2157, 'loss/train': 3.601661443710327} +07/25/2024 11:35:26 - INFO - __main__ - Step 2159: {'lr': 0.0004998823556359413, 'samples': 103632, 'steps': 2158, 'loss/train': 3.788145065307617} +07/25/2024 11:35:26 - INFO - __main__ - Step 2160: {'lr': 0.0004998821942155503, 'samples': 103680, 'steps': 2159, 'loss/train': 2.88140869140625} +07/25/2024 11:35:26 - INFO - __main__ - Step 2161: {'lr': 0.0004998820326845186, 'samples': 103728, 'steps': 2160, 'loss/train': 3.8270068168640137} +07/25/2024 11:35:26 - INFO - __main__ - Step 2162: {'lr': 0.000499881871042846, 'samples': 103776, 'steps': 2161, 'loss/train': 2.6120214462280273} +07/25/2024 11:35:27 - INFO - __main__ - Step 2163: {'lr': 0.0004998817092905328, 'samples': 103824, 'steps': 2162, 'loss/train': 3.4369750022888184} +07/25/2024 11:35:27 - INFO - __main__ - Step 2164: {'lr': 0.0004998815474275789, 'samples': 103872, 'steps': 2163, 'loss/train': 4.67138671875} +07/25/2024 11:35:27 - INFO - __main__ - Step 2165: {'lr': 0.0004998813854539846, 'samples': 103920, 'steps': 2164, 'loss/train': 3.578748941421509} +07/25/2024 11:35:28 - INFO - __main__ - Step 2166: {'lr': 0.0004998812233697498, 'samples': 103968, 'steps': 2165, 'loss/train': 3.2087297439575195} +07/25/2024 11:35:28 - INFO - __main__ - Step 2167: {'lr': 0.0004998810611748744, 'samples': 104016, 'steps': 2166, 'loss/train': 1.915100336074829} +07/25/2024 11:35:28 - INFO - __main__ - Step 2168: {'lr': 0.0004998808988693588, 'samples': 104064, 'steps': 2167, 'loss/train': 3.667188882827759} +07/25/2024 11:35:28 - INFO - __main__ - Step 2169: {'lr': 0.000499880736453203, 'samples': 104112, 'steps': 2168, 'loss/train': 0.9382704496383667} +07/25/2024 11:35:29 - INFO - __main__ - Step 2170: {'lr': 0.0004998805739264069, 'samples': 104160, 'steps': 2169, 'loss/train': 3.4101173877716064} +07/25/2024 11:35:29 - INFO - __main__ - Step 2171: {'lr': 0.0004998804112889707, 'samples': 104208, 'steps': 2170, 'loss/train': 4.011873722076416} +07/25/2024 11:35:29 - INFO - __main__ - Step 2172: {'lr': 0.0004998802485408945, 'samples': 104256, 'steps': 2171, 'loss/train': 3.650477170944214} +07/25/2024 11:35:30 - INFO - __main__ - Step 2173: {'lr': 0.0004998800856821783, 'samples': 104304, 'steps': 2172, 'loss/train': 3.999865770339966} +07/25/2024 11:35:30 - INFO - __main__ - Step 2174: {'lr': 0.0004998799227128222, 'samples': 104352, 'steps': 2173, 'loss/train': 1.9048188924789429} +07/25/2024 11:35:30 - INFO - __main__ - Step 2175: {'lr': 0.0004998797596328263, 'samples': 104400, 'steps': 2174, 'loss/train': 3.8481268882751465} +07/25/2024 11:35:30 - INFO - __main__ - Step 2176: {'lr': 0.0004998795964421906, 'samples': 104448, 'steps': 2175, 'loss/train': 2.0035324096679688} +07/25/2024 11:35:31 - INFO - __main__ - Step 2177: {'lr': 0.0004998794331409153, 'samples': 104496, 'steps': 2176, 'loss/train': 0.874019205570221} +07/25/2024 11:35:31 - INFO - __main__ - Step 2178: {'lr': 0.0004998792697290004, 'samples': 104544, 'steps': 2177, 'loss/train': 3.6365721225738525} +07/25/2024 11:35:31 - INFO - __main__ - Step 2179: {'lr': 0.0004998791062064458, 'samples': 104592, 'steps': 2178, 'loss/train': 3.4028382301330566} +07/25/2024 11:35:32 - INFO - __main__ - Step 2180: {'lr': 0.0004998789425732518, 'samples': 104640, 'steps': 2179, 'loss/train': 2.1114280223846436} +07/25/2024 11:35:32 - INFO - __main__ - Step 2181: {'lr': 0.0004998787788294185, 'samples': 104688, 'steps': 2180, 'loss/train': 3.0715279579162598} +07/25/2024 11:35:32 - INFO - __main__ - Step 2182: {'lr': 0.0004998786149749458, 'samples': 104736, 'steps': 2181, 'loss/train': 3.9062349796295166} +07/25/2024 11:35:32 - INFO - __main__ - Step 2183: {'lr': 0.0004998784510098339, 'samples': 104784, 'steps': 2182, 'loss/train': 4.9642791748046875} +07/25/2024 11:35:33 - INFO - __main__ - Step 2184: {'lr': 0.0004998782869340828, 'samples': 104832, 'steps': 2183, 'loss/train': 3.370971441268921} +07/25/2024 11:35:33 - INFO - __main__ - Step 2185: {'lr': 0.0004998781227476925, 'samples': 104880, 'steps': 2184, 'loss/train': 3.997767448425293} +07/25/2024 11:35:33 - INFO - __main__ - Step 2186: {'lr': 0.0004998779584506633, 'samples': 104928, 'steps': 2185, 'loss/train': 2.5537397861480713} +07/25/2024 11:35:34 - INFO - __main__ - Step 2187: {'lr': 0.000499877794042995, 'samples': 104976, 'steps': 2186, 'loss/train': 3.8334546089172363} +07/25/2024 11:35:34 - INFO - __main__ - Step 2188: {'lr': 0.0004998776295246879, 'samples': 105024, 'steps': 2187, 'loss/train': 3.711604118347168} +07/25/2024 11:35:34 - INFO - __main__ - Step 2189: {'lr': 0.000499877464895742, 'samples': 105072, 'steps': 2188, 'loss/train': 3.4435997009277344} +07/25/2024 11:35:34 - INFO - __main__ - Step 2190: {'lr': 0.0004998773001561574, 'samples': 105120, 'steps': 2189, 'loss/train': 2.96065616607666} +07/25/2024 11:35:35 - INFO - __main__ - Step 2191: {'lr': 0.0004998771353059341, 'samples': 105168, 'steps': 2190, 'loss/train': 3.7847232818603516} +07/25/2024 11:35:35 - INFO - __main__ - Step 2192: {'lr': 0.0004998769703450722, 'samples': 105216, 'steps': 2191, 'loss/train': 3.149230718612671} +07/25/2024 11:35:35 - INFO - __main__ - Step 2193: {'lr': 0.0004998768052735718, 'samples': 105264, 'steps': 2192, 'loss/train': 2.2654428482055664} +07/25/2024 11:35:36 - INFO - __main__ - Step 2194: {'lr': 0.0004998766400914329, 'samples': 105312, 'steps': 2193, 'loss/train': 3.9979546070098877} +07/25/2024 11:35:36 - INFO - __main__ - Step 2195: {'lr': 0.0004998764747986556, 'samples': 105360, 'steps': 2194, 'loss/train': 3.155364990234375} +07/25/2024 11:35:36 - INFO - __main__ - Step 2196: {'lr': 0.0004998763093952401, 'samples': 105408, 'steps': 2195, 'loss/train': 3.1565144062042236} +07/25/2024 11:35:36 - INFO - __main__ - Step 2197: {'lr': 0.0004998761438811863, 'samples': 105456, 'steps': 2196, 'loss/train': 3.6936757564544678} +07/25/2024 11:35:37 - INFO - __main__ - Step 2198: {'lr': 0.0004998759782564944, 'samples': 105504, 'steps': 2197, 'loss/train': 1.2628309726715088} +07/25/2024 11:35:37 - INFO - __main__ - Step 2199: {'lr': 0.0004998758125211643, 'samples': 105552, 'steps': 2198, 'loss/train': 3.3115811347961426} +07/25/2024 11:35:37 - INFO - __main__ - Step 2200: {'lr': 0.0004998756466751964, 'samples': 105600, 'steps': 2199, 'loss/train': 3.418748140335083} +07/25/2024 11:35:38 - INFO - __main__ - Step 2201: {'lr': 0.0004998754807185904, 'samples': 105648, 'steps': 2200, 'loss/train': 0.6578316688537598} +07/25/2024 11:35:38 - INFO - __main__ - Step 2202: {'lr': 0.0004998753146513465, 'samples': 105696, 'steps': 2201, 'loss/train': 3.7446937561035156} +07/25/2024 11:35:38 - INFO - __main__ - Step 2203: {'lr': 0.000499875148473465, 'samples': 105744, 'steps': 2202, 'loss/train': 4.124364376068115} +07/25/2024 11:35:38 - INFO - __main__ - Step 2204: {'lr': 0.0004998749821849455, 'samples': 105792, 'steps': 2203, 'loss/train': 1.9965453147888184} +07/25/2024 11:35:39 - INFO - __main__ - Step 2205: {'lr': 0.0004998748157857886, 'samples': 105840, 'steps': 2204, 'loss/train': 3.57562255859375} +07/25/2024 11:35:39 - INFO - __main__ - Step 2206: {'lr': 0.0004998746492759942, 'samples': 105888, 'steps': 2205, 'loss/train': 3.8696908950805664} +07/25/2024 11:35:39 - INFO - __main__ - Step 2207: {'lr': 0.0004998744826555621, 'samples': 105936, 'steps': 2206, 'loss/train': 4.601612567901611} +07/25/2024 11:35:40 - INFO - __main__ - Step 2208: {'lr': 0.0004998743159244926, 'samples': 105984, 'steps': 2207, 'loss/train': 3.0065488815307617} +07/25/2024 11:35:40 - INFO - __main__ - Step 2209: {'lr': 0.0004998741490827858, 'samples': 106032, 'steps': 2208, 'loss/train': 3.886742353439331} +07/25/2024 11:35:40 - INFO - __main__ - Step 2210: {'lr': 0.0004998739821304418, 'samples': 106080, 'steps': 2209, 'loss/train': 2.712890386581421} +07/25/2024 11:35:40 - INFO - __main__ - Step 2211: {'lr': 0.0004998738150674605, 'samples': 106128, 'steps': 2210, 'loss/train': 3.2196826934814453} +07/25/2024 11:35:41 - INFO - __main__ - Step 2212: {'lr': 0.0004998736478938422, 'samples': 106176, 'steps': 2211, 'loss/train': 3.6563518047332764} +07/25/2024 11:35:41 - INFO - __main__ - Step 2213: {'lr': 0.0004998734806095867, 'samples': 106224, 'steps': 2212, 'loss/train': 3.60038685798645} +07/25/2024 11:35:41 - INFO - __main__ - Step 2214: {'lr': 0.0004998733132146943, 'samples': 106272, 'steps': 2213, 'loss/train': 3.0373764038085938} +07/25/2024 11:35:42 - INFO - __main__ - Step 2215: {'lr': 0.000499873145709165, 'samples': 106320, 'steps': 2214, 'loss/train': 3.9154720306396484} +07/25/2024 11:35:42 - INFO - __main__ - Step 2216: {'lr': 0.0004998729780929988, 'samples': 106368, 'steps': 2215, 'loss/train': 3.7872369289398193} +07/25/2024 11:35:42 - INFO - __main__ - Step 2217: {'lr': 0.000499872810366196, 'samples': 106416, 'steps': 2216, 'loss/train': 4.088732719421387} +07/25/2024 11:35:42 - INFO - __main__ - Step 2218: {'lr': 0.0004998726425287564, 'samples': 106464, 'steps': 2217, 'loss/train': 3.7097885608673096} +07/25/2024 11:35:43 - INFO - __main__ - Step 2219: {'lr': 0.0004998724745806801, 'samples': 106512, 'steps': 2218, 'loss/train': 3.594327926635742} +07/25/2024 11:35:43 - INFO - __main__ - Step 2220: {'lr': 0.0004998723065219675, 'samples': 106560, 'steps': 2219, 'loss/train': 3.1829917430877686} +07/25/2024 11:35:43 - INFO - __main__ - Step 2221: {'lr': 0.0004998721383526183, 'samples': 106608, 'steps': 2220, 'loss/train': 3.5823326110839844} +07/25/2024 11:35:43 - INFO - __main__ - Step 2222: {'lr': 0.0004998719700726327, 'samples': 106656, 'steps': 2221, 'loss/train': 0.764244019985199} +07/25/2024 11:35:44 - INFO - __main__ - Step 2223: {'lr': 0.0004998718016820109, 'samples': 106704, 'steps': 2222, 'loss/train': 3.4511373043060303} +07/25/2024 11:35:44 - INFO - __main__ - Step 2224: {'lr': 0.0004998716331807528, 'samples': 106752, 'steps': 2223, 'loss/train': 3.300065279006958} +07/25/2024 11:35:44 - INFO - __main__ - Step 2225: {'lr': 0.0004998714645688586, 'samples': 106800, 'steps': 2224, 'loss/train': 0.8324265480041504} +07/25/2024 11:35:45 - INFO - __main__ - Step 2226: {'lr': 0.0004998712958463283, 'samples': 106848, 'steps': 2225, 'loss/train': 3.2387027740478516} +07/25/2024 11:35:45 - INFO - __main__ - Step 2227: {'lr': 0.000499871127013162, 'samples': 106896, 'steps': 2226, 'loss/train': 3.274503707885742} +07/25/2024 11:35:45 - INFO - __main__ - Step 2228: {'lr': 0.0004998709580693597, 'samples': 106944, 'steps': 2227, 'loss/train': 1.7178754806518555} +07/25/2024 11:35:45 - INFO - __main__ - Step 2229: {'lr': 0.0004998707890149216, 'samples': 106992, 'steps': 2228, 'loss/train': 4.030239582061768} +07/25/2024 11:35:46 - INFO - __main__ - Step 2230: {'lr': 0.0004998706198498476, 'samples': 107040, 'steps': 2229, 'loss/train': 3.8892006874084473} +07/25/2024 11:35:46 - INFO - __main__ - Step 2231: {'lr': 0.000499870450574138, 'samples': 107088, 'steps': 2230, 'loss/train': 3.925027370452881} +07/25/2024 11:35:46 - INFO - __main__ - Step 2232: {'lr': 0.0004998702811877928, 'samples': 107136, 'steps': 2231, 'loss/train': 3.401859998703003} +07/25/2024 11:35:47 - INFO - __main__ - Step 2233: {'lr': 0.0004998701116908119, 'samples': 107184, 'steps': 2232, 'loss/train': 3.679063558578491} +07/25/2024 11:35:47 - INFO - __main__ - Step 2234: {'lr': 0.0004998699420831955, 'samples': 107232, 'steps': 2233, 'loss/train': 2.5446131229400635} +07/25/2024 11:35:47 - INFO - __main__ - Step 2235: {'lr': 0.0004998697723649439, 'samples': 107280, 'steps': 2234, 'loss/train': 3.032790422439575} +07/25/2024 11:35:47 - INFO - __main__ - Step 2236: {'lr': 0.0004998696025360569, 'samples': 107328, 'steps': 2235, 'loss/train': 3.5812745094299316} +07/25/2024 11:35:48 - INFO - __main__ - Step 2237: {'lr': 0.0004998694325965346, 'samples': 107376, 'steps': 2236, 'loss/train': 3.8627753257751465} +07/25/2024 11:35:48 - INFO - __main__ - Step 2238: {'lr': 0.0004998692625463771, 'samples': 107424, 'steps': 2237, 'loss/train': 1.7781836986541748} +07/25/2024 11:35:48 - INFO - __main__ - Step 2239: {'lr': 0.0004998690923855845, 'samples': 107472, 'steps': 2238, 'loss/train': 2.966273546218872} +07/25/2024 11:35:49 - INFO - __main__ - Step 2240: {'lr': 0.0004998689221141569, 'samples': 107520, 'steps': 2239, 'loss/train': 2.9994466304779053} +07/25/2024 11:35:49 - INFO - __main__ - Step 2241: {'lr': 0.0004998687517320943, 'samples': 107568, 'steps': 2240, 'loss/train': 4.5928053855896} +07/25/2024 11:35:49 - INFO - __main__ - Step 2242: {'lr': 0.0004998685812393968, 'samples': 107616, 'steps': 2241, 'loss/train': 3.898937940597534} +07/25/2024 11:35:49 - INFO - __main__ - Step 2243: {'lr': 0.0004998684106360645, 'samples': 107664, 'steps': 2242, 'loss/train': 4.050120830535889} +07/25/2024 11:35:50 - INFO - __main__ - Step 2244: {'lr': 0.0004998682399220975, 'samples': 107712, 'steps': 2243, 'loss/train': 2.6648662090301514} +07/25/2024 11:35:50 - INFO - __main__ - Step 2245: {'lr': 0.0004998680690974958, 'samples': 107760, 'steps': 2244, 'loss/train': 3.4629721641540527} +07/25/2024 11:35:50 - INFO - __main__ - Step 2246: {'lr': 0.0004998678981622596, 'samples': 107808, 'steps': 2245, 'loss/train': 0.9142275452613831} +07/25/2024 11:35:51 - INFO - __main__ - Step 2247: {'lr': 0.000499867727116389, 'samples': 107856, 'steps': 2246, 'loss/train': 2.846893548965454} +07/25/2024 11:35:51 - INFO - __main__ - Step 2248: {'lr': 0.0004998675559598838, 'samples': 107904, 'steps': 2247, 'loss/train': 2.873394250869751} +07/25/2024 11:35:51 - INFO - __main__ - Step 2249: {'lr': 0.0004998673846927442, 'samples': 107952, 'steps': 2248, 'loss/train': 0.7137474417686462} +07/25/2024 11:35:51 - INFO - __main__ - Step 2250: {'lr': 0.0004998672133149704, 'samples': 108000, 'steps': 2249, 'loss/train': 3.0606632232666016} +07/25/2024 11:35:52 - INFO - __main__ - Step 2251: {'lr': 0.0004998670418265624, 'samples': 108048, 'steps': 2250, 'loss/train': 2.9687693119049072} +07/25/2024 11:35:52 - INFO - __main__ - Step 2252: {'lr': 0.0004998668702275203, 'samples': 108096, 'steps': 2251, 'loss/train': 1.7202285528182983} +07/25/2024 11:35:52 - INFO - __main__ - Step 2253: {'lr': 0.0004998666985178441, 'samples': 108144, 'steps': 2252, 'loss/train': 3.7214839458465576} +07/25/2024 11:35:53 - INFO - __main__ - Step 2254: {'lr': 0.000499866526697534, 'samples': 108192, 'steps': 2253, 'loss/train': 3.9415576457977295} +07/25/2024 11:35:53 - INFO - __main__ - Step 2255: {'lr': 0.0004998663547665899, 'samples': 108240, 'steps': 2254, 'loss/train': 3.790440559387207} +07/25/2024 11:35:53 - INFO - __main__ - Step 2256: {'lr': 0.0004998661827250121, 'samples': 108288, 'steps': 2255, 'loss/train': 3.677527904510498} +07/25/2024 11:35:53 - INFO - __main__ - Step 2257: {'lr': 0.0004998660105728004, 'samples': 108336, 'steps': 2256, 'loss/train': 3.6795198917388916} +07/25/2024 11:35:54 - INFO - __main__ - Step 2258: {'lr': 0.0004998658383099551, 'samples': 108384, 'steps': 2257, 'loss/train': 2.748892068862915} +07/25/2024 11:35:54 - INFO - __main__ - Step 2259: {'lr': 0.0004998656659364763, 'samples': 108432, 'steps': 2258, 'loss/train': 3.031162738800049} +07/25/2024 11:35:54 - INFO - __main__ - Step 2260: {'lr': 0.0004998654934523639, 'samples': 108480, 'steps': 2259, 'loss/train': 2.8522374629974365} +07/25/2024 11:35:55 - INFO - __main__ - Step 2261: {'lr': 0.0004998653208576181, 'samples': 108528, 'steps': 2260, 'loss/train': 3.1291723251342773} +07/25/2024 11:35:55 - INFO - __main__ - Step 2262: {'lr': 0.0004998651481522389, 'samples': 108576, 'steps': 2261, 'loss/train': 0.8049426078796387} +07/25/2024 11:35:55 - INFO - __main__ - Step 2263: {'lr': 0.0004998649753362264, 'samples': 108624, 'steps': 2262, 'loss/train': 3.070988893508911} +07/25/2024 11:35:55 - INFO - __main__ - Step 2264: {'lr': 0.0004998648024095808, 'samples': 108672, 'steps': 2263, 'loss/train': 3.6504533290863037} +07/25/2024 11:35:56 - INFO - __main__ - Step 2265: {'lr': 0.0004998646293723019, 'samples': 108720, 'steps': 2264, 'loss/train': 4.062924861907959} +07/25/2024 11:35:56 - INFO - __main__ - Step 2266: {'lr': 0.00049986445622439, 'samples': 108768, 'steps': 2265, 'loss/train': 3.7227303981781006} +07/25/2024 11:35:56 - INFO - __main__ - Step 2267: {'lr': 0.0004998642829658452, 'samples': 108816, 'steps': 2266, 'loss/train': 4.032710075378418} +07/25/2024 11:35:57 - INFO - __main__ - Step 2268: {'lr': 0.0004998641095966675, 'samples': 108864, 'steps': 2267, 'loss/train': 3.485382080078125} +07/25/2024 11:35:57 - INFO - __main__ - Step 2269: {'lr': 0.0004998639361168569, 'samples': 108912, 'steps': 2268, 'loss/train': 2.2493627071380615} +07/25/2024 11:35:57 - INFO - __main__ - Step 2270: {'lr': 0.0004998637625264136, 'samples': 108960, 'steps': 2269, 'loss/train': 0.8882149457931519} +07/25/2024 11:35:57 - INFO - __main__ - Step 2271: {'lr': 0.0004998635888253376, 'samples': 109008, 'steps': 2270, 'loss/train': 3.448897123336792} +07/25/2024 11:35:58 - INFO - __main__ - Step 2272: {'lr': 0.000499863415013629, 'samples': 109056, 'steps': 2271, 'loss/train': 4.026601791381836} +07/25/2024 11:35:58 - INFO - __main__ - Step 2273: {'lr': 0.0004998632410912878, 'samples': 109104, 'steps': 2272, 'loss/train': 1.369881510734558} +07/25/2024 11:35:58 - INFO - __main__ - Step 2274: {'lr': 0.0004998630670583142, 'samples': 109152, 'steps': 2273, 'loss/train': 3.235579490661621} +07/25/2024 11:35:59 - INFO - __main__ - Step 2275: {'lr': 0.0004998628929147083, 'samples': 109200, 'steps': 2274, 'loss/train': 3.3756446838378906} +07/25/2024 11:35:59 - INFO - __main__ - Step 2276: {'lr': 0.0004998627186604701, 'samples': 109248, 'steps': 2275, 'loss/train': 1.447083592414856} +07/25/2024 11:35:59 - INFO - __main__ - Step 2277: {'lr': 0.0004998625442955996, 'samples': 109296, 'steps': 2276, 'loss/train': 3.4334769248962402} +07/25/2024 11:35:59 - INFO - __main__ - Step 2278: {'lr': 0.0004998623698200972, 'samples': 109344, 'steps': 2277, 'loss/train': 3.9016923904418945} +07/25/2024 11:36:00 - INFO - __main__ - Step 2279: {'lr': 0.0004998621952339625, 'samples': 109392, 'steps': 2278, 'loss/train': 3.6154048442840576} +07/25/2024 11:36:00 - INFO - __main__ - Step 2280: {'lr': 0.0004998620205371959, 'samples': 109440, 'steps': 2279, 'loss/train': 4.121209144592285} +07/25/2024 11:36:00 - INFO - __main__ - Step 2281: {'lr': 0.0004998618457297974, 'samples': 109488, 'steps': 2280, 'loss/train': 3.5473639965057373} +07/25/2024 11:36:01 - INFO - __main__ - Step 2282: {'lr': 0.000499861670811767, 'samples': 109536, 'steps': 2281, 'loss/train': 2.393754720687866} +07/25/2024 11:36:01 - INFO - __main__ - Step 2283: {'lr': 0.000499861495783105, 'samples': 109584, 'steps': 2282, 'loss/train': 2.6770975589752197} +07/25/2024 11:36:01 - INFO - __main__ - Step 2284: {'lr': 0.0004998613206438113, 'samples': 109632, 'steps': 2283, 'loss/train': 1.9174435138702393} +07/25/2024 11:36:01 - INFO - __main__ - Step 2285: {'lr': 0.0004998611453938859, 'samples': 109680, 'steps': 2284, 'loss/train': 3.281001091003418} +07/25/2024 11:36:02 - INFO - __main__ - Step 2286: {'lr': 0.0004998609700333291, 'samples': 109728, 'steps': 2285, 'loss/train': 0.6425403952598572} +07/25/2024 11:36:02 - INFO - __main__ - Step 2287: {'lr': 0.0004998607945621409, 'samples': 109776, 'steps': 2286, 'loss/train': 2.6458585262298584} +07/25/2024 11:36:02 - INFO - __main__ - Step 2288: {'lr': 0.0004998606189803212, 'samples': 109824, 'steps': 2287, 'loss/train': 3.602816104888916} +07/25/2024 11:36:03 - INFO - __main__ - Step 2289: {'lr': 0.0004998604432878703, 'samples': 109872, 'steps': 2288, 'loss/train': 3.981424331665039} +07/25/2024 11:36:03 - INFO - __main__ - Step 2290: {'lr': 0.0004998602674847881, 'samples': 109920, 'steps': 2289, 'loss/train': 2.950683832168579} +07/25/2024 11:36:03 - INFO - __main__ - Step 2291: {'lr': 0.0004998600915710749, 'samples': 109968, 'steps': 2290, 'loss/train': 3.4455416202545166} +07/25/2024 11:36:03 - INFO - __main__ - Step 2292: {'lr': 0.0004998599155467306, 'samples': 110016, 'steps': 2291, 'loss/train': 3.940934658050537} +07/25/2024 11:36:04 - INFO - __main__ - Step 2293: {'lr': 0.0004998597394117553, 'samples': 110064, 'steps': 2292, 'loss/train': 3.269218683242798} +07/25/2024 11:36:04 - INFO - __main__ - Step 2294: {'lr': 0.0004998595631661491, 'samples': 110112, 'steps': 2293, 'loss/train': 0.7935392260551453} +07/25/2024 11:36:04 - INFO - __main__ - Step 2295: {'lr': 0.0004998593868099122, 'samples': 110160, 'steps': 2294, 'loss/train': 2.8458921909332275} +07/25/2024 11:36:05 - INFO - __main__ - Step 2296: {'lr': 0.0004998592103430445, 'samples': 110208, 'steps': 2295, 'loss/train': 4.049722671508789} +07/25/2024 11:36:05 - INFO - __main__ - Step 2297: {'lr': 0.0004998590337655461, 'samples': 110256, 'steps': 2296, 'loss/train': 2.2864632606506348} +07/25/2024 11:36:05 - INFO - __main__ - Step 2298: {'lr': 0.0004998588570774172, 'samples': 110304, 'steps': 2297, 'loss/train': 3.760341167449951} +07/25/2024 11:36:05 - INFO - __main__ - Step 2299: {'lr': 0.0004998586802786577, 'samples': 110352, 'steps': 2298, 'loss/train': 2.986137866973877} +07/25/2024 11:36:06 - INFO - __main__ - Step 2300: {'lr': 0.0004998585033692679, 'samples': 110400, 'steps': 2299, 'loss/train': 1.805532455444336} +07/25/2024 11:36:06 - INFO - __main__ - Step 2301: {'lr': 0.0004998583263492477, 'samples': 110448, 'steps': 2300, 'loss/train': 2.872697591781616} +07/25/2024 11:36:06 - INFO - __main__ - Step 2302: {'lr': 0.0004998581492185972, 'samples': 110496, 'steps': 2301, 'loss/train': 3.5486299991607666} +07/25/2024 11:36:07 - INFO - __main__ - Step 2303: {'lr': 0.0004998579719773165, 'samples': 110544, 'steps': 2302, 'loss/train': 3.376861572265625} +07/25/2024 11:36:07 - INFO - __main__ - Step 2304: {'lr': 0.0004998577946254057, 'samples': 110592, 'steps': 2303, 'loss/train': 3.6718029975891113} +07/25/2024 11:36:07 - INFO - __main__ - Step 2305: {'lr': 0.0004998576171628649, 'samples': 110640, 'steps': 2304, 'loss/train': 3.598828077316284} +07/25/2024 11:36:07 - INFO - __main__ - Step 2306: {'lr': 0.0004998574395896941, 'samples': 110688, 'steps': 2305, 'loss/train': 3.405545473098755} +07/25/2024 11:36:08 - INFO - __main__ - Step 2307: {'lr': 0.0004998572619058935, 'samples': 110736, 'steps': 2306, 'loss/train': 3.203799247741699} +07/25/2024 11:36:08 - INFO - __main__ - Step 2308: {'lr': 0.0004998570841114632, 'samples': 110784, 'steps': 2307, 'loss/train': 3.026562213897705} +07/25/2024 11:36:08 - INFO - __main__ - Step 2309: {'lr': 0.0004998569062064029, 'samples': 110832, 'steps': 2308, 'loss/train': 3.3380675315856934} +07/25/2024 11:36:09 - INFO - __main__ - Step 2310: {'lr': 0.0004998567281907132, 'samples': 110880, 'steps': 2309, 'loss/train': 0.700564980506897} +07/25/2024 11:36:09 - INFO - __main__ - Step 2311: {'lr': 0.0004998565500643938, 'samples': 110928, 'steps': 2310, 'loss/train': 2.925384759902954} +07/25/2024 11:36:09 - INFO - __main__ - Step 2312: {'lr': 0.000499856371827445, 'samples': 110976, 'steps': 2311, 'loss/train': 2.901233196258545} +07/25/2024 11:36:09 - INFO - __main__ - Step 2313: {'lr': 0.0004998561934798668, 'samples': 111024, 'steps': 2312, 'loss/train': 3.4341745376586914} +07/25/2024 11:36:10 - INFO - __main__ - Step 2314: {'lr': 0.0004998560150216592, 'samples': 111072, 'steps': 2313, 'loss/train': 3.283043384552002} +07/25/2024 11:36:10 - INFO - __main__ - Step 2315: {'lr': 0.0004998558364528225, 'samples': 111120, 'steps': 2314, 'loss/train': 2.2754933834075928} +07/25/2024 11:36:10 - INFO - __main__ - Step 2316: {'lr': 0.0004998556577733565, 'samples': 111168, 'steps': 2315, 'loss/train': 3.229036569595337} +07/25/2024 11:36:11 - INFO - __main__ - Step 2317: {'lr': 0.0004998554789832615, 'samples': 111216, 'steps': 2316, 'loss/train': 3.607135057449341} +07/25/2024 11:36:11 - INFO - __main__ - Step 2318: {'lr': 0.0004998553000825375, 'samples': 111264, 'steps': 2317, 'loss/train': 0.6686445474624634} +07/25/2024 11:36:11 - INFO - __main__ - Step 2319: {'lr': 0.0004998551210711845, 'samples': 111312, 'steps': 2318, 'loss/train': 3.705096483230591} +07/25/2024 11:36:11 - INFO - __main__ - Step 2320: {'lr': 0.0004998549419492027, 'samples': 111360, 'steps': 2319, 'loss/train': 3.835973024368286} +07/25/2024 11:36:12 - INFO - __main__ - Step 2321: {'lr': 0.0004998547627165921, 'samples': 111408, 'steps': 2320, 'loss/train': 4.381070137023926} +07/25/2024 11:36:12 - INFO - __main__ - Step 2322: {'lr': 0.0004998545833733529, 'samples': 111456, 'steps': 2321, 'loss/train': 3.899097442626953} +07/25/2024 11:36:12 - INFO - __main__ - Step 2323: {'lr': 0.000499854403919485, 'samples': 111504, 'steps': 2322, 'loss/train': 2.8703622817993164} +07/25/2024 11:36:12 - INFO - __main__ - Step 2324: {'lr': 0.0004998542243549886, 'samples': 111552, 'steps': 2323, 'loss/train': 1.6448208093643188} +07/25/2024 11:36:13 - INFO - __main__ - Step 2325: {'lr': 0.0004998540446798637, 'samples': 111600, 'steps': 2324, 'loss/train': 2.1997857093811035} +07/25/2024 11:36:13 - INFO - __main__ - Step 2326: {'lr': 0.0004998538648941105, 'samples': 111648, 'steps': 2325, 'loss/train': 3.5801706314086914} +07/25/2024 11:36:13 - INFO - __main__ - Step 2327: {'lr': 0.000499853684997729, 'samples': 111696, 'steps': 2326, 'loss/train': 3.8329830169677734} +07/25/2024 11:36:14 - INFO - __main__ - Step 2328: {'lr': 0.0004998535049907194, 'samples': 111744, 'steps': 2327, 'loss/train': 2.8572022914886475} +07/25/2024 11:36:14 - INFO - __main__ - Step 2329: {'lr': 0.0004998533248730814, 'samples': 111792, 'steps': 2328, 'loss/train': 3.443979501724243} +07/25/2024 11:36:14 - INFO - __main__ - Step 2330: {'lr': 0.0004998531446448155, 'samples': 111840, 'steps': 2329, 'loss/train': 3.4007465839385986} +07/25/2024 11:36:14 - INFO - __main__ - Step 2331: {'lr': 0.0004998529643059216, 'samples': 111888, 'steps': 2330, 'loss/train': 3.4246439933776855} +07/25/2024 11:36:15 - INFO - __main__ - Step 2332: {'lr': 0.0004998527838563999, 'samples': 111936, 'steps': 2331, 'loss/train': 2.8957102298736572} +07/25/2024 11:36:15 - INFO - __main__ - Step 2333: {'lr': 0.0004998526032962504, 'samples': 111984, 'steps': 2332, 'loss/train': 2.841271162033081} +07/25/2024 11:36:15 - INFO - __main__ - Step 2334: {'lr': 0.000499852422625473, 'samples': 112032, 'steps': 2333, 'loss/train': 0.6571175456047058} +07/25/2024 11:36:16 - INFO - __main__ - Step 2335: {'lr': 0.0004998522418440681, 'samples': 112080, 'steps': 2334, 'loss/train': 2.8326287269592285} +07/25/2024 11:36:16 - INFO - __main__ - Step 2336: {'lr': 0.0004998520609520355, 'samples': 112128, 'steps': 2335, 'loss/train': 3.257902145385742} +07/25/2024 11:36:16 - INFO - __main__ - Step 2337: {'lr': 0.0004998518799493755, 'samples': 112176, 'steps': 2336, 'loss/train': 2.9127111434936523} +07/25/2024 11:36:16 - INFO - __main__ - Step 2338: {'lr': 0.000499851698836088, 'samples': 112224, 'steps': 2337, 'loss/train': 3.3938040733337402} +07/25/2024 11:36:17 - INFO - __main__ - Step 2339: {'lr': 0.0004998515176121733, 'samples': 112272, 'steps': 2338, 'loss/train': 2.28813099861145} +07/25/2024 11:36:17 - INFO - __main__ - Step 2340: {'lr': 0.0004998513362776312, 'samples': 112320, 'steps': 2339, 'loss/train': 3.0433061122894287} +07/25/2024 11:36:17 - INFO - __main__ - Step 2341: {'lr': 0.000499851154832462, 'samples': 112368, 'steps': 2340, 'loss/train': 3.211643934249878} +07/25/2024 11:36:18 - INFO - __main__ - Step 2342: {'lr': 0.0004998509732766658, 'samples': 112416, 'steps': 2341, 'loss/train': 0.9348688125610352} +07/25/2024 11:36:18 - INFO - __main__ - Step 2343: {'lr': 0.0004998507916102425, 'samples': 112464, 'steps': 2342, 'loss/train': 3.1336023807525635} +07/25/2024 11:36:18 - INFO - __main__ - Step 2344: {'lr': 0.0004998506098331922, 'samples': 112512, 'steps': 2343, 'loss/train': 3.2884509563446045} +07/25/2024 11:36:18 - INFO - __main__ - Step 2345: {'lr': 0.0004998504279455152, 'samples': 112560, 'steps': 2344, 'loss/train': 3.037235975265503} +07/25/2024 11:36:19 - INFO - __main__ - Step 2346: {'lr': 0.0004998502459472113, 'samples': 112608, 'steps': 2345, 'loss/train': 3.017127275466919} +07/25/2024 11:36:19 - INFO - __main__ - Step 2347: {'lr': 0.0004998500638382808, 'samples': 112656, 'steps': 2346, 'loss/train': 3.270021915435791} +07/25/2024 11:36:19 - INFO - __main__ - Step 2348: {'lr': 0.0004998498816187237, 'samples': 112704, 'steps': 2347, 'loss/train': 1.6333467960357666} +07/25/2024 11:36:20 - INFO - __main__ - Step 2349: {'lr': 0.0004998496992885401, 'samples': 112752, 'steps': 2348, 'loss/train': 3.584733486175537} +07/25/2024 11:36:20 - INFO - __main__ - Step 2350: {'lr': 0.0004998495168477299, 'samples': 112800, 'steps': 2349, 'loss/train': 3.237522602081299} +07/25/2024 11:36:20 - INFO - __main__ - Step 2351: {'lr': 0.0004998493342962935, 'samples': 112848, 'steps': 2350, 'loss/train': 3.7890329360961914} +07/25/2024 11:36:20 - INFO - __main__ - Step 2352: {'lr': 0.0004998491516342307, 'samples': 112896, 'steps': 2351, 'loss/train': 3.3513057231903076} +07/25/2024 11:36:21 - INFO - __main__ - Step 2353: {'lr': 0.0004998489688615417, 'samples': 112944, 'steps': 2352, 'loss/train': 3.7918059825897217} +07/25/2024 11:36:21 - INFO - __main__ - Step 2354: {'lr': 0.0004998487859782266, 'samples': 112992, 'steps': 2353, 'loss/train': 3.387467861175537} +07/25/2024 11:36:21 - INFO - __main__ - Step 2355: {'lr': 0.0004998486029842854, 'samples': 113040, 'steps': 2354, 'loss/train': 2.5338099002838135} +07/25/2024 11:36:22 - INFO - __main__ - Step 2356: {'lr': 0.0004998484198797185, 'samples': 113088, 'steps': 2355, 'loss/train': 3.11422061920166} +07/25/2024 11:36:22 - INFO - __main__ - Step 2357: {'lr': 0.0004998482366645254, 'samples': 113136, 'steps': 2356, 'loss/train': 3.4276907444000244} +07/25/2024 11:36:22 - INFO - __main__ - Step 2358: {'lr': 0.0004998480533387066, 'samples': 113184, 'steps': 2357, 'loss/train': 0.6702114343643188} +07/25/2024 11:36:22 - INFO - __main__ - Step 2359: {'lr': 0.0004998478699022622, 'samples': 113232, 'steps': 2358, 'loss/train': 2.7623770236968994} +07/25/2024 11:36:23 - INFO - __main__ - Step 2360: {'lr': 0.0004998476863551921, 'samples': 113280, 'steps': 2359, 'loss/train': 3.808037519454956} +07/25/2024 11:36:23 - INFO - __main__ - Step 2361: {'lr': 0.0004998475026974964, 'samples': 113328, 'steps': 2360, 'loss/train': 3.4236414432525635} +07/25/2024 11:36:23 - INFO - __main__ - Step 2362: {'lr': 0.0004998473189291753, 'samples': 113376, 'steps': 2361, 'loss/train': 3.5937588214874268} +07/25/2024 11:36:24 - INFO - __main__ - Step 2363: {'lr': 0.0004998471350502287, 'samples': 113424, 'steps': 2362, 'loss/train': 4.204398155212402} +07/25/2024 11:36:24 - INFO - __main__ - Step 2364: {'lr': 0.0004998469510606568, 'samples': 113472, 'steps': 2363, 'loss/train': 3.4561290740966797} +07/25/2024 11:36:24 - INFO - __main__ - Step 2365: {'lr': 0.0004998467669604597, 'samples': 113520, 'steps': 2364, 'loss/train': 3.573782444000244} +07/25/2024 11:36:24 - INFO - __main__ - Step 2366: {'lr': 0.0004998465827496375, 'samples': 113568, 'steps': 2365, 'loss/train': 0.7901307940483093} +07/25/2024 11:36:25 - INFO - __main__ - Step 2367: {'lr': 0.0004998463984281903, 'samples': 113616, 'steps': 2366, 'loss/train': 3.0381808280944824} +07/25/2024 11:36:25 - INFO - __main__ - Step 2368: {'lr': 0.000499846213996118, 'samples': 113664, 'steps': 2367, 'loss/train': 3.7803118228912354} +07/25/2024 11:36:25 - INFO - __main__ - Step 2369: {'lr': 0.0004998460294534207, 'samples': 113712, 'steps': 2368, 'loss/train': 3.1525521278381348} +07/25/2024 11:36:26 - INFO - __main__ - Step 2370: {'lr': 0.0004998458448000988, 'samples': 113760, 'steps': 2369, 'loss/train': 3.2357070446014404} +07/25/2024 11:36:26 - INFO - __main__ - Step 2371: {'lr': 0.000499845660036152, 'samples': 113808, 'steps': 2370, 'loss/train': 3.5383148193359375} +07/25/2024 11:36:26 - INFO - __main__ - Step 2372: {'lr': 0.0004998454751615806, 'samples': 113856, 'steps': 2371, 'loss/train': 1.2938446998596191} +07/25/2024 11:36:26 - INFO - __main__ - Step 2373: {'lr': 0.0004998452901763846, 'samples': 113904, 'steps': 2372, 'loss/train': 3.2935431003570557} +07/25/2024 11:36:27 - INFO - __main__ - Step 2374: {'lr': 0.0004998451050805641, 'samples': 113952, 'steps': 2373, 'loss/train': 2.4343743324279785} +07/25/2024 11:36:27 - INFO - __main__ - Step 2375: {'lr': 0.0004998449198741193, 'samples': 114000, 'steps': 2374, 'loss/train': 3.4182090759277344} +07/25/2024 11:36:27 - INFO - __main__ - Step 2376: {'lr': 0.00049984473455705, 'samples': 114048, 'steps': 2375, 'loss/train': 3.6170926094055176} +07/25/2024 11:36:28 - INFO - __main__ - Step 2377: {'lr': 0.0004998445491293566, 'samples': 114096, 'steps': 2376, 'loss/train': 3.6842310428619385} +07/25/2024 11:36:28 - INFO - __main__ - Step 2378: {'lr': 0.0004998443635910389, 'samples': 114144, 'steps': 2377, 'loss/train': 3.809546709060669} +07/25/2024 11:36:28 - INFO - __main__ - Step 2379: {'lr': 0.0004998441779420972, 'samples': 114192, 'steps': 2378, 'loss/train': 3.4123103618621826} +07/25/2024 11:36:28 - INFO - __main__ - Step 2380: {'lr': 0.0004998439921825315, 'samples': 114240, 'steps': 2379, 'loss/train': 4.233869552612305} +07/25/2024 11:36:29 - INFO - __main__ - Step 2381: {'lr': 0.0004998438063123419, 'samples': 114288, 'steps': 2380, 'loss/train': 3.4600534439086914} +07/25/2024 11:36:29 - INFO - __main__ - Step 2382: {'lr': 0.0004998436203315284, 'samples': 114336, 'steps': 2381, 'loss/train': 0.6896295547485352} +07/25/2024 11:36:29 - INFO - __main__ - Step 2383: {'lr': 0.0004998434342400913, 'samples': 114384, 'steps': 2382, 'loss/train': 3.4219348430633545} +07/25/2024 11:36:30 - INFO - __main__ - Step 2384: {'lr': 0.0004998432480380304, 'samples': 114432, 'steps': 2383, 'loss/train': 3.618839979171753} +07/25/2024 11:36:30 - INFO - __main__ - Step 2385: {'lr': 0.0004998430617253459, 'samples': 114480, 'steps': 2384, 'loss/train': 3.0272316932678223} +07/25/2024 11:36:30 - INFO - __main__ - Step 2386: {'lr': 0.0004998428753020379, 'samples': 114528, 'steps': 2385, 'loss/train': 3.5203397274017334} +07/25/2024 11:36:30 - INFO - __main__ - Step 2387: {'lr': 0.0004998426887681065, 'samples': 114576, 'steps': 2386, 'loss/train': 3.5132765769958496} +07/25/2024 11:36:31 - INFO - __main__ - Step 2388: {'lr': 0.0004998425021235516, 'samples': 114624, 'steps': 2387, 'loss/train': 3.777353286743164} +07/25/2024 11:36:31 - INFO - __main__ - Step 2389: {'lr': 0.0004998423153683737, 'samples': 114672, 'steps': 2388, 'loss/train': 4.443736553192139} +07/25/2024 11:36:31 - INFO - __main__ - Step 2390: {'lr': 0.0004998421285025725, 'samples': 114720, 'steps': 2389, 'loss/train': 1.3498493432998657} +07/25/2024 11:36:32 - INFO - __main__ - Step 2391: {'lr': 0.0004998419415261482, 'samples': 114768, 'steps': 2390, 'loss/train': 3.507406234741211} +07/25/2024 11:36:32 - INFO - __main__ - Step 2392: {'lr': 0.0004998417544391008, 'samples': 114816, 'steps': 2391, 'loss/train': 3.243664264678955} +07/25/2024 11:36:32 - INFO - __main__ - Step 2393: {'lr': 0.0004998415672414307, 'samples': 114864, 'steps': 2392, 'loss/train': 2.7647228240966797} +07/25/2024 11:36:32 - INFO - __main__ - Step 2394: {'lr': 0.0004998413799331376, 'samples': 114912, 'steps': 2393, 'loss/train': 2.7219066619873047} +07/25/2024 11:36:33 - INFO - __main__ - Step 2395: {'lr': 0.0004998411925142218, 'samples': 114960, 'steps': 2394, 'loss/train': 3.3743836879730225} +07/25/2024 11:36:33 - INFO - __main__ - Step 2396: {'lr': 0.0004998410049846832, 'samples': 115008, 'steps': 2395, 'loss/train': 1.2980695962905884} +07/25/2024 11:36:33 - INFO - __main__ - Step 2397: {'lr': 0.0004998408173445221, 'samples': 115056, 'steps': 2396, 'loss/train': 3.9604458808898926} +07/25/2024 11:36:34 - INFO - __main__ - Step 2398: {'lr': 0.0004998406295937385, 'samples': 115104, 'steps': 2397, 'loss/train': 3.2034034729003906} +07/25/2024 11:36:34 - INFO - __main__ - Step 2399: {'lr': 0.0004998404417323324, 'samples': 115152, 'steps': 2398, 'loss/train': 3.4216372966766357} +07/25/2024 11:36:34 - INFO - __main__ - Step 2400: {'lr': 0.0004998402537603041, 'samples': 115200, 'steps': 2399, 'loss/train': 3.3115580081939697} +07/25/2024 11:36:34 - INFO - __main__ - Step 2401: {'lr': 0.0004998400656776534, 'samples': 115248, 'steps': 2400, 'loss/train': 3.356390953063965} +07/25/2024 11:36:35 - INFO - __main__ - Step 2402: {'lr': 0.0004998398774843805, 'samples': 115296, 'steps': 2401, 'loss/train': 2.772491455078125} +07/25/2024 11:36:35 - INFO - __main__ - Step 2403: {'lr': 0.0004998396891804855, 'samples': 115344, 'steps': 2402, 'loss/train': 3.307333469390869} +07/25/2024 11:36:35 - INFO - __main__ - Step 2404: {'lr': 0.0004998395007659686, 'samples': 115392, 'steps': 2403, 'loss/train': 3.6566617488861084} +07/25/2024 11:36:36 - INFO - __main__ - Step 2405: {'lr': 0.0004998393122408297, 'samples': 115440, 'steps': 2404, 'loss/train': 2.9125561714172363} +07/25/2024 11:36:36 - INFO - __main__ - Step 2406: {'lr': 0.000499839123605069, 'samples': 115488, 'steps': 2405, 'loss/train': 0.8038381338119507} +07/25/2024 11:36:36 - INFO - __main__ - Step 2407: {'lr': 0.0004998389348586865, 'samples': 115536, 'steps': 2406, 'loss/train': 3.9903881549835205} +07/25/2024 11:36:36 - INFO - __main__ - Step 2408: {'lr': 0.0004998387460016824, 'samples': 115584, 'steps': 2407, 'loss/train': 3.771139144897461} +07/25/2024 11:36:37 - INFO - __main__ - Step 2409: {'lr': 0.0004998385570340565, 'samples': 115632, 'steps': 2408, 'loss/train': 3.0228216648101807} +07/25/2024 11:36:37 - INFO - __main__ - Step 2410: {'lr': 0.0004998383679558092, 'samples': 115680, 'steps': 2409, 'loss/train': 3.0123884677886963} +07/25/2024 11:36:37 - INFO - __main__ - Step 2411: {'lr': 0.0004998381787669404, 'samples': 115728, 'steps': 2410, 'loss/train': 2.8909084796905518} +07/25/2024 11:36:37 - INFO - __main__ - Step 2412: {'lr': 0.0004998379894674505, 'samples': 115776, 'steps': 2411, 'loss/train': 3.276932716369629} +07/25/2024 11:36:38 - INFO - __main__ - Step 2413: {'lr': 0.0004998378000573391, 'samples': 115824, 'steps': 2412, 'loss/train': 3.441922664642334} +07/25/2024 11:36:38 - INFO - __main__ - Step 2414: {'lr': 0.0004998376105366066, 'samples': 115872, 'steps': 2413, 'loss/train': 2.352599620819092} +07/25/2024 11:36:38 - INFO - __main__ - Step 2415: {'lr': 0.0004998374209052531, 'samples': 115920, 'steps': 2414, 'loss/train': 2.384355306625366} +07/25/2024 11:36:39 - INFO - __main__ - Step 2416: {'lr': 0.0004998372311632783, 'samples': 115968, 'steps': 2415, 'loss/train': 3.5783941745758057} +07/25/2024 11:36:39 - INFO - __main__ - Step 2417: {'lr': 0.0004998370413106829, 'samples': 116016, 'steps': 2416, 'loss/train': 3.114708185195923} +07/25/2024 11:36:39 - INFO - __main__ - Step 2418: {'lr': 0.0004998368513474665, 'samples': 116064, 'steps': 2417, 'loss/train': 2.1347389221191406} +07/25/2024 11:36:39 - INFO - __main__ - Step 2419: {'lr': 0.0004998366612736294, 'samples': 116112, 'steps': 2418, 'loss/train': 2.744443893432617} +07/25/2024 11:36:40 - INFO - __main__ - Step 2420: {'lr': 0.0004998364710891716, 'samples': 116160, 'steps': 2419, 'loss/train': 1.1463111639022827} +07/25/2024 11:36:40 - INFO - __main__ - Step 2421: {'lr': 0.0004998362807940932, 'samples': 116208, 'steps': 2420, 'loss/train': 3.0418789386749268} +07/25/2024 11:36:40 - INFO - __main__ - Step 2422: {'lr': 0.0004998360903883943, 'samples': 116256, 'steps': 2421, 'loss/train': 3.329710006713867} +07/25/2024 11:36:41 - INFO - __main__ - Step 2423: {'lr': 0.000499835899872075, 'samples': 116304, 'steps': 2422, 'loss/train': 3.6214723587036133} +07/25/2024 11:36:41 - INFO - __main__ - Step 2424: {'lr': 0.0004998357092451353, 'samples': 116352, 'steps': 2423, 'loss/train': 3.174440383911133} +07/25/2024 11:36:41 - INFO - __main__ - Step 2425: {'lr': 0.0004998355185075754, 'samples': 116400, 'steps': 2424, 'loss/train': 3.3774030208587646} +07/25/2024 11:36:41 - INFO - __main__ - Step 2426: {'lr': 0.0004998353276593953, 'samples': 116448, 'steps': 2425, 'loss/train': 3.506350040435791} +07/25/2024 11:36:42 - INFO - __main__ - Step 2427: {'lr': 0.0004998351367005952, 'samples': 116496, 'steps': 2426, 'loss/train': 3.5764758586883545} +07/25/2024 11:36:42 - INFO - __main__ - Step 2428: {'lr': 0.000499834945631175, 'samples': 116544, 'steps': 2427, 'loss/train': 3.293396472930908} +07/25/2024 11:36:42 - INFO - __main__ - Step 2429: {'lr': 0.0004998347544511349, 'samples': 116592, 'steps': 2428, 'loss/train': 3.480348825454712} +07/25/2024 11:36:43 - INFO - __main__ - Step 2430: {'lr': 0.0004998345631604751, 'samples': 116640, 'steps': 2429, 'loss/train': 0.6178845763206482} +07/25/2024 11:36:43 - INFO - __main__ - Step 2431: {'lr': 0.0004998343717591955, 'samples': 116688, 'steps': 2430, 'loss/train': 2.0745882987976074} +07/25/2024 11:36:43 - INFO - __main__ - Step 2432: {'lr': 0.0004998341802472961, 'samples': 116736, 'steps': 2431, 'loss/train': 3.5343234539031982} +07/25/2024 11:36:43 - INFO - __main__ - Step 2433: {'lr': 0.0004998339886247773, 'samples': 116784, 'steps': 2432, 'loss/train': 3.308260202407837} +07/25/2024 11:36:44 - INFO - __main__ - Step 2434: {'lr': 0.0004998337968916389, 'samples': 116832, 'steps': 2433, 'loss/train': 1.2035273313522339} +07/25/2024 11:36:44 - INFO - __main__ - Step 2435: {'lr': 0.0004998336050478811, 'samples': 116880, 'steps': 2434, 'loss/train': 2.822214365005493} +07/25/2024 11:36:44 - INFO - __main__ - Step 2436: {'lr': 0.000499833413093504, 'samples': 116928, 'steps': 2435, 'loss/train': 3.2448792457580566} +07/25/2024 11:36:45 - INFO - __main__ - Step 2437: {'lr': 0.0004998332210285077, 'samples': 116976, 'steps': 2436, 'loss/train': 3.527048110961914} +07/25/2024 11:36:45 - INFO - __main__ - Step 2438: {'lr': 0.0004998330288528922, 'samples': 117024, 'steps': 2437, 'loss/train': 2.726274251937866} +07/25/2024 11:36:45 - INFO - __main__ - Step 2439: {'lr': 0.0004998328365666577, 'samples': 117072, 'steps': 2438, 'loss/train': 1.2290031909942627} +07/25/2024 11:36:45 - INFO - __main__ - Step 2440: {'lr': 0.0004998326441698042, 'samples': 117120, 'steps': 2439, 'loss/train': 2.9938409328460693} +07/25/2024 11:36:46 - INFO - __main__ - Step 2441: {'lr': 0.0004998324516623317, 'samples': 117168, 'steps': 2440, 'loss/train': 3.3313510417938232} +07/25/2024 11:36:46 - INFO - __main__ - Step 2442: {'lr': 0.0004998322590442406, 'samples': 117216, 'steps': 2441, 'loss/train': 2.6521427631378174} +07/25/2024 11:36:46 - INFO - __main__ - Step 2443: {'lr': 0.0004998320663155306, 'samples': 117264, 'steps': 2442, 'loss/train': 2.6430211067199707} +07/25/2024 11:36:47 - INFO - __main__ - Step 2444: {'lr': 0.0004998318734762019, 'samples': 117312, 'steps': 2443, 'loss/train': 1.5476092100143433} +07/25/2024 11:36:47 - INFO - __main__ - Step 2445: {'lr': 0.0004998316805262549, 'samples': 117360, 'steps': 2444, 'loss/train': 4.2230401039123535} +07/25/2024 11:36:47 - INFO - __main__ - Step 2446: {'lr': 0.0004998314874656892, 'samples': 117408, 'steps': 2445, 'loss/train': 3.483185291290283} +07/25/2024 11:36:47 - INFO - __main__ - Step 2447: {'lr': 0.0004998312942945053, 'samples': 117456, 'steps': 2446, 'loss/train': 3.4899802207946777} +07/25/2024 11:36:48 - INFO - __main__ - Step 2448: {'lr': 0.0004998311010127029, 'samples': 117504, 'steps': 2447, 'loss/train': 3.5417277812957764} +07/25/2024 11:36:48 - INFO - __main__ - Step 2449: {'lr': 0.0004998309076202825, 'samples': 117552, 'steps': 2448, 'loss/train': 3.302577495574951} +07/25/2024 11:36:48 - INFO - __main__ - Step 2450: {'lr': 0.0004998307141172438, 'samples': 117600, 'steps': 2449, 'loss/train': 3.214630126953125} +07/25/2024 11:36:49 - INFO - __main__ - Step 2451: {'lr': 0.0004998305205035871, 'samples': 117648, 'steps': 2450, 'loss/train': 3.9734413623809814} +07/25/2024 11:36:49 - INFO - __main__ - Step 2452: {'lr': 0.0004998303267793125, 'samples': 117696, 'steps': 2451, 'loss/train': 2.789900064468384} +07/25/2024 11:36:49 - INFO - __main__ - Step 2453: {'lr': 0.0004998301329444201, 'samples': 117744, 'steps': 2452, 'loss/train': 3.4624855518341064} +07/25/2024 11:36:49 - INFO - __main__ - Step 2454: {'lr': 0.0004998299389989098, 'samples': 117792, 'steps': 2453, 'loss/train': 0.9988271594047546} +07/25/2024 11:36:50 - INFO - __main__ - Step 2455: {'lr': 0.0004998297449427818, 'samples': 117840, 'steps': 2454, 'loss/train': 2.1583337783813477} +07/25/2024 11:36:50 - INFO - __main__ - Step 2456: {'lr': 0.0004998295507760362, 'samples': 117888, 'steps': 2455, 'loss/train': 3.1487884521484375} +07/25/2024 11:36:50 - INFO - __main__ - Step 2457: {'lr': 0.0004998293564986732, 'samples': 117936, 'steps': 2456, 'loss/train': 3.458111524581909} +07/25/2024 11:36:51 - INFO - __main__ - Step 2458: {'lr': 0.0004998291621106926, 'samples': 117984, 'steps': 2457, 'loss/train': 3.5694596767425537} +07/25/2024 11:36:51 - INFO - __main__ - Step 2459: {'lr': 0.0004998289676120947, 'samples': 118032, 'steps': 2458, 'loss/train': 3.4536805152893066} +07/25/2024 11:36:51 - INFO - __main__ - Step 2460: {'lr': 0.0004998287730028795, 'samples': 118080, 'steps': 2459, 'loss/train': 3.5815608501434326} +07/25/2024 11:36:51 - INFO - __main__ - Step 2461: {'lr': 0.0004998285782830472, 'samples': 118128, 'steps': 2460, 'loss/train': 4.418567180633545} +07/25/2024 11:36:52 - INFO - __main__ - Step 2462: {'lr': 0.0004998283834525977, 'samples': 118176, 'steps': 2461, 'loss/train': 3.1893351078033447} +07/25/2024 11:36:52 - INFO - __main__ - Step 2463: {'lr': 0.0004998281885115314, 'samples': 118224, 'steps': 2462, 'loss/train': 1.1247204542160034} +07/25/2024 11:36:52 - INFO - __main__ - Step 2464: {'lr': 0.000499827993459848, 'samples': 118272, 'steps': 2463, 'loss/train': 3.0090537071228027} +07/25/2024 11:36:53 - INFO - __main__ - Step 2465: {'lr': 0.0004998277982975477, 'samples': 118320, 'steps': 2464, 'loss/train': 3.6904850006103516} +07/25/2024 11:36:53 - INFO - __main__ - Step 2466: {'lr': 0.0004998276030246308, 'samples': 118368, 'steps': 2465, 'loss/train': 2.2861058712005615} +07/25/2024 11:36:53 - INFO - __main__ - Step 2467: {'lr': 0.0004998274076410971, 'samples': 118416, 'steps': 2466, 'loss/train': 3.308227062225342} +07/25/2024 11:36:53 - INFO - __main__ - Step 2468: {'lr': 0.000499827212146947, 'samples': 118464, 'steps': 2467, 'loss/train': 1.8720580339431763} +07/25/2024 11:36:54 - INFO - __main__ - Step 2469: {'lr': 0.0004998270165421803, 'samples': 118512, 'steps': 2468, 'loss/train': 4.048393249511719} +07/25/2024 11:36:54 - INFO - __main__ - Step 2470: {'lr': 0.0004998268208267972, 'samples': 118560, 'steps': 2469, 'loss/train': 3.045137643814087} +07/25/2024 11:36:54 - INFO - __main__ - Step 2471: {'lr': 0.0004998266250007977, 'samples': 118608, 'steps': 2470, 'loss/train': 3.4126827716827393} +07/25/2024 11:36:55 - INFO - __main__ - Step 2472: {'lr': 0.0004998264290641822, 'samples': 118656, 'steps': 2471, 'loss/train': 3.0090749263763428} +07/25/2024 11:36:55 - INFO - __main__ - Step 2473: {'lr': 0.0004998262330169504, 'samples': 118704, 'steps': 2472, 'loss/train': 3.241502046585083} +07/25/2024 11:36:55 - INFO - __main__ - Step 2474: {'lr': 0.0004998260368591026, 'samples': 118752, 'steps': 2473, 'loss/train': 2.2977683544158936} +07/25/2024 11:36:55 - INFO - __main__ - Step 2475: {'lr': 0.0004998258405906388, 'samples': 118800, 'steps': 2474, 'loss/train': 2.9803249835968018} +07/25/2024 11:36:56 - INFO - __main__ - Step 2476: {'lr': 0.0004998256442115592, 'samples': 118848, 'steps': 2475, 'loss/train': 3.9847412109375} +07/25/2024 11:36:56 - INFO - __main__ - Step 2477: {'lr': 0.0004998254477218637, 'samples': 118896, 'steps': 2476, 'loss/train': 3.3970723152160645} +07/25/2024 11:36:56 - INFO - __main__ - Step 2478: {'lr': 0.0004998252511215524, 'samples': 118944, 'steps': 2477, 'loss/train': 2.9525980949401855} +07/25/2024 11:36:57 - INFO - __main__ - Step 2479: {'lr': 0.0004998250544106256, 'samples': 118992, 'steps': 2478, 'loss/train': 2.625288248062134} +07/25/2024 11:36:57 - INFO - __main__ - Step 2480: {'lr': 0.0004998248575890834, 'samples': 119040, 'steps': 2479, 'loss/train': 3.635202169418335} +07/25/2024 11:36:57 - INFO - __main__ - Step 2481: {'lr': 0.0004998246606569256, 'samples': 119088, 'steps': 2480, 'loss/train': 3.7865233421325684} +07/25/2024 11:36:57 - INFO - __main__ - Step 2482: {'lr': 0.0004998244636141524, 'samples': 119136, 'steps': 2481, 'loss/train': 2.9874179363250732} +07/25/2024 11:36:58 - INFO - __main__ - Step 2483: {'lr': 0.000499824266460764, 'samples': 119184, 'steps': 2482, 'loss/train': 3.6804938316345215} +07/25/2024 11:36:58 - INFO - __main__ - Step 2484: {'lr': 0.0004998240691967604, 'samples': 119232, 'steps': 2483, 'loss/train': 3.7672698497772217} +07/25/2024 11:36:58 - INFO - __main__ - Step 2485: {'lr': 0.0004998238718221416, 'samples': 119280, 'steps': 2484, 'loss/train': 3.1416146755218506} +07/25/2024 11:36:59 - INFO - __main__ - Step 2486: {'lr': 0.000499823674336908, 'samples': 119328, 'steps': 2485, 'loss/train': 2.8134233951568604} +07/25/2024 11:36:59 - INFO - __main__ - Step 2487: {'lr': 0.0004998234767410593, 'samples': 119376, 'steps': 2486, 'loss/train': 1.1478407382965088} +07/25/2024 11:36:59 - INFO - __main__ - Step 2488: {'lr': 0.0004998232790345959, 'samples': 119424, 'steps': 2487, 'loss/train': 2.888922691345215} +07/25/2024 11:36:59 - INFO - __main__ - Step 2489: {'lr': 0.0004998230812175177, 'samples': 119472, 'steps': 2488, 'loss/train': 3.248115062713623} +07/25/2024 11:37:00 - INFO - __main__ - Step 2490: {'lr': 0.0004998228832898248, 'samples': 119520, 'steps': 2489, 'loss/train': 1.6285266876220703} +07/25/2024 11:37:00 - INFO - __main__ - Step 2491: {'lr': 0.0004998226852515173, 'samples': 119568, 'steps': 2490, 'loss/train': 3.1041102409362793} +07/25/2024 11:37:00 - INFO - __main__ - Step 2492: {'lr': 0.0004998224871025954, 'samples': 119616, 'steps': 2491, 'loss/train': 2.769462823867798} +07/25/2024 11:37:01 - INFO - __main__ - Step 2493: {'lr': 0.0004998222888430591, 'samples': 119664, 'steps': 2492, 'loss/train': 3.8058278560638428} +07/25/2024 11:37:01 - INFO - __main__ - Step 2494: {'lr': 0.0004998220904729084, 'samples': 119712, 'steps': 2493, 'loss/train': 4.007496356964111} +07/25/2024 11:37:01 - INFO - __main__ - Step 2495: {'lr': 0.0004998218919921436, 'samples': 119760, 'steps': 2494, 'loss/train': 3.2358715534210205} +07/25/2024 11:37:01 - INFO - __main__ - Step 2496: {'lr': 0.0004998216934007646, 'samples': 119808, 'steps': 2495, 'loss/train': 2.735001564025879} +07/25/2024 11:37:02 - INFO - __main__ - Step 2497: {'lr': 0.0004998214946987715, 'samples': 119856, 'steps': 2496, 'loss/train': 3.329292058944702} +07/25/2024 11:37:02 - INFO - __main__ - Step 2498: {'lr': 0.0004998212958861645, 'samples': 119904, 'steps': 2497, 'loss/train': 2.9665210247039795} +07/25/2024 11:37:02 - INFO - __main__ - Step 2499: {'lr': 0.0004998210969629437, 'samples': 119952, 'steps': 2498, 'loss/train': 3.4506983757019043} +07/25/2024 11:37:03 - INFO - __main__ - Step 2500: {'lr': 0.000499820897929109, 'samples': 120000, 'steps': 2499, 'loss/train': 3.860872745513916} +07/25/2024 11:37:03 - INFO - __main__ - Step 2501: {'lr': 0.0004998206987846607, 'samples': 120048, 'steps': 2500, 'loss/train': 2.8236889839172363} +07/25/2024 11:37:03 - INFO - __main__ - Step 2502: {'lr': 0.0004998204995295988, 'samples': 120096, 'steps': 2501, 'loss/train': 3.496203899383545} +07/25/2024 11:37:03 - INFO - __main__ - Step 2503: {'lr': 0.0004998203001639232, 'samples': 120144, 'steps': 2502, 'loss/train': 2.1312756538391113} +07/25/2024 11:37:04 - INFO - __main__ - Step 2504: {'lr': 0.0004998201006876343, 'samples': 120192, 'steps': 2503, 'loss/train': 3.1648714542388916} +07/25/2024 11:37:04 - INFO - __main__ - Step 2505: {'lr': 0.0004998199011007321, 'samples': 120240, 'steps': 2504, 'loss/train': 3.0476064682006836} +07/25/2024 11:37:04 - INFO - __main__ - Step 2506: {'lr': 0.0004998197014032166, 'samples': 120288, 'steps': 2505, 'loss/train': 2.7170591354370117} +07/25/2024 11:37:04 - INFO - __main__ - Step 2507: {'lr': 0.0004998195015950879, 'samples': 120336, 'steps': 2506, 'loss/train': 2.6755406856536865} +07/25/2024 11:37:05 - INFO - __main__ - Step 2508: {'lr': 0.0004998193016763462, 'samples': 120384, 'steps': 2507, 'loss/train': 3.989732265472412} +07/25/2024 11:37:05 - INFO - __main__ - Step 2509: {'lr': 0.0004998191016469915, 'samples': 120432, 'steps': 2508, 'loss/train': 3.0508813858032227} +07/25/2024 11:37:05 - INFO - __main__ - Step 2510: {'lr': 0.0004998189015070239, 'samples': 120480, 'steps': 2509, 'loss/train': 2.8314571380615234} +07/25/2024 11:37:06 - INFO - __main__ - Step 2511: {'lr': 0.0004998187012564435, 'samples': 120528, 'steps': 2510, 'loss/train': 1.2229390144348145} +07/25/2024 11:37:06 - INFO - __main__ - Step 2512: {'lr': 0.0004998185008952504, 'samples': 120576, 'steps': 2511, 'loss/train': 2.67535138130188} +07/25/2024 11:37:06 - INFO - __main__ - Step 2513: {'lr': 0.0004998183004234445, 'samples': 120624, 'steps': 2512, 'loss/train': 3.4531097412109375} +07/25/2024 11:37:06 - INFO - __main__ - Step 2514: {'lr': 0.0004998180998410263, 'samples': 120672, 'steps': 2513, 'loss/train': 3.4014785289764404} +07/25/2024 11:37:07 - INFO - __main__ - Step 2515: {'lr': 0.0004998178991479955, 'samples': 120720, 'steps': 2514, 'loss/train': 3.5123510360717773} +07/25/2024 11:37:07 - INFO - __main__ - Step 2516: {'lr': 0.0004998176983443523, 'samples': 120768, 'steps': 2515, 'loss/train': 2.875117301940918} +07/25/2024 11:37:07 - INFO - __main__ - Step 2517: {'lr': 0.0004998174974300968, 'samples': 120816, 'steps': 2516, 'loss/train': 3.448636531829834} +07/25/2024 11:37:08 - INFO - __main__ - Step 2518: {'lr': 0.0004998172964052293, 'samples': 120864, 'steps': 2517, 'loss/train': 4.136673450469971} +07/25/2024 11:37:08 - INFO - __main__ - Step 2519: {'lr': 0.0004998170952697495, 'samples': 120912, 'steps': 2518, 'loss/train': 3.2668182849884033} +07/25/2024 11:37:08 - INFO - __main__ - Step 2520: {'lr': 0.0004998168940236578, 'samples': 120960, 'steps': 2519, 'loss/train': 3.0283913612365723} +07/25/2024 11:37:08 - INFO - __main__ - Step 2521: {'lr': 0.0004998166926669542, 'samples': 121008, 'steps': 2520, 'loss/train': 3.64265513420105} +07/25/2024 11:37:09 - INFO - __main__ - Step 2522: {'lr': 0.0004998164911996387, 'samples': 121056, 'steps': 2521, 'loss/train': 3.0198421478271484} +07/25/2024 11:37:09 - INFO - __main__ - Step 2523: {'lr': 0.0004998162896217115, 'samples': 121104, 'steps': 2522, 'loss/train': 3.155773878097534} +07/25/2024 11:37:09 - INFO - __main__ - Step 2524: {'lr': 0.0004998160879331727, 'samples': 121152, 'steps': 2523, 'loss/train': 3.4077672958374023} +07/25/2024 11:37:10 - INFO - __main__ - Step 2525: {'lr': 0.0004998158861340222, 'samples': 121200, 'steps': 2524, 'loss/train': 2.9701554775238037} +07/25/2024 11:37:10 - INFO - __main__ - Step 2526: {'lr': 0.0004998156842242603, 'samples': 121248, 'steps': 2525, 'loss/train': 3.166111707687378} +07/25/2024 11:37:10 - INFO - __main__ - Step 2527: {'lr': 0.0004998154822038871, 'samples': 121296, 'steps': 2526, 'loss/train': 2.0455472469329834} +07/25/2024 11:37:10 - INFO - __main__ - Step 2528: {'lr': 0.0004998152800729024, 'samples': 121344, 'steps': 2527, 'loss/train': 3.780217170715332} +07/25/2024 11:37:11 - INFO - __main__ - Step 2529: {'lr': 0.0004998150778313066, 'samples': 121392, 'steps': 2528, 'loss/train': 2.6969730854034424} +07/25/2024 11:37:11 - INFO - __main__ - Step 2530: {'lr': 0.0004998148754790998, 'samples': 121440, 'steps': 2529, 'loss/train': 2.707364797592163} +07/25/2024 11:37:11 - INFO - __main__ - Step 2531: {'lr': 0.0004998146730162818, 'samples': 121488, 'steps': 2530, 'loss/train': 3.2915384769439697} +07/25/2024 11:37:12 - INFO - __main__ - Step 2532: {'lr': 0.000499814470442853, 'samples': 121536, 'steps': 2531, 'loss/train': 3.2764434814453125} +07/25/2024 11:37:12 - INFO - __main__ - Step 2533: {'lr': 0.0004998142677588132, 'samples': 121584, 'steps': 2532, 'loss/train': 2.650413751602173} +07/25/2024 11:37:12 - INFO - __main__ - Step 2534: {'lr': 0.0004998140649641627, 'samples': 121632, 'steps': 2533, 'loss/train': 3.126735210418701} +07/25/2024 11:37:12 - INFO - __main__ - Step 2535: {'lr': 0.0004998138620589016, 'samples': 121680, 'steps': 2534, 'loss/train': 1.0322580337524414} +07/25/2024 11:37:13 - INFO - __main__ - Step 2536: {'lr': 0.0004998136590430299, 'samples': 121728, 'steps': 2535, 'loss/train': 2.824979305267334} +07/25/2024 11:37:13 - INFO - __main__ - Step 2537: {'lr': 0.0004998134559165477, 'samples': 121776, 'steps': 2536, 'loss/train': 3.403825521469116} +07/25/2024 11:37:13 - INFO - __main__ - Step 2538: {'lr': 0.000499813252679455, 'samples': 121824, 'steps': 2537, 'loss/train': 2.8509793281555176} +07/25/2024 11:37:14 - INFO - __main__ - Step 2539: {'lr': 0.0004998130493317521, 'samples': 121872, 'steps': 2538, 'loss/train': 3.3567256927490234} +07/25/2024 11:37:14 - INFO - __main__ - Step 2540: {'lr': 0.000499812845873439, 'samples': 121920, 'steps': 2539, 'loss/train': 3.1478207111358643} +07/25/2024 11:37:14 - INFO - __main__ - Step 2541: {'lr': 0.0004998126423045158, 'samples': 121968, 'steps': 2540, 'loss/train': 3.4279048442840576} +07/25/2024 11:37:14 - INFO - __main__ - Step 2542: {'lr': 0.0004998124386249823, 'samples': 122016, 'steps': 2541, 'loss/train': 2.863826036453247} +07/25/2024 11:37:15 - INFO - __main__ - Step 2543: {'lr': 0.0004998122348348392, 'samples': 122064, 'steps': 2542, 'loss/train': 3.089484214782715} +07/25/2024 11:37:15 - INFO - __main__ - Step 2544: {'lr': 0.000499812030934086, 'samples': 122112, 'steps': 2543, 'loss/train': 2.998013496398926} +07/25/2024 11:37:15 - INFO - __main__ - Step 2545: {'lr': 0.0004998118269227232, 'samples': 122160, 'steps': 2544, 'loss/train': 3.4285471439361572} +07/25/2024 11:37:16 - INFO - __main__ - Step 2546: {'lr': 0.0004998116228007506, 'samples': 122208, 'steps': 2545, 'loss/train': 2.7790298461914062} +07/25/2024 11:37:16 - INFO - __main__ - Step 2547: {'lr': 0.0004998114185681685, 'samples': 122256, 'steps': 2546, 'loss/train': 2.082829475402832} +07/25/2024 11:37:16 - INFO - __main__ - Step 2548: {'lr': 0.0004998112142249768, 'samples': 122304, 'steps': 2547, 'loss/train': 3.1061480045318604} +07/25/2024 11:37:16 - INFO - __main__ - Step 2549: {'lr': 0.0004998110097711758, 'samples': 122352, 'steps': 2548, 'loss/train': 3.0662577152252197} +07/25/2024 11:37:17 - INFO - __main__ - Step 2550: {'lr': 0.0004998108052067654, 'samples': 122400, 'steps': 2549, 'loss/train': 2.5481503009796143} +07/25/2024 11:37:17 - INFO - __main__ - Step 2551: {'lr': 0.0004998106005317458, 'samples': 122448, 'steps': 2550, 'loss/train': 2.4143238067626953} +07/25/2024 11:37:17 - INFO - __main__ - Step 2552: {'lr': 0.0004998103957461171, 'samples': 122496, 'steps': 2551, 'loss/train': 3.4639110565185547} +07/25/2024 11:37:18 - INFO - __main__ - Step 2553: {'lr': 0.0004998101908498794, 'samples': 122544, 'steps': 2552, 'loss/train': 2.3200902938842773} +07/25/2024 11:37:18 - INFO - __main__ - Step 2554: {'lr': 0.0004998099858430326, 'samples': 122592, 'steps': 2553, 'loss/train': 3.1957039833068848} +07/25/2024 11:37:18 - INFO - __main__ - Step 2555: {'lr': 0.000499809780725577, 'samples': 122640, 'steps': 2554, 'loss/train': 3.345466136932373} +07/25/2024 11:37:18 - INFO - __main__ - Step 2556: {'lr': 0.0004998095754975126, 'samples': 122688, 'steps': 2555, 'loss/train': 2.909780979156494} +07/25/2024 11:37:19 - INFO - __main__ - Step 2557: {'lr': 0.0004998093701588396, 'samples': 122736, 'steps': 2556, 'loss/train': 2.92793345451355} +07/25/2024 11:37:19 - INFO - __main__ - Step 2558: {'lr': 0.000499809164709558, 'samples': 122784, 'steps': 2557, 'loss/train': 3.673325777053833} +07/25/2024 11:37:19 - INFO - __main__ - Step 2559: {'lr': 0.0004998089591496679, 'samples': 122832, 'steps': 2558, 'loss/train': 0.9923586249351501} +07/25/2024 11:37:20 - INFO - __main__ - Step 2560: {'lr': 0.0004998087534791693, 'samples': 122880, 'steps': 2559, 'loss/train': 2.663663387298584} +07/25/2024 11:37:20 - INFO - __main__ - Step 2561: {'lr': 0.0004998085476980625, 'samples': 122928, 'steps': 2560, 'loss/train': 3.1333460807800293} +07/25/2024 11:37:20 - INFO - __main__ - Step 2562: {'lr': 0.0004998083418063475, 'samples': 122976, 'steps': 2561, 'loss/train': 2.8667795658111572} +07/25/2024 11:37:20 - INFO - __main__ - Step 2563: {'lr': 0.0004998081358040242, 'samples': 123024, 'steps': 2562, 'loss/train': 3.4536490440368652} +07/25/2024 11:37:21 - INFO - __main__ - Step 2564: {'lr': 0.000499807929691093, 'samples': 123072, 'steps': 2563, 'loss/train': 3.298919200897217} +07/25/2024 11:37:21 - INFO - __main__ - Step 2565: {'lr': 0.0004998077234675538, 'samples': 123120, 'steps': 2564, 'loss/train': 3.650057077407837} +07/25/2024 11:37:21 - INFO - __main__ - Step 2566: {'lr': 0.0004998075171334067, 'samples': 123168, 'steps': 2565, 'loss/train': 3.440065622329712} +07/25/2024 11:37:22 - INFO - __main__ - Step 2567: {'lr': 0.0004998073106886519, 'samples': 123216, 'steps': 2566, 'loss/train': 3.3859379291534424} +07/25/2024 11:37:22 - INFO - __main__ - Step 2568: {'lr': 0.0004998071041332893, 'samples': 123264, 'steps': 2567, 'loss/train': 3.356278657913208} +07/25/2024 11:37:22 - INFO - __main__ - Step 2569: {'lr': 0.0004998068974673193, 'samples': 123312, 'steps': 2568, 'loss/train': 2.9442121982574463} +07/25/2024 11:37:22 - INFO - __main__ - Step 2570: {'lr': 0.0004998066906907417, 'samples': 123360, 'steps': 2569, 'loss/train': 3.8309998512268066} +07/25/2024 11:37:23 - INFO - __main__ - Step 2571: {'lr': 0.0004998064838035568, 'samples': 123408, 'steps': 2570, 'loss/train': 2.596388101577759} +07/25/2024 11:37:23 - INFO - __main__ - Step 2572: {'lr': 0.0004998062768057644, 'samples': 123456, 'steps': 2571, 'loss/train': 3.445077657699585} +07/25/2024 11:37:23 - INFO - __main__ - Step 2573: {'lr': 0.0004998060696973648, 'samples': 123504, 'steps': 2572, 'loss/train': 2.89377498626709} +07/25/2024 11:37:24 - INFO - __main__ - Step 2574: {'lr': 0.0004998058624783582, 'samples': 123552, 'steps': 2573, 'loss/train': 4.458150386810303} +07/25/2024 11:37:24 - INFO - __main__ - Step 2575: {'lr': 0.0004998056551487446, 'samples': 123600, 'steps': 2574, 'loss/train': 1.218666434288025} +07/25/2024 11:37:24 - INFO - __main__ - Step 2576: {'lr': 0.000499805447708524, 'samples': 123648, 'steps': 2575, 'loss/train': 3.295433521270752} +07/25/2024 11:37:24 - INFO - __main__ - Step 2577: {'lr': 0.0004998052401576964, 'samples': 123696, 'steps': 2576, 'loss/train': 3.0797817707061768} +07/25/2024 11:37:25 - INFO - __main__ - Step 2578: {'lr': 0.0004998050324962622, 'samples': 123744, 'steps': 2577, 'loss/train': 3.025609016418457} +07/25/2024 11:37:25 - INFO - __main__ - Step 2579: {'lr': 0.0004998048247242213, 'samples': 123792, 'steps': 2578, 'loss/train': 3.6678061485290527} +07/25/2024 11:37:25 - INFO - __main__ - Step 2580: {'lr': 0.0004998046168415738, 'samples': 123840, 'steps': 2579, 'loss/train': 3.099931478500366} +07/25/2024 11:37:26 - INFO - __main__ - Step 2581: {'lr': 0.0004998044088483197, 'samples': 123888, 'steps': 2580, 'loss/train': 2.8738648891448975} +07/25/2024 11:37:26 - INFO - __main__ - Step 2582: {'lr': 0.0004998042007444594, 'samples': 123936, 'steps': 2581, 'loss/train': 3.123692750930786} +07/25/2024 11:37:26 - INFO - __main__ - Step 2583: {'lr': 0.0004998039925299927, 'samples': 123984, 'steps': 2582, 'loss/train': 0.8839350342750549} +07/25/2024 11:37:26 - INFO - __main__ - Step 2584: {'lr': 0.0004998037842049199, 'samples': 124032, 'steps': 2583, 'loss/train': 3.2293903827667236} +07/25/2024 11:37:27 - INFO - __main__ - Step 2585: {'lr': 0.0004998035757692409, 'samples': 124080, 'steps': 2584, 'loss/train': 3.1145365238189697} +07/25/2024 11:37:27 - INFO - __main__ - Step 2586: {'lr': 0.000499803367222956, 'samples': 124128, 'steps': 2585, 'loss/train': 2.0143043994903564} +07/25/2024 11:37:27 - INFO - __main__ - Step 2587: {'lr': 0.000499803158566065, 'samples': 124176, 'steps': 2586, 'loss/train': 2.899887800216675} +07/25/2024 11:37:27 - INFO - __main__ - Step 2588: {'lr': 0.0004998029497985682, 'samples': 124224, 'steps': 2587, 'loss/train': 3.1973752975463867} +07/25/2024 11:37:28 - INFO - __main__ - Step 2589: {'lr': 0.0004998027409204656, 'samples': 124272, 'steps': 2588, 'loss/train': 3.275024652481079} +07/25/2024 11:37:28 - INFO - __main__ - Step 2590: {'lr': 0.0004998025319317574, 'samples': 124320, 'steps': 2589, 'loss/train': 3.3779354095458984} +07/25/2024 11:37:28 - INFO - __main__ - Step 2591: {'lr': 0.0004998023228324436, 'samples': 124368, 'steps': 2590, 'loss/train': 3.228499412536621} +07/25/2024 11:37:29 - INFO - __main__ - Step 2592: {'lr': 0.0004998021136225245, 'samples': 124416, 'steps': 2591, 'loss/train': 3.438401222229004} +07/25/2024 11:37:29 - INFO - __main__ - Step 2593: {'lr': 0.000499801904302, 'samples': 124464, 'steps': 2592, 'loss/train': 3.2402448654174805} +07/25/2024 11:37:29 - INFO - __main__ - Step 2594: {'lr': 0.0004998016948708701, 'samples': 124512, 'steps': 2593, 'loss/train': 2.886871814727783} +07/25/2024 11:37:29 - INFO - __main__ - Step 2595: {'lr': 0.000499801485329135, 'samples': 124560, 'steps': 2594, 'loss/train': 2.876124858856201} +07/25/2024 11:37:30 - INFO - __main__ - Step 2596: {'lr': 0.0004998012756767948, 'samples': 124608, 'steps': 2595, 'loss/train': 2.984173059463501} +07/25/2024 11:37:30 - INFO - __main__ - Step 2597: {'lr': 0.0004998010659138496, 'samples': 124656, 'steps': 2596, 'loss/train': 3.232090711593628} +07/25/2024 11:37:30 - INFO - __main__ - Step 2598: {'lr': 0.0004998008560402994, 'samples': 124704, 'steps': 2597, 'loss/train': 6.481935024261475} +07/25/2024 11:37:31 - INFO - __main__ - Step 2599: {'lr': 0.0004998006460561446, 'samples': 124752, 'steps': 2598, 'loss/train': 1.3982412815093994} +07/25/2024 11:37:31 - INFO - __main__ - Step 2600: {'lr': 0.0004998004359613849, 'samples': 124800, 'steps': 2599, 'loss/train': 3.3399360179901123} +07/25/2024 11:37:31 - INFO - __main__ - Step 2601: {'lr': 0.0004998002257560207, 'samples': 124848, 'steps': 2600, 'loss/train': 2.761005401611328} +07/25/2024 11:37:31 - INFO - __main__ - Step 2602: {'lr': 0.0004998000154400518, 'samples': 124896, 'steps': 2601, 'loss/train': 3.059262990951538} +07/25/2024 11:37:32 - INFO - __main__ - Step 2603: {'lr': 0.0004997998050134786, 'samples': 124944, 'steps': 2602, 'loss/train': 3.329425811767578} +07/25/2024 11:37:32 - INFO - __main__ - Step 2604: {'lr': 0.000499799594476301, 'samples': 124992, 'steps': 2603, 'loss/train': 3.1169514656066895} +07/25/2024 11:37:32 - INFO - __main__ - Step 2605: {'lr': 0.0004997993838285191, 'samples': 125040, 'steps': 2604, 'loss/train': 3.5058183670043945} +07/25/2024 11:37:33 - INFO - __main__ - Step 2606: {'lr': 0.0004997991730701331, 'samples': 125088, 'steps': 2605, 'loss/train': 3.072399616241455} +07/25/2024 11:37:33 - INFO - __main__ - Step 2607: {'lr': 0.000499798962201143, 'samples': 125136, 'steps': 2606, 'loss/train': 1.6461373567581177} +07/25/2024 11:37:33 - INFO - __main__ - Step 2608: {'lr': 0.0004997987512215488, 'samples': 125184, 'steps': 2607, 'loss/train': 2.7727859020233154} +07/25/2024 11:37:33 - INFO - __main__ - Step 2609: {'lr': 0.0004997985401313509, 'samples': 125232, 'steps': 2608, 'loss/train': 3.1019127368927} +07/25/2024 11:37:34 - INFO - __main__ - Step 2610: {'lr': 0.0004997983289305491, 'samples': 125280, 'steps': 2609, 'loss/train': 2.7177162170410156} +07/25/2024 11:37:34 - INFO - __main__ - Step 2611: {'lr': 0.0004997981176191437, 'samples': 125328, 'steps': 2610, 'loss/train': 2.5852034091949463} +07/25/2024 11:37:34 - INFO - __main__ - Step 2612: {'lr': 0.0004997979061971345, 'samples': 125376, 'steps': 2611, 'loss/train': 3.2611732482910156} +07/25/2024 11:37:35 - INFO - __main__ - Step 2613: {'lr': 0.0004997976946645219, 'samples': 125424, 'steps': 2612, 'loss/train': 3.163621187210083} +07/25/2024 11:37:35 - INFO - __main__ - Step 2614: {'lr': 0.0004997974830213059, 'samples': 125472, 'steps': 2613, 'loss/train': 3.100328207015991} +07/25/2024 11:37:35 - INFO - __main__ - Step 2615: {'lr': 0.0004997972712674865, 'samples': 125520, 'steps': 2614, 'loss/train': 4.0581231117248535} +07/25/2024 11:37:35 - INFO - __main__ - Step 2616: {'lr': 0.000499797059403064, 'samples': 125568, 'steps': 2615, 'loss/train': 3.2705211639404297} +07/25/2024 11:37:36 - INFO - __main__ - Step 2617: {'lr': 0.0004997968474280383, 'samples': 125616, 'steps': 2616, 'loss/train': 3.806226968765259} +07/25/2024 11:37:36 - INFO - __main__ - Step 2618: {'lr': 0.0004997966353424095, 'samples': 125664, 'steps': 2617, 'loss/train': 3.1662023067474365} +07/25/2024 11:37:36 - INFO - __main__ - Step 2619: {'lr': 0.0004997964231461778, 'samples': 125712, 'steps': 2618, 'loss/train': 3.3048672676086426} +07/25/2024 11:37:37 - INFO - __main__ - Step 2620: {'lr': 0.0004997962108393432, 'samples': 125760, 'steps': 2619, 'loss/train': 3.2857258319854736} +07/25/2024 11:37:37 - INFO - __main__ - Step 2621: {'lr': 0.000499795998421906, 'samples': 125808, 'steps': 2620, 'loss/train': 3.5198709964752197} +07/25/2024 11:37:37 - INFO - __main__ - Step 2622: {'lr': 0.0004997957858938659, 'samples': 125856, 'steps': 2621, 'loss/train': 6.430179595947266} +07/25/2024 11:37:37 - INFO - __main__ - Step 2623: {'lr': 0.0004997955732552233, 'samples': 125904, 'steps': 2622, 'loss/train': 2.2688496112823486} +07/25/2024 11:37:38 - INFO - __main__ - Step 2624: {'lr': 0.0004997953605059783, 'samples': 125952, 'steps': 2623, 'loss/train': 3.086132764816284} +07/25/2024 11:37:38 - INFO - __main__ - Step 2625: {'lr': 0.0004997951476461308, 'samples': 126000, 'steps': 2624, 'loss/train': 3.090059280395508} +07/25/2024 11:37:38 - INFO - __main__ - Step 2626: {'lr': 0.0004997949346756811, 'samples': 126048, 'steps': 2625, 'loss/train': 3.2400031089782715} +07/25/2024 11:37:39 - INFO - __main__ - Step 2627: {'lr': 0.0004997947215946291, 'samples': 126096, 'steps': 2626, 'loss/train': 2.7617342472076416} +07/25/2024 11:37:39 - INFO - __main__ - Step 2628: {'lr': 0.0004997945084029751, 'samples': 126144, 'steps': 2627, 'loss/train': 2.9773776531219482} +07/25/2024 11:37:39 - INFO - __main__ - Step 2629: {'lr': 0.000499794295100719, 'samples': 126192, 'steps': 2628, 'loss/train': 2.813192129135132} +07/25/2024 11:37:39 - INFO - __main__ - Step 2630: {'lr': 0.0004997940816878611, 'samples': 126240, 'steps': 2629, 'loss/train': 3.4924566745758057} +07/25/2024 11:37:40 - INFO - __main__ - Step 2631: {'lr': 0.0004997938681644013, 'samples': 126288, 'steps': 2630, 'loss/train': 2.9311821460723877} +07/25/2024 11:37:40 - INFO - __main__ - Step 2632: {'lr': 0.0004997936545303398, 'samples': 126336, 'steps': 2631, 'loss/train': 2.774045467376709} +07/25/2024 11:37:40 - INFO - __main__ - Step 2633: {'lr': 0.0004997934407856766, 'samples': 126384, 'steps': 2632, 'loss/train': 3.365133762359619} +07/25/2024 11:37:41 - INFO - __main__ - Step 2634: {'lr': 0.000499793226930412, 'samples': 126432, 'steps': 2633, 'loss/train': 2.3208980560302734} +07/25/2024 11:37:41 - INFO - __main__ - Step 2635: {'lr': 0.0004997930129645459, 'samples': 126480, 'steps': 2634, 'loss/train': 3.433462381362915} +07/25/2024 11:37:41 - INFO - __main__ - Step 2636: {'lr': 0.0004997927988880784, 'samples': 126528, 'steps': 2635, 'loss/train': 3.758944034576416} +07/25/2024 11:37:41 - INFO - __main__ - Step 2637: {'lr': 0.0004997925847010097, 'samples': 126576, 'steps': 2636, 'loss/train': 3.315746307373047} +07/25/2024 11:37:42 - INFO - __main__ - Step 2638: {'lr': 0.0004997923704033398, 'samples': 126624, 'steps': 2637, 'loss/train': 2.410900354385376} +07/25/2024 11:37:42 - INFO - __main__ - Step 2639: {'lr': 0.0004997921559950688, 'samples': 126672, 'steps': 2638, 'loss/train': 3.9663078784942627} +07/25/2024 11:37:42 - INFO - __main__ - Step 2640: {'lr': 0.000499791941476197, 'samples': 126720, 'steps': 2639, 'loss/train': 3.0405631065368652} +07/25/2024 11:37:43 - INFO - __main__ - Step 2641: {'lr': 0.0004997917268467243, 'samples': 126768, 'steps': 2640, 'loss/train': 2.6888957023620605} +07/25/2024 11:37:43 - INFO - __main__ - Step 2642: {'lr': 0.0004997915121066507, 'samples': 126816, 'steps': 2641, 'loss/train': 2.6938130855560303} +07/25/2024 11:37:43 - INFO - __main__ - Step 2643: {'lr': 0.0004997912972559764, 'samples': 126864, 'steps': 2642, 'loss/train': 3.1933705806732178} +07/25/2024 11:37:43 - INFO - __main__ - Step 2644: {'lr': 0.0004997910822947016, 'samples': 126912, 'steps': 2643, 'loss/train': 2.2762699127197266} +07/25/2024 11:37:44 - INFO - __main__ - Step 2645: {'lr': 0.0004997908672228263, 'samples': 126960, 'steps': 2644, 'loss/train': 3.1810567378997803} +07/25/2024 11:37:44 - INFO - __main__ - Step 2646: {'lr': 0.0004997906520403506, 'samples': 127008, 'steps': 2645, 'loss/train': 4.475833892822266} +07/25/2024 11:37:44 - INFO - __main__ - Step 2647: {'lr': 0.0004997904367472746, 'samples': 127056, 'steps': 2646, 'loss/train': 2.798363208770752} +07/25/2024 11:37:45 - INFO - __main__ - Step 2648: {'lr': 0.0004997902213435984, 'samples': 127104, 'steps': 2647, 'loss/train': 3.2049412727355957} +07/25/2024 11:37:45 - INFO - __main__ - Step 2649: {'lr': 0.000499790005829322, 'samples': 127152, 'steps': 2648, 'loss/train': 2.980680465698242} +07/25/2024 11:37:45 - INFO - __main__ - Step 2650: {'lr': 0.0004997897902044457, 'samples': 127200, 'steps': 2649, 'loss/train': 3.5018863677978516} +07/25/2024 11:37:45 - INFO - __main__ - Step 2651: {'lr': 0.0004997895744689694, 'samples': 127248, 'steps': 2650, 'loss/train': 3.6140286922454834} +07/25/2024 11:37:46 - INFO - __main__ - Step 2652: {'lr': 0.0004997893586228934, 'samples': 127296, 'steps': 2651, 'loss/train': 3.278829336166382} +07/25/2024 11:37:46 - INFO - __main__ - Step 2653: {'lr': 0.0004997891426662174, 'samples': 127344, 'steps': 2652, 'loss/train': 2.5792744159698486} +07/25/2024 11:37:46 - INFO - __main__ - Step 2654: {'lr': 0.000499788926598942, 'samples': 127392, 'steps': 2653, 'loss/train': 2.989767551422119} +07/25/2024 11:37:47 - INFO - __main__ - Step 2655: {'lr': 0.000499788710421067, 'samples': 127440, 'steps': 2654, 'loss/train': 3.257991313934326} +07/25/2024 11:37:47 - INFO - __main__ - Step 2656: {'lr': 0.0004997884941325926, 'samples': 127488, 'steps': 2655, 'loss/train': 3.234438896179199} +07/25/2024 11:37:47 - INFO - __main__ - Step 2657: {'lr': 0.0004997882777335188, 'samples': 127536, 'steps': 2656, 'loss/train': 2.254908561706543} +07/25/2024 11:37:47 - INFO - __main__ - Step 2658: {'lr': 0.0004997880612238458, 'samples': 127584, 'steps': 2657, 'loss/train': 2.8752071857452393} +07/25/2024 11:37:48 - INFO - __main__ - Step 2659: {'lr': 0.0004997878446035736, 'samples': 127632, 'steps': 2658, 'loss/train': 3.5780386924743652} +07/25/2024 11:37:48 - INFO - __main__ - Step 2660: {'lr': 0.0004997876278727024, 'samples': 127680, 'steps': 2659, 'loss/train': 2.9981751441955566} +07/25/2024 11:37:48 - INFO - __main__ - Step 2661: {'lr': 0.0004997874110312322, 'samples': 127728, 'steps': 2660, 'loss/train': 2.702784299850464} +07/25/2024 11:37:49 - INFO - __main__ - Step 2662: {'lr': 0.000499787194079163, 'samples': 127776, 'steps': 2661, 'loss/train': 2.6139988899230957} +07/25/2024 11:37:49 - INFO - __main__ - Step 2663: {'lr': 0.0004997869770164953, 'samples': 127824, 'steps': 2662, 'loss/train': 3.6096603870391846} +07/25/2024 11:37:49 - INFO - __main__ - Step 2664: {'lr': 0.0004997867598432287, 'samples': 127872, 'steps': 2663, 'loss/train': 3.179455280303955} +07/25/2024 11:37:49 - INFO - __main__ - Step 2665: {'lr': 0.0004997865425593636, 'samples': 127920, 'steps': 2664, 'loss/train': 2.98939847946167} +07/25/2024 11:37:50 - INFO - __main__ - Step 2666: {'lr': 0.0004997863251649, 'samples': 127968, 'steps': 2665, 'loss/train': 2.540323257446289} +07/25/2024 11:37:50 - INFO - __main__ - Step 2667: {'lr': 0.0004997861076598381, 'samples': 128016, 'steps': 2666, 'loss/train': 3.181575298309326} +07/25/2024 11:37:50 - INFO - __main__ - Step 2668: {'lr': 0.0004997858900441779, 'samples': 128064, 'steps': 2667, 'loss/train': 2.7310962677001953} +07/25/2024 11:37:50 - INFO - __main__ - Step 2669: {'lr': 0.0004997856723179194, 'samples': 128112, 'steps': 2668, 'loss/train': 3.25948429107666} +07/25/2024 11:37:51 - INFO - __main__ - Step 2670: {'lr': 0.0004997854544810628, 'samples': 128160, 'steps': 2669, 'loss/train': 3.1697185039520264} +07/25/2024 11:37:51 - INFO - __main__ - Step 2671: {'lr': 0.0004997852365336083, 'samples': 128208, 'steps': 2670, 'loss/train': 2.821375608444214} +07/25/2024 11:37:51 - INFO - __main__ - Step 2672: {'lr': 0.0004997850184755558, 'samples': 128256, 'steps': 2671, 'loss/train': 3.2172346115112305} +07/25/2024 11:37:52 - INFO - __main__ - Step 2673: {'lr': 0.0004997848003069056, 'samples': 128304, 'steps': 2672, 'loss/train': 3.4048941135406494} +07/25/2024 11:37:52 - INFO - __main__ - Step 2674: {'lr': 0.0004997845820276576, 'samples': 128352, 'steps': 2673, 'loss/train': 2.9130659103393555} +07/25/2024 11:37:52 - INFO - __main__ - Step 2675: {'lr': 0.000499784363637812, 'samples': 128400, 'steps': 2674, 'loss/train': 3.1038694381713867} +07/25/2024 11:37:52 - INFO - __main__ - Step 2676: {'lr': 0.0004997841451373689, 'samples': 128448, 'steps': 2675, 'loss/train': 2.3976995944976807} +07/25/2024 11:37:53 - INFO - __main__ - Step 2677: {'lr': 0.0004997839265263284, 'samples': 128496, 'steps': 2676, 'loss/train': 2.8256328105926514} +07/25/2024 11:37:53 - INFO - __main__ - Step 2678: {'lr': 0.0004997837078046906, 'samples': 128544, 'steps': 2677, 'loss/train': 2.775730848312378} +07/25/2024 11:37:53 - INFO - __main__ - Step 2679: {'lr': 0.0004997834889724555, 'samples': 128592, 'steps': 2678, 'loss/train': 2.8925745487213135} +07/25/2024 11:37:54 - INFO - __main__ - Step 2680: {'lr': 0.0004997832700296233, 'samples': 128640, 'steps': 2679, 'loss/train': 3.0456063747406006} +07/25/2024 11:37:54 - INFO - __main__ - Step 2681: {'lr': 0.000499783050976194, 'samples': 128688, 'steps': 2680, 'loss/train': 1.9500480890274048} +07/25/2024 11:37:54 - INFO - __main__ - Step 2682: {'lr': 0.0004997828318121679, 'samples': 128736, 'steps': 2681, 'loss/train': 2.8915443420410156} +07/25/2024 11:37:54 - INFO - __main__ - Step 2683: {'lr': 0.0004997826125375448, 'samples': 128784, 'steps': 2682, 'loss/train': 2.70052433013916} +07/25/2024 11:37:55 - INFO - __main__ - Step 2684: {'lr': 0.0004997823931523252, 'samples': 128832, 'steps': 2683, 'loss/train': 3.0733590126037598} +07/25/2024 11:37:55 - INFO - __main__ - Step 2685: {'lr': 0.0004997821736565087, 'samples': 128880, 'steps': 2684, 'loss/train': 2.958343267440796} +07/25/2024 11:37:55 - INFO - __main__ - Step 2686: {'lr': 0.0004997819540500958, 'samples': 128928, 'steps': 2685, 'loss/train': 3.0074148178100586} +07/25/2024 11:37:56 - INFO - __main__ - Step 2687: {'lr': 0.0004997817343330863, 'samples': 128976, 'steps': 2686, 'loss/train': 3.3305537700653076} +07/25/2024 11:37:56 - INFO - __main__ - Step 2688: {'lr': 0.0004997815145054807, 'samples': 129024, 'steps': 2687, 'loss/train': 3.2534468173980713} +07/25/2024 11:37:56 - INFO - __main__ - Step 2689: {'lr': 0.0004997812945672787, 'samples': 129072, 'steps': 2688, 'loss/train': 2.6026904582977295} +07/25/2024 11:37:56 - INFO - __main__ - Step 2690: {'lr': 0.0004997810745184805, 'samples': 129120, 'steps': 2689, 'loss/train': 3.051292657852173} +07/25/2024 11:37:57 - INFO - __main__ - Step 2691: {'lr': 0.0004997808543590863, 'samples': 129168, 'steps': 2690, 'loss/train': 3.055288791656494} +07/25/2024 11:37:57 - INFO - __main__ - Step 2692: {'lr': 0.0004997806340890961, 'samples': 129216, 'steps': 2691, 'loss/train': 2.469810724258423} +07/25/2024 11:37:57 - INFO - __main__ - Step 2693: {'lr': 0.0004997804137085101, 'samples': 129264, 'steps': 2692, 'loss/train': 3.0100255012512207} +07/25/2024 11:37:58 - INFO - __main__ - Step 2694: {'lr': 0.0004997801932173282, 'samples': 129312, 'steps': 2693, 'loss/train': 2.998746871948242} +07/25/2024 11:37:58 - INFO - __main__ - Step 2695: {'lr': 0.0004997799726155507, 'samples': 129360, 'steps': 2694, 'loss/train': 2.5857582092285156} +07/25/2024 11:37:58 - INFO - __main__ - Step 2696: {'lr': 0.0004997797519031777, 'samples': 129408, 'steps': 2695, 'loss/train': 3.663379430770874} +07/25/2024 11:37:58 - INFO - __main__ - Step 2697: {'lr': 0.0004997795310802091, 'samples': 129456, 'steps': 2696, 'loss/train': 2.378770112991333} +07/25/2024 11:37:59 - INFO - __main__ - Step 2698: {'lr': 0.0004997793101466452, 'samples': 129504, 'steps': 2697, 'loss/train': 2.9561681747436523} +07/25/2024 11:37:59 - INFO - __main__ - Step 2699: {'lr': 0.000499779089102486, 'samples': 129552, 'steps': 2698, 'loss/train': 2.563084363937378} +07/25/2024 11:37:59 - INFO - __main__ - Step 2700: {'lr': 0.0004997788679477317, 'samples': 129600, 'steps': 2699, 'loss/train': 2.014599323272705} +07/25/2024 11:38:00 - INFO - __main__ - Step 2701: {'lr': 0.0004997786466823821, 'samples': 129648, 'steps': 2700, 'loss/train': 2.92646861076355} +07/25/2024 11:38:00 - INFO - __main__ - Step 2702: {'lr': 0.0004997784253064377, 'samples': 129696, 'steps': 2701, 'loss/train': 2.8571972846984863} +07/25/2024 11:38:00 - INFO - __main__ - Step 2703: {'lr': 0.0004997782038198984, 'samples': 129744, 'steps': 2702, 'loss/train': 2.422731637954712} +07/25/2024 11:38:00 - INFO - __main__ - Step 2704: {'lr': 0.0004997779822227642, 'samples': 129792, 'steps': 2703, 'loss/train': 3.183072805404663} +07/25/2024 11:38:01 - INFO - __main__ - Step 2705: {'lr': 0.0004997777605150355, 'samples': 129840, 'steps': 2704, 'loss/train': 2.9743216037750244} +07/25/2024 11:38:01 - INFO - __main__ - Step 2706: {'lr': 0.0004997775386967121, 'samples': 129888, 'steps': 2705, 'loss/train': 3.0030088424682617} +07/25/2024 11:38:01 - INFO - __main__ - Step 2707: {'lr': 0.0004997773167677941, 'samples': 129936, 'steps': 2706, 'loss/train': 3.2316572666168213} +07/25/2024 11:38:02 - INFO - __main__ - Step 2708: {'lr': 0.0004997770947282818, 'samples': 129984, 'steps': 2707, 'loss/train': 2.883281946182251} +07/25/2024 11:38:02 - INFO - __main__ - Step 2709: {'lr': 0.0004997768725781752, 'samples': 130032, 'steps': 2708, 'loss/train': 2.9441068172454834} +07/25/2024 11:38:02 - INFO - __main__ - Step 2710: {'lr': 0.0004997766503174744, 'samples': 130080, 'steps': 2709, 'loss/train': 2.740633726119995} +07/25/2024 11:38:02 - INFO - __main__ - Step 2711: {'lr': 0.0004997764279461795, 'samples': 130128, 'steps': 2710, 'loss/train': 3.8950390815734863} +07/25/2024 11:38:03 - INFO - __main__ - Step 2712: {'lr': 0.0004997762054642906, 'samples': 130176, 'steps': 2711, 'loss/train': 3.2969274520874023} +07/25/2024 11:38:03 - INFO - __main__ - Step 2713: {'lr': 0.0004997759828718079, 'samples': 130224, 'steps': 2712, 'loss/train': 3.2673757076263428} +07/25/2024 11:38:03 - INFO - __main__ - Step 2714: {'lr': 0.0004997757601687312, 'samples': 130272, 'steps': 2713, 'loss/train': 2.9556870460510254} +07/25/2024 11:38:04 - INFO - __main__ - Step 2715: {'lr': 0.0004997755373550609, 'samples': 130320, 'steps': 2714, 'loss/train': 3.4481687545776367} +07/25/2024 11:38:04 - INFO - __main__ - Step 2716: {'lr': 0.000499775314430797, 'samples': 130368, 'steps': 2715, 'loss/train': 2.6817290782928467} +07/25/2024 11:38:04 - INFO - __main__ - Step 2717: {'lr': 0.0004997750913959395, 'samples': 130416, 'steps': 2716, 'loss/train': 2.6481685638427734} +07/25/2024 11:38:04 - INFO - __main__ - Step 2718: {'lr': 0.0004997748682504886, 'samples': 130464, 'steps': 2717, 'loss/train': 2.516695976257324} +07/25/2024 11:38:05 - INFO - __main__ - Step 2719: {'lr': 0.0004997746449944445, 'samples': 130512, 'steps': 2718, 'loss/train': 2.896655559539795} +07/25/2024 11:38:05 - INFO - __main__ - Step 2720: {'lr': 0.000499774421627807, 'samples': 130560, 'steps': 2719, 'loss/train': 3.1158485412597656} +07/25/2024 11:38:05 - INFO - __main__ - Step 2721: {'lr': 0.0004997741981505766, 'samples': 130608, 'steps': 2720, 'loss/train': 2.6354053020477295} +07/25/2024 11:38:06 - INFO - __main__ - Step 2722: {'lr': 0.000499773974562753, 'samples': 130656, 'steps': 2721, 'loss/train': 2.390965461730957} +07/25/2024 11:38:06 - INFO - __main__ - Step 2723: {'lr': 0.0004997737508643365, 'samples': 130704, 'steps': 2722, 'loss/train': 3.513512134552002} +07/25/2024 11:38:06 - INFO - __main__ - Step 2724: {'lr': 0.0004997735270553273, 'samples': 130752, 'steps': 2723, 'loss/train': 2.8772709369659424} +07/25/2024 11:38:06 - INFO - __main__ - Step 2725: {'lr': 0.0004997733031357253, 'samples': 130800, 'steps': 2724, 'loss/train': 3.0786564350128174} +07/25/2024 11:38:07 - INFO - __main__ - Step 2726: {'lr': 0.0004997730791055307, 'samples': 130848, 'steps': 2725, 'loss/train': 2.559884786605835} +07/25/2024 11:38:07 - INFO - __main__ - Step 2727: {'lr': 0.0004997728549647436, 'samples': 130896, 'steps': 2726, 'loss/train': 1.9269776344299316} +07/25/2024 11:38:07 - INFO - __main__ - Step 2728: {'lr': 0.0004997726307133641, 'samples': 130944, 'steps': 2727, 'loss/train': 3.2568247318267822} +07/25/2024 11:38:08 - INFO - __main__ - Step 2729: {'lr': 0.0004997724063513923, 'samples': 130992, 'steps': 2728, 'loss/train': 1.2128907442092896} +07/25/2024 11:38:08 - INFO - __main__ - Step 2730: {'lr': 0.0004997721818788282, 'samples': 131040, 'steps': 2729, 'loss/train': 2.6372759342193604} +07/25/2024 11:38:08 - INFO - __main__ - Step 2731: {'lr': 0.000499771957295672, 'samples': 131088, 'steps': 2730, 'loss/train': 3.532810688018799} +07/25/2024 11:38:08 - INFO - __main__ - Step 2732: {'lr': 0.0004997717326019238, 'samples': 131136, 'steps': 2731, 'loss/train': 2.8416762351989746} +07/25/2024 11:38:09 - INFO - __main__ - Step 2733: {'lr': 0.0004997715077975836, 'samples': 131184, 'steps': 2732, 'loss/train': 2.9081239700317383} +07/25/2024 11:38:09 - INFO - __main__ - Step 2734: {'lr': 0.0004997712828826516, 'samples': 131232, 'steps': 2733, 'loss/train': 2.0093090534210205} +07/25/2024 11:38:09 - INFO - __main__ - Step 2735: {'lr': 0.0004997710578571279, 'samples': 131280, 'steps': 2734, 'loss/train': 3.1231696605682373} +07/25/2024 11:38:10 - INFO - __main__ - Step 2736: {'lr': 0.0004997708327210126, 'samples': 131328, 'steps': 2735, 'loss/train': 3.0797927379608154} +07/25/2024 11:38:10 - INFO - __main__ - Step 2737: {'lr': 0.0004997706074743057, 'samples': 131376, 'steps': 2736, 'loss/train': 3.2094321250915527} +07/25/2024 11:38:10 - INFO - __main__ - Step 2738: {'lr': 0.0004997703821170074, 'samples': 131424, 'steps': 2737, 'loss/train': 3.2559306621551514} +07/25/2024 11:38:10 - INFO - __main__ - Step 2739: {'lr': 0.0004997701566491178, 'samples': 131472, 'steps': 2738, 'loss/train': 2.674396514892578} +07/25/2024 11:38:11 - INFO - __main__ - Step 2740: {'lr': 0.000499769931070637, 'samples': 131520, 'steps': 2739, 'loss/train': 3.3178870677948} +07/25/2024 11:38:11 - INFO - __main__ - Step 2741: {'lr': 0.000499769705381565, 'samples': 131568, 'steps': 2740, 'loss/train': 2.843590497970581} +07/25/2024 11:38:11 - INFO - __main__ - Step 2742: {'lr': 0.000499769479581902, 'samples': 131616, 'steps': 2741, 'loss/train': 3.039670467376709} +07/25/2024 11:38:12 - INFO - __main__ - Step 2743: {'lr': 0.0004997692536716481, 'samples': 131664, 'steps': 2742, 'loss/train': 3.0685248374938965} +07/25/2024 11:38:12 - INFO - __main__ - Step 2744: {'lr': 0.0004997690276508034, 'samples': 131712, 'steps': 2743, 'loss/train': 3.42759370803833} +07/25/2024 11:38:12 - INFO - __main__ - Step 2745: {'lr': 0.0004997688015193678, 'samples': 131760, 'steps': 2744, 'loss/train': 2.407886028289795} +07/25/2024 11:38:12 - INFO - __main__ - Step 2746: {'lr': 0.0004997685752773418, 'samples': 131808, 'steps': 2745, 'loss/train': 3.441906690597534} +07/25/2024 11:38:13 - INFO - __main__ - Step 2747: {'lr': 0.0004997683489247251, 'samples': 131856, 'steps': 2746, 'loss/train': 3.3313939571380615} +07/25/2024 11:38:13 - INFO - __main__ - Step 2748: {'lr': 0.0004997681224615181, 'samples': 131904, 'steps': 2747, 'loss/train': 3.3450961112976074} +07/25/2024 11:38:13 - INFO - __main__ - Step 2749: {'lr': 0.0004997678958877207, 'samples': 131952, 'steps': 2748, 'loss/train': 3.0541505813598633} +07/25/2024 11:38:13 - INFO - __main__ - Step 2750: {'lr': 0.0004997676692033331, 'samples': 132000, 'steps': 2749, 'loss/train': 3.9279699325561523} +07/25/2024 11:38:14 - INFO - __main__ - Step 2751: {'lr': 0.0004997674424083553, 'samples': 132048, 'steps': 2750, 'loss/train': 3.3042104244232178} +07/25/2024 11:38:14 - INFO - __main__ - Step 2752: {'lr': 0.0004997672155027876, 'samples': 132096, 'steps': 2751, 'loss/train': 3.061506986618042} +07/25/2024 11:38:14 - INFO - __main__ - Step 2753: {'lr': 0.0004997669884866299, 'samples': 132144, 'steps': 2752, 'loss/train': 3.0437967777252197} +07/25/2024 11:38:15 - INFO - __main__ - Step 2754: {'lr': 0.0004997667613598824, 'samples': 132192, 'steps': 2753, 'loss/train': 3.068905830383301} +07/25/2024 11:38:15 - INFO - __main__ - Step 2755: {'lr': 0.0004997665341225451, 'samples': 132240, 'steps': 2754, 'loss/train': 4.0369439125061035} +07/25/2024 11:38:15 - INFO - __main__ - Step 2756: {'lr': 0.0004997663067746183, 'samples': 132288, 'steps': 2755, 'loss/train': 3.1746420860290527} +07/25/2024 11:38:15 - INFO - __main__ - Step 2757: {'lr': 0.000499766079316102, 'samples': 132336, 'steps': 2756, 'loss/train': 3.319272994995117} +07/25/2024 11:38:16 - INFO - __main__ - Step 2758: {'lr': 0.0004997658517469962, 'samples': 132384, 'steps': 2757, 'loss/train': 2.170220375061035} +07/25/2024 11:38:16 - INFO - __main__ - Step 2759: {'lr': 0.0004997656240673011, 'samples': 132432, 'steps': 2758, 'loss/train': 2.7018632888793945} +07/25/2024 11:38:16 - INFO - __main__ - Step 2760: {'lr': 0.0004997653962770167, 'samples': 132480, 'steps': 2759, 'loss/train': 3.0298635959625244} +07/25/2024 11:38:17 - INFO - __main__ - Step 2761: {'lr': 0.0004997651683761433, 'samples': 132528, 'steps': 2760, 'loss/train': 3.8211333751678467} +07/25/2024 11:38:17 - INFO - __main__ - Step 2762: {'lr': 0.0004997649403646807, 'samples': 132576, 'steps': 2761, 'loss/train': 3.5184993743896484} +07/25/2024 11:38:17 - INFO - __main__ - Step 2763: {'lr': 0.0004997647122426294, 'samples': 132624, 'steps': 2762, 'loss/train': 4.576268672943115} +07/25/2024 11:38:17 - INFO - __main__ - Step 2764: {'lr': 0.0004997644840099892, 'samples': 132672, 'steps': 2763, 'loss/train': 3.1817374229431152} +07/25/2024 11:38:18 - INFO - __main__ - Step 2765: {'lr': 0.0004997642556667603, 'samples': 132720, 'steps': 2764, 'loss/train': 3.3253722190856934} +07/25/2024 11:38:18 - INFO - __main__ - Step 2766: {'lr': 0.0004997640272129428, 'samples': 132768, 'steps': 2765, 'loss/train': 3.083261013031006} +07/25/2024 11:38:18 - INFO - __main__ - Step 2767: {'lr': 0.0004997637986485367, 'samples': 132816, 'steps': 2766, 'loss/train': 3.3439104557037354} +07/25/2024 11:38:19 - INFO - __main__ - Step 2768: {'lr': 0.0004997635699735422, 'samples': 132864, 'steps': 2767, 'loss/train': 2.729647397994995} +07/25/2024 11:38:19 - INFO - __main__ - Step 2769: {'lr': 0.0004997633411879595, 'samples': 132912, 'steps': 2768, 'loss/train': 2.726670026779175} +07/25/2024 11:38:19 - INFO - __main__ - Step 2770: {'lr': 0.0004997631122917885, 'samples': 132960, 'steps': 2769, 'loss/train': 3.3429014682769775} +07/25/2024 11:38:19 - INFO - __main__ - Step 2771: {'lr': 0.0004997628832850294, 'samples': 133008, 'steps': 2770, 'loss/train': 3.3069498538970947} +07/25/2024 11:38:20 - INFO - __main__ - Step 2772: {'lr': 0.0004997626541676823, 'samples': 133056, 'steps': 2771, 'loss/train': 3.208714008331299} +07/25/2024 11:38:20 - INFO - __main__ - Step 2773: {'lr': 0.0004997624249397473, 'samples': 133104, 'steps': 2772, 'loss/train': 3.8781473636627197} +07/25/2024 11:38:20 - INFO - __main__ - Step 2774: {'lr': 0.0004997621956012245, 'samples': 133152, 'steps': 2773, 'loss/train': 3.392496109008789} +07/25/2024 11:38:21 - INFO - __main__ - Step 2775: {'lr': 0.000499761966152114, 'samples': 133200, 'steps': 2774, 'loss/train': 2.6157288551330566} +07/25/2024 11:38:21 - INFO - __main__ - Step 2776: {'lr': 0.0004997617365924159, 'samples': 133248, 'steps': 2775, 'loss/train': 2.9743764400482178} +07/25/2024 11:38:21 - INFO - __main__ - Step 2777: {'lr': 0.0004997615069221303, 'samples': 133296, 'steps': 2776, 'loss/train': 2.2135491371154785} +07/25/2024 11:38:21 - INFO - __main__ - Step 2778: {'lr': 0.0004997612771412573, 'samples': 133344, 'steps': 2777, 'loss/train': 2.4748923778533936} +07/25/2024 11:38:22 - INFO - __main__ - Step 2779: {'lr': 0.000499761047249797, 'samples': 133392, 'steps': 2778, 'loss/train': 4.040026664733887} +07/25/2024 11:38:22 - INFO - __main__ - Step 2780: {'lr': 0.0004997608172477494, 'samples': 133440, 'steps': 2779, 'loss/train': 3.232616901397705} +07/25/2024 11:38:22 - INFO - __main__ - Step 2781: {'lr': 0.0004997605871351149, 'samples': 133488, 'steps': 2780, 'loss/train': 2.9566516876220703} +07/25/2024 11:38:23 - INFO - __main__ - Step 2782: {'lr': 0.0004997603569118934, 'samples': 133536, 'steps': 2781, 'loss/train': 1.8771685361862183} +07/25/2024 11:38:23 - INFO - __main__ - Step 2783: {'lr': 0.0004997601265780848, 'samples': 133584, 'steps': 2782, 'loss/train': 2.662928819656372} +07/25/2024 11:38:23 - INFO - __main__ - Step 2784: {'lr': 0.0004997598961336896, 'samples': 133632, 'steps': 2783, 'loss/train': 3.085844039916992} +07/25/2024 11:38:23 - INFO - __main__ - Step 2785: {'lr': 0.0004997596655787077, 'samples': 133680, 'steps': 2784, 'loss/train': 2.9191486835479736} +07/25/2024 11:38:24 - INFO - __main__ - Step 2786: {'lr': 0.0004997594349131392, 'samples': 133728, 'steps': 2785, 'loss/train': 3.229923963546753} +07/25/2024 11:38:24 - INFO - __main__ - Step 2787: {'lr': 0.0004997592041369841, 'samples': 133776, 'steps': 2786, 'loss/train': 4.200041770935059} +07/25/2024 11:38:24 - INFO - __main__ - Step 2788: {'lr': 0.0004997589732502429, 'samples': 133824, 'steps': 2787, 'loss/train': 2.956104040145874} +07/25/2024 11:38:25 - INFO - __main__ - Step 2789: {'lr': 0.0004997587422529152, 'samples': 133872, 'steps': 2788, 'loss/train': 3.292520046234131} +07/25/2024 11:38:25 - INFO - __main__ - Step 2790: {'lr': 0.0004997585111450013, 'samples': 133920, 'steps': 2789, 'loss/train': 3.142484188079834} +07/25/2024 11:38:25 - INFO - __main__ - Step 2791: {'lr': 0.0004997582799265015, 'samples': 133968, 'steps': 2790, 'loss/train': 2.81526255607605} +07/25/2024 11:38:25 - INFO - __main__ - Step 2792: {'lr': 0.0004997580485974157, 'samples': 134016, 'steps': 2791, 'loss/train': 3.326308012008667} +07/25/2024 11:38:26 - INFO - __main__ - Step 2793: {'lr': 0.0004997578171577439, 'samples': 134064, 'steps': 2792, 'loss/train': 2.7465405464172363} +07/25/2024 11:38:26 - INFO - __main__ - Step 2794: {'lr': 0.0004997575856074864, 'samples': 134112, 'steps': 2793, 'loss/train': 3.3049395084381104} +07/25/2024 11:38:26 - INFO - __main__ - Step 2795: {'lr': 0.0004997573539466432, 'samples': 134160, 'steps': 2794, 'loss/train': 3.472895860671997} +07/25/2024 11:38:27 - INFO - __main__ - Step 2796: {'lr': 0.0004997571221752146, 'samples': 134208, 'steps': 2795, 'loss/train': 2.7739131450653076} +07/25/2024 11:38:27 - INFO - __main__ - Step 2797: {'lr': 0.0004997568902932005, 'samples': 134256, 'steps': 2796, 'loss/train': 3.435621738433838} +07/25/2024 11:38:27 - INFO - __main__ - Step 2798: {'lr': 0.000499756658300601, 'samples': 134304, 'steps': 2797, 'loss/train': 3.3959062099456787} +07/25/2024 11:38:27 - INFO - __main__ - Step 2799: {'lr': 0.0004997564261974161, 'samples': 134352, 'steps': 2798, 'loss/train': 2.6606826782226562} +07/25/2024 11:38:28 - INFO - __main__ - Step 2800: {'lr': 0.0004997561939836463, 'samples': 134400, 'steps': 2799, 'loss/train': 3.2387382984161377} +07/25/2024 11:38:28 - INFO - __main__ - Step 2801: {'lr': 0.0004997559616592913, 'samples': 134448, 'steps': 2800, 'loss/train': 1.7125006914138794} +07/25/2024 11:38:28 - INFO - __main__ - Step 2802: {'lr': 0.0004997557292243514, 'samples': 134496, 'steps': 2801, 'loss/train': 1.991712212562561} +07/25/2024 11:38:29 - INFO - __main__ - Step 2803: {'lr': 0.0004997554966788267, 'samples': 134544, 'steps': 2802, 'loss/train': 3.1213924884796143} +07/25/2024 11:38:29 - INFO - __main__ - Step 2804: {'lr': 0.0004997552640227172, 'samples': 134592, 'steps': 2803, 'loss/train': 3.115727663040161} +07/25/2024 11:38:29 - INFO - __main__ - Step 2805: {'lr': 0.0004997550312560231, 'samples': 134640, 'steps': 2804, 'loss/train': 2.631993055343628} +07/25/2024 11:38:29 - INFO - __main__ - Step 2806: {'lr': 0.0004997547983787444, 'samples': 134688, 'steps': 2805, 'loss/train': 2.004591464996338} +07/25/2024 11:38:30 - INFO - __main__ - Step 2807: {'lr': 0.0004997545653908814, 'samples': 134736, 'steps': 2806, 'loss/train': 2.9179437160491943} +07/25/2024 11:38:30 - INFO - __main__ - Step 2808: {'lr': 0.000499754332292434, 'samples': 134784, 'steps': 2807, 'loss/train': 3.657015562057495} +07/25/2024 11:38:30 - INFO - __main__ - Step 2809: {'lr': 0.0004997540990834024, 'samples': 134832, 'steps': 2808, 'loss/train': 3.7771623134613037} +07/25/2024 11:38:31 - INFO - __main__ - Step 2810: {'lr': 0.0004997538657637866, 'samples': 134880, 'steps': 2809, 'loss/train': 3.627192735671997} +07/25/2024 11:38:31 - INFO - __main__ - Step 2811: {'lr': 0.000499753632333587, 'samples': 134928, 'steps': 2810, 'loss/train': 3.694000005722046} +07/25/2024 11:38:31 - INFO - __main__ - Step 2812: {'lr': 0.0004997533987928033, 'samples': 134976, 'steps': 2811, 'loss/train': 4.468692779541016} +07/25/2024 11:38:31 - INFO - __main__ - Step 2813: {'lr': 0.0004997531651414359, 'samples': 135024, 'steps': 2812, 'loss/train': 3.718768358230591} +07/25/2024 11:38:32 - INFO - __main__ - Step 2814: {'lr': 0.0004997529313794847, 'samples': 135072, 'steps': 2813, 'loss/train': 3.312370538711548} +07/25/2024 11:38:32 - INFO - __main__ - Step 2815: {'lr': 0.00049975269750695, 'samples': 135120, 'steps': 2814, 'loss/train': 3.2099459171295166} +07/25/2024 11:38:32 - INFO - __main__ - Step 2816: {'lr': 0.0004997524635238317, 'samples': 135168, 'steps': 2815, 'loss/train': 2.9739224910736084} +07/25/2024 11:38:33 - INFO - __main__ - Step 2817: {'lr': 0.0004997522294301301, 'samples': 135216, 'steps': 2816, 'loss/train': 2.847998857498169} +07/25/2024 11:38:33 - INFO - __main__ - Step 2818: {'lr': 0.0004997519952258451, 'samples': 135264, 'steps': 2817, 'loss/train': 3.4187915325164795} +07/25/2024 11:38:33 - INFO - __main__ - Step 2819: {'lr': 0.0004997517609109769, 'samples': 135312, 'steps': 2818, 'loss/train': 3.344080924987793} +07/25/2024 11:38:33 - INFO - __main__ - Step 2820: {'lr': 0.0004997515264855259, 'samples': 135360, 'steps': 2819, 'loss/train': 3.1607420444488525} +07/25/2024 11:38:34 - INFO - __main__ - Step 2821: {'lr': 0.0004997512919494917, 'samples': 135408, 'steps': 2820, 'loss/train': 3.0731921195983887} +07/25/2024 11:38:34 - INFO - __main__ - Step 2822: {'lr': 0.0004997510573028746, 'samples': 135456, 'steps': 2821, 'loss/train': 3.2236502170562744} +07/25/2024 11:38:34 - INFO - __main__ - Step 2823: {'lr': 0.0004997508225456747, 'samples': 135504, 'steps': 2822, 'loss/train': 3.3059017658233643} +07/25/2024 11:38:35 - INFO - __main__ - Step 2824: {'lr': 0.0004997505876778923, 'samples': 135552, 'steps': 2823, 'loss/train': 2.87990403175354} +07/25/2024 11:38:35 - INFO - __main__ - Step 2825: {'lr': 0.0004997503526995272, 'samples': 135600, 'steps': 2824, 'loss/train': 3.8881990909576416} +07/25/2024 11:38:35 - INFO - __main__ - Step 2826: {'lr': 0.0004997501176105797, 'samples': 135648, 'steps': 2825, 'loss/train': 3.2503747940063477} +07/25/2024 11:38:35 - INFO - __main__ - Step 2827: {'lr': 0.0004997498824110498, 'samples': 135696, 'steps': 2826, 'loss/train': 3.2940168380737305} +07/25/2024 11:38:36 - INFO - __main__ - Step 2828: {'lr': 0.0004997496471009377, 'samples': 135744, 'steps': 2827, 'loss/train': 2.44038462638855} +07/25/2024 11:38:36 - INFO - __main__ - Step 2829: {'lr': 0.0004997494116802435, 'samples': 135792, 'steps': 2828, 'loss/train': 3.2805254459381104} +07/25/2024 11:38:36 - INFO - __main__ - Step 2830: {'lr': 0.0004997491761489671, 'samples': 135840, 'steps': 2829, 'loss/train': 1.975512146949768} +07/25/2024 11:38:37 - INFO - __main__ - Step 2831: {'lr': 0.0004997489405071089, 'samples': 135888, 'steps': 2830, 'loss/train': 3.176119804382324} +07/25/2024 11:38:37 - INFO - __main__ - Step 2832: {'lr': 0.0004997487047546688, 'samples': 135936, 'steps': 2831, 'loss/train': 3.3312058448791504} +07/25/2024 11:38:37 - INFO - __main__ - Step 2833: {'lr': 0.000499748468891647, 'samples': 135984, 'steps': 2832, 'loss/train': 3.4364852905273438} +07/25/2024 11:38:37 - INFO - __main__ - Step 2834: {'lr': 0.0004997482329180436, 'samples': 136032, 'steps': 2833, 'loss/train': 2.976594924926758} +07/25/2024 11:38:38 - INFO - __main__ - Step 2835: {'lr': 0.0004997479968338587, 'samples': 136080, 'steps': 2834, 'loss/train': 3.403844118118286} +07/25/2024 11:38:38 - INFO - __main__ - Step 2836: {'lr': 0.0004997477606390923, 'samples': 136128, 'steps': 2835, 'loss/train': 3.074083089828491} +07/25/2024 11:38:38 - INFO - __main__ - Step 2837: {'lr': 0.0004997475243337446, 'samples': 136176, 'steps': 2836, 'loss/train': 3.2449586391448975} +07/25/2024 11:38:38 - INFO - __main__ - Step 2838: {'lr': 0.0004997472879178157, 'samples': 136224, 'steps': 2837, 'loss/train': 3.183316946029663} +07/25/2024 11:38:39 - INFO - __main__ - Step 2839: {'lr': 0.0004997470513913057, 'samples': 136272, 'steps': 2838, 'loss/train': 2.828677177429199} +07/25/2024 11:38:39 - INFO - __main__ - Step 2840: {'lr': 0.0004997468147542147, 'samples': 136320, 'steps': 2839, 'loss/train': 2.7116518020629883} +07/25/2024 11:38:39 - INFO - __main__ - Step 2841: {'lr': 0.0004997465780065429, 'samples': 136368, 'steps': 2840, 'loss/train': 3.8162968158721924} +07/25/2024 11:38:40 - INFO - __main__ - Step 2842: {'lr': 0.0004997463411482903, 'samples': 136416, 'steps': 2841, 'loss/train': 3.326355457305908} +07/25/2024 11:38:40 - INFO - __main__ - Step 2843: {'lr': 0.000499746104179457, 'samples': 136464, 'steps': 2842, 'loss/train': 3.1973862648010254} +07/25/2024 11:38:40 - INFO - __main__ - Step 2844: {'lr': 0.000499745867100043, 'samples': 136512, 'steps': 2843, 'loss/train': 3.160762071609497} +07/25/2024 11:38:40 - INFO - __main__ - Step 2845: {'lr': 0.0004997456299100487, 'samples': 136560, 'steps': 2844, 'loss/train': 2.8765058517456055} +07/25/2024 11:38:41 - INFO - __main__ - Step 2846: {'lr': 0.0004997453926094739, 'samples': 136608, 'steps': 2845, 'loss/train': 2.7116756439208984} +07/25/2024 11:38:41 - INFO - __main__ - Step 2847: {'lr': 0.0004997451551983188, 'samples': 136656, 'steps': 2846, 'loss/train': 3.3205132484436035} +07/25/2024 11:38:41 - INFO - __main__ - Step 2848: {'lr': 0.0004997449176765837, 'samples': 136704, 'steps': 2847, 'loss/train': 2.8918161392211914} +07/25/2024 11:38:42 - INFO - __main__ - Step 2849: {'lr': 0.0004997446800442685, 'samples': 136752, 'steps': 2848, 'loss/train': 3.547791004180908} +07/25/2024 11:38:42 - INFO - __main__ - Step 2850: {'lr': 0.0004997444423013733, 'samples': 136800, 'steps': 2849, 'loss/train': 3.337489128112793} +07/25/2024 11:38:42 - INFO - __main__ - Step 2851: {'lr': 0.0004997442044478983, 'samples': 136848, 'steps': 2850, 'loss/train': 3.0242583751678467} +07/25/2024 11:38:42 - INFO - __main__ - Step 2852: {'lr': 0.0004997439664838436, 'samples': 136896, 'steps': 2851, 'loss/train': 3.505849838256836} +07/25/2024 11:38:43 - INFO - __main__ - Step 2853: {'lr': 0.0004997437284092092, 'samples': 136944, 'steps': 2852, 'loss/train': 2.954547882080078} +07/25/2024 11:38:43 - INFO - __main__ - Step 2854: {'lr': 0.0004997434902239953, 'samples': 136992, 'steps': 2853, 'loss/train': 2.6398508548736572} +07/25/2024 11:38:43 - INFO - __main__ - Step 2855: {'lr': 0.000499743251928202, 'samples': 137040, 'steps': 2854, 'loss/train': 3.154531955718994} +07/25/2024 11:38:44 - INFO - __main__ - Step 2856: {'lr': 0.0004997430135218294, 'samples': 137088, 'steps': 2855, 'loss/train': 2.7627265453338623} +07/25/2024 11:38:44 - INFO - __main__ - Step 2857: {'lr': 0.0004997427750048775, 'samples': 137136, 'steps': 2856, 'loss/train': 3.3855602741241455} +07/25/2024 11:38:44 - INFO - __main__ - Step 2858: {'lr': 0.0004997425363773466, 'samples': 137184, 'steps': 2857, 'loss/train': 3.1909823417663574} +07/25/2024 11:38:44 - INFO - __main__ - Step 2859: {'lr': 0.0004997422976392366, 'samples': 137232, 'steps': 2858, 'loss/train': 3.217458486557007} +07/25/2024 11:38:45 - INFO - __main__ - Step 2860: {'lr': 0.0004997420587905477, 'samples': 137280, 'steps': 2859, 'loss/train': 3.0650112628936768} +07/25/2024 11:38:45 - INFO - __main__ - Step 2861: {'lr': 0.0004997418198312801, 'samples': 137328, 'steps': 2860, 'loss/train': 2.660531520843506} +07/25/2024 11:38:45 - INFO - __main__ - Step 2862: {'lr': 0.0004997415807614338, 'samples': 137376, 'steps': 2861, 'loss/train': 3.299455404281616} +07/25/2024 11:38:46 - INFO - __main__ - Step 2863: {'lr': 0.000499741341581009, 'samples': 137424, 'steps': 2862, 'loss/train': 3.349956512451172} +07/25/2024 11:38:46 - INFO - __main__ - Step 2864: {'lr': 0.0004997411022900056, 'samples': 137472, 'steps': 2863, 'loss/train': 2.7596333026885986} +07/25/2024 11:38:46 - INFO - __main__ - Step 2865: {'lr': 0.0004997408628884239, 'samples': 137520, 'steps': 2864, 'loss/train': 3.255014657974243} +07/25/2024 11:38:46 - INFO - __main__ - Step 2866: {'lr': 0.000499740623376264, 'samples': 137568, 'steps': 2865, 'loss/train': 2.790005922317505} +07/25/2024 11:38:47 - INFO - __main__ - Step 2867: {'lr': 0.0004997403837535258, 'samples': 137616, 'steps': 2866, 'loss/train': 3.395251750946045} +07/25/2024 11:38:47 - INFO - __main__ - Step 2868: {'lr': 0.0004997401440202096, 'samples': 137664, 'steps': 2867, 'loss/train': 3.3441739082336426} +07/25/2024 11:38:47 - INFO - __main__ - Step 2869: {'lr': 0.0004997399041763155, 'samples': 137712, 'steps': 2868, 'loss/train': 2.7671689987182617} +07/25/2024 11:38:48 - INFO - __main__ - Step 2870: {'lr': 0.0004997396642218436, 'samples': 137760, 'steps': 2869, 'loss/train': 3.7876405715942383} +07/25/2024 11:38:48 - INFO - __main__ - Step 2871: {'lr': 0.000499739424156794, 'samples': 137808, 'steps': 2870, 'loss/train': 3.052574396133423} +07/25/2024 11:38:48 - INFO - __main__ - Step 2872: {'lr': 0.0004997391839811667, 'samples': 137856, 'steps': 2871, 'loss/train': 2.843299627304077} +07/25/2024 11:38:48 - INFO - __main__ - Step 2873: {'lr': 0.0004997389436949619, 'samples': 137904, 'steps': 2872, 'loss/train': 2.889356851577759} +07/25/2024 11:38:49 - INFO - __main__ - Step 2874: {'lr': 0.0004997387032981797, 'samples': 137952, 'steps': 2873, 'loss/train': 2.932420253753662} +07/25/2024 11:38:49 - INFO - __main__ - Step 2875: {'lr': 0.0004997384627908202, 'samples': 138000, 'steps': 2874, 'loss/train': 3.0530054569244385} +07/25/2024 11:38:49 - INFO - __main__ - Step 2876: {'lr': 0.0004997382221728836, 'samples': 138048, 'steps': 2875, 'loss/train': 2.7865710258483887} +07/25/2024 11:38:50 - INFO - __main__ - Step 2877: {'lr': 0.0004997379814443698, 'samples': 138096, 'steps': 2876, 'loss/train': 2.7528982162475586} +07/25/2024 11:38:50 - INFO - __main__ - Step 2878: {'lr': 0.000499737740605279, 'samples': 138144, 'steps': 2877, 'loss/train': 3.2453794479370117} +07/25/2024 11:38:50 - INFO - __main__ - Step 2879: {'lr': 0.0004997374996556114, 'samples': 138192, 'steps': 2878, 'loss/train': 3.0573008060455322} +07/25/2024 11:38:50 - INFO - __main__ - Step 2880: {'lr': 0.000499737258595367, 'samples': 138240, 'steps': 2879, 'loss/train': 1.9666937589645386} +07/25/2024 11:38:51 - INFO - __main__ - Step 2881: {'lr': 0.000499737017424546, 'samples': 138288, 'steps': 2880, 'loss/train': 2.882399559020996} +07/25/2024 11:38:51 - INFO - __main__ - Step 2882: {'lr': 0.0004997367761431483, 'samples': 138336, 'steps': 2881, 'loss/train': 2.384922742843628} +07/25/2024 11:38:51 - INFO - __main__ - Step 2883: {'lr': 0.0004997365347511743, 'samples': 138384, 'steps': 2882, 'loss/train': 2.8517184257507324} +07/25/2024 11:38:52 - INFO - __main__ - Step 2884: {'lr': 0.0004997362932486238, 'samples': 138432, 'steps': 2883, 'loss/train': 3.3430113792419434} +07/25/2024 11:38:52 - INFO - __main__ - Step 2885: {'lr': 0.0004997360516354972, 'samples': 138480, 'steps': 2884, 'loss/train': 2.927870988845825} +07/25/2024 11:38:52 - INFO - __main__ - Step 2886: {'lr': 0.0004997358099117945, 'samples': 138528, 'steps': 2885, 'loss/train': 3.054192304611206} +07/25/2024 11:38:52 - INFO - __main__ - Step 2887: {'lr': 0.0004997355680775156, 'samples': 138576, 'steps': 2886, 'loss/train': 3.655301809310913} +07/25/2024 11:38:53 - INFO - __main__ - Step 2888: {'lr': 0.000499735326132661, 'samples': 138624, 'steps': 2887, 'loss/train': 3.39998459815979} +07/25/2024 11:38:53 - INFO - __main__ - Step 2889: {'lr': 0.0004997350840772305, 'samples': 138672, 'steps': 2888, 'loss/train': 4.262927532196045} +07/25/2024 11:38:53 - INFO - __main__ - Step 2890: {'lr': 0.0004997348419112243, 'samples': 138720, 'steps': 2889, 'loss/train': 3.344475269317627} +07/25/2024 11:38:54 - INFO - __main__ - Step 2891: {'lr': 0.0004997345996346426, 'samples': 138768, 'steps': 2890, 'loss/train': 2.5732715129852295} +07/25/2024 11:38:54 - INFO - __main__ - Step 2892: {'lr': 0.0004997343572474853, 'samples': 138816, 'steps': 2891, 'loss/train': 2.8990867137908936} +07/25/2024 11:38:54 - INFO - __main__ - Step 2893: {'lr': 0.0004997341147497526, 'samples': 138864, 'steps': 2892, 'loss/train': 3.0315964221954346} +07/25/2024 11:38:54 - INFO - __main__ - Step 2894: {'lr': 0.0004997338721414447, 'samples': 138912, 'steps': 2893, 'loss/train': 4.050211429595947} +07/25/2024 11:38:55 - INFO - __main__ - Step 2895: {'lr': 0.0004997336294225616, 'samples': 138960, 'steps': 2894, 'loss/train': 3.1408400535583496} +07/25/2024 11:38:55 - INFO - __main__ - Step 2896: {'lr': 0.0004997333865931035, 'samples': 139008, 'steps': 2895, 'loss/train': 2.9970171451568604} +07/25/2024 11:38:55 - INFO - __main__ - Step 2897: {'lr': 0.0004997331436530705, 'samples': 139056, 'steps': 2896, 'loss/train': 3.016420602798462} +07/25/2024 11:38:56 - INFO - __main__ - Step 2898: {'lr': 0.0004997329006024625, 'samples': 139104, 'steps': 2897, 'loss/train': 2.975531578063965} +07/25/2024 11:38:56 - INFO - __main__ - Step 2899: {'lr': 0.0004997326574412799, 'samples': 139152, 'steps': 2898, 'loss/train': 2.648421049118042} +07/25/2024 11:38:56 - INFO - __main__ - Step 2900: {'lr': 0.0004997324141695226, 'samples': 139200, 'steps': 2899, 'loss/train': 3.358215093612671} +07/25/2024 11:38:56 - INFO - __main__ - Step 2901: {'lr': 0.0004997321707871908, 'samples': 139248, 'steps': 2900, 'loss/train': 3.0106921195983887} +07/25/2024 11:38:57 - INFO - __main__ - Step 2902: {'lr': 0.0004997319272942847, 'samples': 139296, 'steps': 2901, 'loss/train': 3.1120927333831787} +07/25/2024 11:38:57 - INFO - __main__ - Step 2903: {'lr': 0.0004997316836908042, 'samples': 139344, 'steps': 2902, 'loss/train': 3.1564810276031494} +07/25/2024 11:38:57 - INFO - __main__ - Step 2904: {'lr': 0.0004997314399767495, 'samples': 139392, 'steps': 2903, 'loss/train': 1.9711328744888306} +07/25/2024 11:38:58 - INFO - __main__ - Step 2905: {'lr': 0.0004997311961521207, 'samples': 139440, 'steps': 2904, 'loss/train': 3.1510703563690186} +07/25/2024 11:38:58 - INFO - __main__ - Step 2906: {'lr': 0.000499730952216918, 'samples': 139488, 'steps': 2905, 'loss/train': 3.304020643234253} +07/25/2024 11:38:58 - INFO - __main__ - Step 2907: {'lr': 0.0004997307081711414, 'samples': 139536, 'steps': 2906, 'loss/train': 2.8374905586242676} +07/25/2024 11:38:58 - INFO - __main__ - Step 2908: {'lr': 0.000499730464014791, 'samples': 139584, 'steps': 2907, 'loss/train': 2.9651052951812744} +07/25/2024 11:38:59 - INFO - __main__ - Step 2909: {'lr': 0.000499730219747867, 'samples': 139632, 'steps': 2908, 'loss/train': 2.82289981842041} +07/25/2024 11:38:59 - INFO - __main__ - Step 2910: {'lr': 0.0004997299753703695, 'samples': 139680, 'steps': 2909, 'loss/train': 2.6439285278320312} +07/25/2024 11:38:59 - INFO - __main__ - Step 2911: {'lr': 0.0004997297308822984, 'samples': 139728, 'steps': 2910, 'loss/train': 3.4105842113494873} +07/25/2024 11:39:00 - INFO - __main__ - Step 2912: {'lr': 0.0004997294862836542, 'samples': 139776, 'steps': 2911, 'loss/train': 3.9283599853515625} +07/25/2024 11:39:00 - INFO - __main__ - Step 2913: {'lr': 0.0004997292415744365, 'samples': 139824, 'steps': 2912, 'loss/train': 3.6982526779174805} +07/25/2024 11:39:00 - INFO - __main__ - Step 2914: {'lr': 0.0004997289967546459, 'samples': 139872, 'steps': 2913, 'loss/train': 2.899904489517212} +07/25/2024 11:39:00 - INFO - __main__ - Step 2915: {'lr': 0.0004997287518242823, 'samples': 139920, 'steps': 2914, 'loss/train': 1.5019620656967163} +07/25/2024 11:39:01 - INFO - __main__ - Step 2916: {'lr': 0.0004997285067833457, 'samples': 139968, 'steps': 2915, 'loss/train': 4.217556953430176} +07/25/2024 11:39:01 - INFO - __main__ - Step 2917: {'lr': 0.0004997282616318363, 'samples': 140016, 'steps': 2916, 'loss/train': 3.001410484313965} +07/25/2024 11:39:01 - INFO - __main__ - Step 2918: {'lr': 0.0004997280163697542, 'samples': 140064, 'steps': 2917, 'loss/train': 3.8714358806610107} +07/25/2024 11:39:01 - INFO - __main__ - Step 2919: {'lr': 0.0004997277709970998, 'samples': 140112, 'steps': 2918, 'loss/train': 3.1764817237854004} +07/25/2024 11:39:02 - INFO - __main__ - Step 2920: {'lr': 0.0004997275255138727, 'samples': 140160, 'steps': 2919, 'loss/train': 3.43489670753479} +07/25/2024 11:39:02 - INFO - __main__ - Step 2921: {'lr': 0.0004997272799200733, 'samples': 140208, 'steps': 2920, 'loss/train': 2.6410470008850098} +07/25/2024 11:39:02 - INFO - __main__ - Step 2922: {'lr': 0.0004997270342157016, 'samples': 140256, 'steps': 2921, 'loss/train': 3.342989206314087} +07/25/2024 11:39:03 - INFO - __main__ - Step 2923: {'lr': 0.0004997267884007579, 'samples': 140304, 'steps': 2922, 'loss/train': 3.169583320617676} +07/25/2024 11:39:03 - INFO - __main__ - Step 2924: {'lr': 0.0004997265424752421, 'samples': 140352, 'steps': 2923, 'loss/train': 2.498197317123413} +07/25/2024 11:39:03 - INFO - __main__ - Step 2925: {'lr': 0.0004997262964391544, 'samples': 140400, 'steps': 2924, 'loss/train': 3.1060144901275635} +07/25/2024 11:39:03 - INFO - __main__ - Step 2926: {'lr': 0.0004997260502924948, 'samples': 140448, 'steps': 2925, 'loss/train': 2.9140307903289795} +07/25/2024 11:39:04 - INFO - __main__ - Step 2927: {'lr': 0.0004997258040352636, 'samples': 140496, 'steps': 2926, 'loss/train': 3.045239210128784} +07/25/2024 11:39:04 - INFO - __main__ - Step 2928: {'lr': 0.0004997255576674608, 'samples': 140544, 'steps': 2927, 'loss/train': 1.7909581661224365} +07/25/2024 11:39:04 - INFO - __main__ - Step 2929: {'lr': 0.0004997253111890866, 'samples': 140592, 'steps': 2928, 'loss/train': 2.7462024688720703} +07/25/2024 11:39:05 - INFO - __main__ - Step 2930: {'lr': 0.0004997250646001409, 'samples': 140640, 'steps': 2929, 'loss/train': 3.380445957183838} +07/25/2024 11:39:05 - INFO - __main__ - Step 2931: {'lr': 0.000499724817900624, 'samples': 140688, 'steps': 2930, 'loss/train': 2.9811203479766846} +07/25/2024 11:39:05 - INFO - __main__ - Step 2932: {'lr': 0.000499724571090536, 'samples': 140736, 'steps': 2931, 'loss/train': 2.896674871444702} +07/25/2024 11:39:05 - INFO - __main__ - Step 2933: {'lr': 0.0004997243241698768, 'samples': 140784, 'steps': 2932, 'loss/train': 3.0169456005096436} +07/25/2024 11:39:06 - INFO - __main__ - Step 2934: {'lr': 0.0004997240771386467, 'samples': 140832, 'steps': 2933, 'loss/train': 2.7642548084259033} +07/25/2024 11:39:06 - INFO - __main__ - Step 2935: {'lr': 0.0004997238299968459, 'samples': 140880, 'steps': 2934, 'loss/train': 2.6293113231658936} +07/25/2024 11:39:06 - INFO - __main__ - Step 2936: {'lr': 0.0004997235827444743, 'samples': 140928, 'steps': 2935, 'loss/train': 3.163825750350952} +07/25/2024 11:39:07 - INFO - __main__ - Step 2937: {'lr': 0.0004997233353815321, 'samples': 140976, 'steps': 2936, 'loss/train': 2.834709644317627} +07/25/2024 11:39:07 - INFO - __main__ - Step 2938: {'lr': 0.0004997230879080195, 'samples': 141024, 'steps': 2937, 'loss/train': 3.225269079208374} +07/25/2024 11:39:07 - INFO - __main__ - Step 2939: {'lr': 0.0004997228403239364, 'samples': 141072, 'steps': 2938, 'loss/train': 2.8868749141693115} +07/25/2024 11:39:07 - INFO - __main__ - Step 2940: {'lr': 0.000499722592629283, 'samples': 141120, 'steps': 2939, 'loss/train': 1.9851818084716797} +07/25/2024 11:39:08 - INFO - __main__ - Step 2941: {'lr': 0.0004997223448240595, 'samples': 141168, 'steps': 2940, 'loss/train': 3.2170612812042236} +07/25/2024 11:39:08 - INFO - __main__ - Step 2942: {'lr': 0.0004997220969082659, 'samples': 141216, 'steps': 2941, 'loss/train': 3.0345780849456787} +07/25/2024 11:39:08 - INFO - __main__ - Step 2943: {'lr': 0.0004997218488819024, 'samples': 141264, 'steps': 2942, 'loss/train': 2.540827512741089} +07/25/2024 11:39:09 - INFO - __main__ - Step 2944: {'lr': 0.000499721600744969, 'samples': 141312, 'steps': 2943, 'loss/train': 3.182030200958252} +07/25/2024 11:39:09 - INFO - __main__ - Step 2945: {'lr': 0.000499721352497466, 'samples': 141360, 'steps': 2944, 'loss/train': 2.560883045196533} +07/25/2024 11:39:09 - INFO - __main__ - Step 2946: {'lr': 0.0004997211041393932, 'samples': 141408, 'steps': 2945, 'loss/train': 2.9482667446136475} +07/25/2024 11:39:09 - INFO - __main__ - Step 2947: {'lr': 0.000499720855670751, 'samples': 141456, 'steps': 2946, 'loss/train': 3.073577880859375} +07/25/2024 11:39:10 - INFO - __main__ - Step 2948: {'lr': 0.0004997206070915394, 'samples': 141504, 'steps': 2947, 'loss/train': 3.080193281173706} +07/25/2024 11:39:10 - INFO - __main__ - Step 2949: {'lr': 0.0004997203584017585, 'samples': 141552, 'steps': 2948, 'loss/train': 3.1612024307250977} +07/25/2024 11:39:10 - INFO - __main__ - Step 2950: {'lr': 0.0004997201096014085, 'samples': 141600, 'steps': 2949, 'loss/train': 3.3101577758789062} +07/25/2024 11:39:11 - INFO - __main__ - Step 2951: {'lr': 0.0004997198606904893, 'samples': 141648, 'steps': 2950, 'loss/train': 2.787029504776001} +07/25/2024 11:39:11 - INFO - __main__ - Step 2952: {'lr': 0.0004997196116690012, 'samples': 141696, 'steps': 2951, 'loss/train': 2.555962562561035} +07/25/2024 11:39:11 - INFO - __main__ - Step 2953: {'lr': 0.0004997193625369443, 'samples': 141744, 'steps': 2952, 'loss/train': 1.9448084831237793} +07/25/2024 11:39:11 - INFO - __main__ - Step 2954: {'lr': 0.0004997191132943186, 'samples': 141792, 'steps': 2953, 'loss/train': 3.1850225925445557} +07/25/2024 11:39:12 - INFO - __main__ - Step 2955: {'lr': 0.0004997188639411243, 'samples': 141840, 'steps': 2954, 'loss/train': 2.5357003211975098} +07/25/2024 11:39:12 - INFO - __main__ - Step 2956: {'lr': 0.0004997186144773615, 'samples': 141888, 'steps': 2955, 'loss/train': 3.1580464839935303} +07/25/2024 11:39:12 - INFO - __main__ - Step 2957: {'lr': 0.0004997183649030302, 'samples': 141936, 'steps': 2956, 'loss/train': 1.2862260341644287} +07/25/2024 11:39:13 - INFO - __main__ - Step 2958: {'lr': 0.0004997181152181307, 'samples': 141984, 'steps': 2957, 'loss/train': 2.207812547683716} +07/25/2024 11:39:13 - INFO - __main__ - Step 2959: {'lr': 0.000499717865422663, 'samples': 142032, 'steps': 2958, 'loss/train': 2.800166368484497} +07/25/2024 11:39:13 - INFO - __main__ - Step 2960: {'lr': 0.0004997176155166273, 'samples': 142080, 'steps': 2959, 'loss/train': 3.145841598510742} +07/25/2024 11:39:13 - INFO - __main__ - Step 2961: {'lr': 0.0004997173655000235, 'samples': 142128, 'steps': 2960, 'loss/train': 2.9552927017211914} +07/25/2024 11:39:14 - INFO - __main__ - Step 2962: {'lr': 0.0004997171153728519, 'samples': 142176, 'steps': 2961, 'loss/train': 3.00850772857666} +07/25/2024 11:39:14 - INFO - __main__ - Step 2963: {'lr': 0.0004997168651351125, 'samples': 142224, 'steps': 2962, 'loss/train': 2.9284839630126953} +07/25/2024 11:39:14 - INFO - __main__ - Step 2964: {'lr': 0.0004997166147868055, 'samples': 142272, 'steps': 2963, 'loss/train': 2.0775527954101562} +07/25/2024 11:39:15 - INFO - __main__ - Step 2965: {'lr': 0.0004997163643279311, 'samples': 142320, 'steps': 2964, 'loss/train': 2.6262521743774414} +07/25/2024 11:39:15 - INFO - __main__ - Step 2966: {'lr': 0.0004997161137584892, 'samples': 142368, 'steps': 2965, 'loss/train': 2.83443284034729} +07/25/2024 11:39:15 - INFO - __main__ - Step 2967: {'lr': 0.00049971586307848, 'samples': 142416, 'steps': 2966, 'loss/train': 2.76155424118042} +07/25/2024 11:39:15 - INFO - __main__ - Step 2968: {'lr': 0.0004997156122879036, 'samples': 142464, 'steps': 2967, 'loss/train': 3.1043930053710938} +07/25/2024 11:39:16 - INFO - __main__ - Step 2969: {'lr': 0.0004997153613867601, 'samples': 142512, 'steps': 2968, 'loss/train': 2.4586312770843506} +07/25/2024 11:39:16 - INFO - __main__ - Step 2970: {'lr': 0.0004997151103750497, 'samples': 142560, 'steps': 2969, 'loss/train': 3.0198323726654053} +07/25/2024 11:39:16 - INFO - __main__ - Step 2971: {'lr': 0.0004997148592527724, 'samples': 142608, 'steps': 2970, 'loss/train': 2.8037374019622803} +07/25/2024 11:39:17 - INFO - __main__ - Step 2972: {'lr': 0.0004997146080199284, 'samples': 142656, 'steps': 2971, 'loss/train': 2.935161828994751} +07/25/2024 11:39:17 - INFO - __main__ - Step 2973: {'lr': 0.0004997143566765177, 'samples': 142704, 'steps': 2972, 'loss/train': 2.164407730102539} +07/25/2024 11:39:17 - INFO - __main__ - Step 2974: {'lr': 0.0004997141052225406, 'samples': 142752, 'steps': 2973, 'loss/train': 2.995304584503174} +07/25/2024 11:39:17 - INFO - __main__ - Step 2975: {'lr': 0.000499713853657997, 'samples': 142800, 'steps': 2974, 'loss/train': 3.194249153137207} +07/25/2024 11:39:18 - INFO - __main__ - Step 2976: {'lr': 0.0004997136019828871, 'samples': 142848, 'steps': 2975, 'loss/train': 2.011805295944214} +07/25/2024 11:39:18 - INFO - __main__ - Step 2977: {'lr': 0.000499713350197211, 'samples': 142896, 'steps': 2976, 'loss/train': 2.9314143657684326} +07/25/2024 11:39:18 - INFO - __main__ - Step 2978: {'lr': 0.0004997130983009689, 'samples': 142944, 'steps': 2977, 'loss/train': 2.7680280208587646} +07/25/2024 11:39:19 - INFO - __main__ - Step 2979: {'lr': 0.0004997128462941608, 'samples': 142992, 'steps': 2978, 'loss/train': 2.868222713470459} +07/25/2024 11:39:19 - INFO - __main__ - Step 2980: {'lr': 0.0004997125941767868, 'samples': 143040, 'steps': 2979, 'loss/train': 3.0909459590911865} +07/25/2024 11:39:19 - INFO - __main__ - Step 2981: {'lr': 0.0004997123419488471, 'samples': 143088, 'steps': 2980, 'loss/train': 1.1429388523101807} +07/25/2024 11:39:19 - INFO - __main__ - Step 2982: {'lr': 0.0004997120896103417, 'samples': 143136, 'steps': 2981, 'loss/train': 2.754948377609253} +07/25/2024 11:39:20 - INFO - __main__ - Step 2983: {'lr': 0.000499711837161271, 'samples': 143184, 'steps': 2982, 'loss/train': 2.044830799102783} +07/25/2024 11:39:20 - INFO - __main__ - Step 2984: {'lr': 0.0004997115846016347, 'samples': 143232, 'steps': 2983, 'loss/train': 2.668459415435791} +07/25/2024 11:39:20 - INFO - __main__ - Step 2985: {'lr': 0.0004997113319314331, 'samples': 143280, 'steps': 2984, 'loss/train': 2.851130723953247} +07/25/2024 11:39:21 - INFO - __main__ - Step 2986: {'lr': 0.0004997110791506664, 'samples': 143328, 'steps': 2985, 'loss/train': 2.8014373779296875} +07/25/2024 11:39:21 - INFO - __main__ - Step 2987: {'lr': 0.0004997108262593346, 'samples': 143376, 'steps': 2986, 'loss/train': 3.2672770023345947} +07/25/2024 11:39:21 - INFO - __main__ - Step 2988: {'lr': 0.0004997105732574379, 'samples': 143424, 'steps': 2987, 'loss/train': 2.9637086391448975} +07/25/2024 11:39:21 - INFO - __main__ - Step 2989: {'lr': 0.0004997103201449763, 'samples': 143472, 'steps': 2988, 'loss/train': 2.645474910736084} +07/25/2024 11:39:22 - INFO - __main__ - Step 2990: {'lr': 0.00049971006692195, 'samples': 143520, 'steps': 2989, 'loss/train': 2.939197063446045} +07/25/2024 11:39:22 - INFO - __main__ - Step 2991: {'lr': 0.000499709813588359, 'samples': 143568, 'steps': 2990, 'loss/train': 2.808439016342163} +07/25/2024 11:39:22 - INFO - __main__ - Step 2992: {'lr': 0.0004997095601442036, 'samples': 143616, 'steps': 2991, 'loss/train': 4.0669355392456055} +07/25/2024 11:39:22 - INFO - __main__ - Step 2993: {'lr': 0.0004997093065894837, 'samples': 143664, 'steps': 2992, 'loss/train': 2.59507417678833} +07/25/2024 11:39:23 - INFO - __main__ - Step 2994: {'lr': 0.0004997090529241996, 'samples': 143712, 'steps': 2993, 'loss/train': 3.28790545463562} +07/25/2024 11:39:23 - INFO - __main__ - Step 2995: {'lr': 0.0004997087991483513, 'samples': 143760, 'steps': 2994, 'loss/train': 3.309824228286743} +07/25/2024 11:39:23 - INFO - __main__ - Step 2996: {'lr': 0.000499708545261939, 'samples': 143808, 'steps': 2995, 'loss/train': 2.8973307609558105} +07/25/2024 11:39:24 - INFO - __main__ - Step 2997: {'lr': 0.0004997082912649626, 'samples': 143856, 'steps': 2996, 'loss/train': 3.563589572906494} +07/25/2024 11:39:24 - INFO - __main__ - Step 2998: {'lr': 0.0004997080371574225, 'samples': 143904, 'steps': 2997, 'loss/train': 3.084437847137451} +07/25/2024 11:39:24 - INFO - __main__ - Step 2999: {'lr': 0.0004997077829393186, 'samples': 143952, 'steps': 2998, 'loss/train': 3.0077152252197266} +07/25/2024 11:39:24 - INFO - __main__ - Step 3000: {'lr': 0.0004997075286106511, 'samples': 144000, 'steps': 2999, 'loss/train': 2.653275728225708} +07/25/2024 11:39:25 - INFO - __main__ - Step 3001: {'lr': 0.0004997072741714201, 'samples': 144048, 'steps': 3000, 'loss/train': 2.5631937980651855} +07/25/2024 11:39:25 - INFO - __main__ - Step 3002: {'lr': 0.0004997070196216258, 'samples': 144096, 'steps': 3001, 'loss/train': 2.773939609527588} +07/25/2024 11:39:25 - INFO - __main__ - Step 3003: {'lr': 0.0004997067649612681, 'samples': 144144, 'steps': 3002, 'loss/train': 2.7442421913146973} +07/25/2024 11:39:26 - INFO - __main__ - Step 3004: {'lr': 0.0004997065101903474, 'samples': 144192, 'steps': 3003, 'loss/train': 3.4010400772094727} +07/25/2024 11:39:26 - INFO - __main__ - Step 3005: {'lr': 0.0004997062553088635, 'samples': 144240, 'steps': 3004, 'loss/train': 2.4245095252990723} +07/25/2024 11:39:26 - INFO - __main__ - Step 3006: {'lr': 0.0004997060003168167, 'samples': 144288, 'steps': 3005, 'loss/train': 3.1313750743865967} +07/25/2024 11:39:26 - INFO - __main__ - Step 3007: {'lr': 0.0004997057452142071, 'samples': 144336, 'steps': 3006, 'loss/train': 3.0955910682678223} +07/25/2024 11:39:27 - INFO - __main__ - Step 3008: {'lr': 0.0004997054900010348, 'samples': 144384, 'steps': 3007, 'loss/train': 2.93904709815979} +07/25/2024 11:39:27 - INFO - __main__ - Step 3009: {'lr': 0.0004997052346773, 'samples': 144432, 'steps': 3008, 'loss/train': 2.92439866065979} +07/25/2024 11:39:27 - INFO - __main__ - Step 3010: {'lr': 0.0004997049792430027, 'samples': 144480, 'steps': 3009, 'loss/train': 2.504485607147217} +07/25/2024 11:39:28 - INFO - __main__ - Step 3011: {'lr': 0.0004997047236981429, 'samples': 144528, 'steps': 3010, 'loss/train': 3.0940229892730713} +07/25/2024 11:39:28 - INFO - __main__ - Step 3012: {'lr': 0.0004997044680427208, 'samples': 144576, 'steps': 3011, 'loss/train': 3.0716915130615234} +07/25/2024 11:39:28 - INFO - __main__ - Step 3013: {'lr': 0.0004997042122767367, 'samples': 144624, 'steps': 3012, 'loss/train': 2.3536922931671143} +07/25/2024 11:39:28 - INFO - __main__ - Step 3014: {'lr': 0.0004997039564001905, 'samples': 144672, 'steps': 3013, 'loss/train': 3.8624465465545654} +07/25/2024 11:39:29 - INFO - __main__ - Step 3015: {'lr': 0.0004997037004130825, 'samples': 144720, 'steps': 3014, 'loss/train': 3.4692749977111816} +07/25/2024 11:39:29 - INFO - __main__ - Step 3016: {'lr': 0.0004997034443154126, 'samples': 144768, 'steps': 3015, 'loss/train': 3.181976556777954} +07/25/2024 11:39:29 - INFO - __main__ - Step 3017: {'lr': 0.000499703188107181, 'samples': 144816, 'steps': 3016, 'loss/train': 2.554145336151123} +07/25/2024 11:39:30 - INFO - __main__ - Step 3018: {'lr': 0.0004997029317883879, 'samples': 144864, 'steps': 3017, 'loss/train': 2.9006052017211914} +07/25/2024 11:39:30 - INFO - __main__ - Step 3019: {'lr': 0.0004997026753590332, 'samples': 144912, 'steps': 3018, 'loss/train': 2.8846383094787598} +07/25/2024 11:39:30 - INFO - __main__ - Step 3020: {'lr': 0.0004997024188191173, 'samples': 144960, 'steps': 3019, 'loss/train': 2.8182778358459473} +07/25/2024 11:39:30 - INFO - __main__ - Step 3021: {'lr': 0.00049970216216864, 'samples': 145008, 'steps': 3020, 'loss/train': 2.850939989089966} +07/25/2024 11:39:31 - INFO - __main__ - Step 3022: {'lr': 0.0004997019054076017, 'samples': 145056, 'steps': 3021, 'loss/train': 2.9656169414520264} +07/25/2024 11:39:31 - INFO - __main__ - Step 3023: {'lr': 0.0004997016485360024, 'samples': 145104, 'steps': 3022, 'loss/train': 2.3422999382019043} +07/25/2024 11:39:31 - INFO - __main__ - Step 3024: {'lr': 0.0004997013915538423, 'samples': 145152, 'steps': 3023, 'loss/train': 2.18437123298645} +07/25/2024 11:39:32 - INFO - __main__ - Step 3025: {'lr': 0.0004997011344611212, 'samples': 145200, 'steps': 3024, 'loss/train': 2.807008743286133} +07/25/2024 11:39:32 - INFO - __main__ - Step 3026: {'lr': 0.0004997008772578397, 'samples': 145248, 'steps': 3025, 'loss/train': 2.7839508056640625} +07/25/2024 11:39:32 - INFO - __main__ - Step 3027: {'lr': 0.0004997006199439975, 'samples': 145296, 'steps': 3026, 'loss/train': 2.316406011581421} +07/25/2024 11:39:32 - INFO - __main__ - Step 3028: {'lr': 0.0004997003625195947, 'samples': 145344, 'steps': 3027, 'loss/train': 3.14422869682312} +07/25/2024 11:39:33 - INFO - __main__ - Step 3029: {'lr': 0.0004997001049846317, 'samples': 145392, 'steps': 3028, 'loss/train': 2.834334373474121} +07/25/2024 11:39:33 - INFO - __main__ - Step 3030: {'lr': 0.0004996998473391085, 'samples': 145440, 'steps': 3029, 'loss/train': 3.1704773902893066} +07/25/2024 11:39:33 - INFO - __main__ - Step 3031: {'lr': 0.0004996995895830253, 'samples': 145488, 'steps': 3030, 'loss/train': 2.83282732963562} +07/25/2024 11:39:34 - INFO - __main__ - Step 3032: {'lr': 0.000499699331716382, 'samples': 145536, 'steps': 3031, 'loss/train': 2.6303815841674805} +07/25/2024 11:39:34 - INFO - __main__ - Step 3033: {'lr': 0.0004996990737391789, 'samples': 145584, 'steps': 3032, 'loss/train': 2.445819616317749} +07/25/2024 11:39:34 - INFO - __main__ - Step 3034: {'lr': 0.000499698815651416, 'samples': 145632, 'steps': 3033, 'loss/train': 3.0040829181671143} +07/25/2024 11:39:34 - INFO - __main__ - Step 3035: {'lr': 0.0004996985574530935, 'samples': 145680, 'steps': 3034, 'loss/train': 2.9785687923431396} +07/25/2024 11:39:35 - INFO - __main__ - Step 3036: {'lr': 0.0004996982991442115, 'samples': 145728, 'steps': 3035, 'loss/train': 2.6243252754211426} +07/25/2024 11:39:35 - INFO - __main__ - Step 3037: {'lr': 0.0004996980407247699, 'samples': 145776, 'steps': 3036, 'loss/train': 2.2495718002319336} +07/25/2024 11:39:35 - INFO - __main__ - Step 3038: {'lr': 0.0004996977821947693, 'samples': 145824, 'steps': 3037, 'loss/train': 3.368525505065918} +07/25/2024 11:39:36 - INFO - __main__ - Step 3039: {'lr': 0.0004996975235542093, 'samples': 145872, 'steps': 3038, 'loss/train': 2.2223212718963623} +07/25/2024 11:39:36 - INFO - __main__ - Step 3040: {'lr': 0.0004996972648030903, 'samples': 145920, 'steps': 3039, 'loss/train': 3.4642443656921387} +07/25/2024 11:39:36 - INFO - __main__ - Step 3041: {'lr': 0.0004996970059414124, 'samples': 145968, 'steps': 3040, 'loss/train': 2.561511993408203} +07/25/2024 11:39:36 - INFO - __main__ - Step 3042: {'lr': 0.0004996967469691755, 'samples': 146016, 'steps': 3041, 'loss/train': 2.8830409049987793} +07/25/2024 11:39:37 - INFO - __main__ - Step 3043: {'lr': 0.00049969648788638, 'samples': 146064, 'steps': 3042, 'loss/train': 3.116997241973877} +07/25/2024 11:39:37 - INFO - __main__ - Step 3044: {'lr': 0.000499696228693026, 'samples': 146112, 'steps': 3043, 'loss/train': 3.684656858444214} +07/25/2024 11:39:37 - INFO - __main__ - Step 3045: {'lr': 0.0004996959693891133, 'samples': 146160, 'steps': 3044, 'loss/train': 2.6857011318206787} +07/25/2024 11:39:38 - INFO - __main__ - Step 3046: {'lr': 0.0004996957099746423, 'samples': 146208, 'steps': 3045, 'loss/train': 3.232593059539795} +07/25/2024 11:39:38 - INFO - __main__ - Step 3047: {'lr': 0.000499695450449613, 'samples': 146256, 'steps': 3046, 'loss/train': 2.638869524002075} +07/25/2024 11:39:38 - INFO - __main__ - Step 3048: {'lr': 0.0004996951908140257, 'samples': 146304, 'steps': 3047, 'loss/train': 2.737114191055298} +07/25/2024 11:39:38 - INFO - __main__ - Step 3049: {'lr': 0.0004996949310678802, 'samples': 146352, 'steps': 3048, 'loss/train': 3.4236912727355957} +07/25/2024 11:39:39 - INFO - __main__ - Step 3050: {'lr': 0.0004996946712111769, 'samples': 146400, 'steps': 3049, 'loss/train': 2.4084596633911133} +07/25/2024 11:39:39 - INFO - __main__ - Step 3051: {'lr': 0.0004996944112439157, 'samples': 146448, 'steps': 3050, 'loss/train': 2.571974039077759} +07/25/2024 11:39:39 - INFO - __main__ - Step 3052: {'lr': 0.0004996941511660969, 'samples': 146496, 'steps': 3051, 'loss/train': 2.5151355266571045} +07/25/2024 11:39:40 - INFO - __main__ - Step 3053: {'lr': 0.0004996938909777205, 'samples': 146544, 'steps': 3052, 'loss/train': 2.4491562843322754} +07/25/2024 11:39:40 - INFO - __main__ - Step 3054: {'lr': 0.0004996936306787865, 'samples': 146592, 'steps': 3053, 'loss/train': 2.664522409439087} +07/25/2024 11:39:40 - INFO - __main__ - Step 3055: {'lr': 0.0004996933702692953, 'samples': 146640, 'steps': 3054, 'loss/train': 2.237114667892456} +07/25/2024 11:39:40 - INFO - __main__ - Step 3056: {'lr': 0.0004996931097492469, 'samples': 146688, 'steps': 3055, 'loss/train': 2.6535050868988037} +07/25/2024 11:39:41 - INFO - __main__ - Step 3057: {'lr': 0.0004996928491186412, 'samples': 146736, 'steps': 3056, 'loss/train': 2.740983009338379} +07/25/2024 11:39:41 - INFO - __main__ - Step 3058: {'lr': 0.0004996925883774786, 'samples': 146784, 'steps': 3057, 'loss/train': 2.489147186279297} +07/25/2024 11:39:41 - INFO - __main__ - Step 3059: {'lr': 0.0004996923275257593, 'samples': 146832, 'steps': 3058, 'loss/train': 3.0656321048736572} +07/25/2024 11:39:41 - INFO - __main__ - Step 3060: {'lr': 0.0004996920665634831, 'samples': 146880, 'steps': 3059, 'loss/train': 5.671152114868164} +07/25/2024 11:39:42 - INFO - __main__ - Step 3061: {'lr': 0.0004996918054906502, 'samples': 146928, 'steps': 3060, 'loss/train': 2.3344523906707764} +07/25/2024 11:39:42 - INFO - __main__ - Step 3062: {'lr': 0.0004996915443072607, 'samples': 146976, 'steps': 3061, 'loss/train': 2.5589377880096436} +07/25/2024 11:39:42 - INFO - __main__ - Step 3063: {'lr': 0.000499691283013315, 'samples': 147024, 'steps': 3062, 'loss/train': 2.367223024368286} +07/25/2024 11:39:43 - INFO - __main__ - Step 3064: {'lr': 0.0004996910216088129, 'samples': 147072, 'steps': 3063, 'loss/train': 3.5564780235290527} +07/25/2024 11:39:43 - INFO - __main__ - Step 3065: {'lr': 0.0004996907600937544, 'samples': 147120, 'steps': 3064, 'loss/train': 2.5304832458496094} +07/25/2024 11:39:43 - INFO - __main__ - Step 3066: {'lr': 0.0004996904984681401, 'samples': 147168, 'steps': 3065, 'loss/train': 3.1821930408477783} +07/25/2024 11:39:43 - INFO - __main__ - Step 3067: {'lr': 0.0004996902367319698, 'samples': 147216, 'steps': 3066, 'loss/train': 2.951096534729004} +07/25/2024 11:39:44 - INFO - __main__ - Step 3068: {'lr': 0.0004996899748852436, 'samples': 147264, 'steps': 3067, 'loss/train': 3.1324052810668945} +07/25/2024 11:39:44 - INFO - __main__ - Step 3069: {'lr': 0.0004996897129279618, 'samples': 147312, 'steps': 3068, 'loss/train': 3.6942059993743896} +07/25/2024 11:39:44 - INFO - __main__ - Step 3070: {'lr': 0.0004996894508601242, 'samples': 147360, 'steps': 3069, 'loss/train': 3.0282843112945557} +07/25/2024 11:39:45 - INFO - __main__ - Step 3071: {'lr': 0.0004996891886817311, 'samples': 147408, 'steps': 3070, 'loss/train': 2.4173269271850586} +07/25/2024 11:39:45 - INFO - __main__ - Step 3072: {'lr': 0.0004996889263927828, 'samples': 147456, 'steps': 3071, 'loss/train': 2.8674137592315674} +07/25/2024 11:39:45 - INFO - __main__ - Step 3073: {'lr': 0.0004996886639932792, 'samples': 147504, 'steps': 3072, 'loss/train': 2.8783576488494873} +07/25/2024 11:39:45 - INFO - __main__ - Step 3074: {'lr': 0.0004996884014832203, 'samples': 147552, 'steps': 3073, 'loss/train': 2.718006134033203} +07/25/2024 11:39:46 - INFO - __main__ - Step 3075: {'lr': 0.0004996881388626065, 'samples': 147600, 'steps': 3074, 'loss/train': 3.230494260787964} +07/25/2024 11:39:46 - INFO - __main__ - Step 3076: {'lr': 0.0004996878761314378, 'samples': 147648, 'steps': 3075, 'loss/train': 2.8308115005493164} +07/25/2024 11:39:46 - INFO - __main__ - Step 3077: {'lr': 0.0004996876132897142, 'samples': 147696, 'steps': 3076, 'loss/train': 2.0731406211853027} +07/25/2024 11:39:47 - INFO - __main__ - Step 3078: {'lr': 0.0004996873503374361, 'samples': 147744, 'steps': 3077, 'loss/train': 2.731114149093628} +07/25/2024 11:39:47 - INFO - __main__ - Step 3079: {'lr': 0.0004996870872746033, 'samples': 147792, 'steps': 3078, 'loss/train': 3.039336681365967} +07/25/2024 11:39:47 - INFO - __main__ - Step 3080: {'lr': 0.000499686824101216, 'samples': 147840, 'steps': 3079, 'loss/train': 3.3731658458709717} +07/25/2024 11:39:47 - INFO - __main__ - Step 3081: {'lr': 0.0004996865608172745, 'samples': 147888, 'steps': 3080, 'loss/train': 3.248169422149658} +07/25/2024 11:39:48 - INFO - __main__ - Step 3082: {'lr': 0.0004996862974227788, 'samples': 147936, 'steps': 3081, 'loss/train': 3.36140775680542} +07/25/2024 11:39:48 - INFO - __main__ - Step 3083: {'lr': 0.0004996860339177289, 'samples': 147984, 'steps': 3082, 'loss/train': 2.4896249771118164} +07/25/2024 11:39:48 - INFO - __main__ - Step 3084: {'lr': 0.0004996857703021252, 'samples': 148032, 'steps': 3083, 'loss/train': 9.337068557739258} +07/25/2024 11:39:49 - INFO - __main__ - Step 3085: {'lr': 0.0004996855065759674, 'samples': 148080, 'steps': 3084, 'loss/train': 2.819831371307373} +07/25/2024 11:39:49 - INFO - __main__ - Step 3086: {'lr': 0.0004996852427392559, 'samples': 148128, 'steps': 3085, 'loss/train': 1.761367917060852} +07/25/2024 11:39:49 - INFO - __main__ - Step 3087: {'lr': 0.000499684978791991, 'samples': 148176, 'steps': 3086, 'loss/train': 1.5925633907318115} +07/25/2024 11:39:49 - INFO - __main__ - Step 3088: {'lr': 0.0004996847147341724, 'samples': 148224, 'steps': 3087, 'loss/train': 3.2801685333251953} +07/25/2024 11:39:50 - INFO - __main__ - Step 3089: {'lr': 0.0004996844505658004, 'samples': 148272, 'steps': 3088, 'loss/train': 2.569138765335083} +07/25/2024 11:39:50 - INFO - __main__ - Step 3090: {'lr': 0.000499684186286875, 'samples': 148320, 'steps': 3089, 'loss/train': 2.5539164543151855} +07/25/2024 11:39:50 - INFO - __main__ - Step 3091: {'lr': 0.0004996839218973966, 'samples': 148368, 'steps': 3090, 'loss/train': 3.578310251235962} +07/25/2024 11:39:51 - INFO - __main__ - Step 3092: {'lr': 0.0004996836573973652, 'samples': 148416, 'steps': 3091, 'loss/train': 2.771595001220703} +07/25/2024 11:39:51 - INFO - __main__ - Step 3093: {'lr': 0.0004996833927867808, 'samples': 148464, 'steps': 3092, 'loss/train': 3.8643088340759277} +07/25/2024 11:39:51 - INFO - __main__ - Step 3094: {'lr': 0.0004996831280656436, 'samples': 148512, 'steps': 3093, 'loss/train': 2.957655906677246} +07/25/2024 11:39:51 - INFO - __main__ - Step 3095: {'lr': 0.0004996828632339537, 'samples': 148560, 'steps': 3094, 'loss/train': 2.3186087608337402} +07/25/2024 11:39:52 - INFO - __main__ - Step 3096: {'lr': 0.0004996825982917113, 'samples': 148608, 'steps': 3095, 'loss/train': 2.8151841163635254} +07/25/2024 11:39:52 - INFO - __main__ - Step 3097: {'lr': 0.0004996823332389163, 'samples': 148656, 'steps': 3096, 'loss/train': 2.750192165374756} +07/25/2024 11:39:52 - INFO - __main__ - Step 3098: {'lr': 0.000499682068075569, 'samples': 148704, 'steps': 3097, 'loss/train': 2.4222352504730225} +07/25/2024 11:39:53 - INFO - __main__ - Step 3099: {'lr': 0.0004996818028016695, 'samples': 148752, 'steps': 3098, 'loss/train': 2.6515133380889893} +07/25/2024 11:39:53 - INFO - __main__ - Step 3100: {'lr': 0.0004996815374172179, 'samples': 148800, 'steps': 3099, 'loss/train': 2.4786171913146973} +07/25/2024 11:39:53 - INFO - __main__ - Step 3101: {'lr': 0.0004996812719222143, 'samples': 148848, 'steps': 3100, 'loss/train': 2.815704822540283} +07/25/2024 11:39:53 - INFO - __main__ - Step 3102: {'lr': 0.0004996810063166589, 'samples': 148896, 'steps': 3101, 'loss/train': 2.6741456985473633} +07/25/2024 11:39:54 - INFO - __main__ - Step 3103: {'lr': 0.0004996807406005517, 'samples': 148944, 'steps': 3102, 'loss/train': 2.964888572692871} +07/25/2024 11:39:54 - INFO - __main__ - Step 3104: {'lr': 0.0004996804747738928, 'samples': 148992, 'steps': 3103, 'loss/train': 2.57362699508667} +07/25/2024 11:39:54 - INFO - __main__ - Step 3105: {'lr': 0.0004996802088366825, 'samples': 149040, 'steps': 3104, 'loss/train': 3.325209379196167} +07/25/2024 11:39:55 - INFO - __main__ - Step 3106: {'lr': 0.0004996799427889207, 'samples': 149088, 'steps': 3105, 'loss/train': 3.248798131942749} +07/25/2024 11:39:55 - INFO - __main__ - Step 3107: {'lr': 0.0004996796766306076, 'samples': 149136, 'steps': 3106, 'loss/train': 3.257375717163086} +07/25/2024 11:39:55 - INFO - __main__ - Step 3108: {'lr': 0.0004996794103617434, 'samples': 149184, 'steps': 3107, 'loss/train': 6.836536407470703} +07/25/2024 11:39:55 - INFO - __main__ - Step 3109: {'lr': 0.0004996791439823282, 'samples': 149232, 'steps': 3108, 'loss/train': 2.642457962036133} +07/25/2024 11:39:56 - INFO - __main__ - Step 3110: {'lr': 0.000499678877492362, 'samples': 149280, 'steps': 3109, 'loss/train': 2.746939182281494} +07/25/2024 11:39:56 - INFO - __main__ - Step 3111: {'lr': 0.000499678610891845, 'samples': 149328, 'steps': 3110, 'loss/train': 2.14775013923645} +07/25/2024 11:39:56 - INFO - __main__ - Step 3112: {'lr': 0.0004996783441807773, 'samples': 149376, 'steps': 3111, 'loss/train': 3.1578445434570312} +07/25/2024 11:39:57 - INFO - __main__ - Step 3113: {'lr': 0.000499678077359159, 'samples': 149424, 'steps': 3112, 'loss/train': 2.3715310096740723} +07/25/2024 11:39:57 - INFO - __main__ - Step 3114: {'lr': 0.0004996778104269903, 'samples': 149472, 'steps': 3113, 'loss/train': 3.225719928741455} +07/25/2024 11:39:57 - INFO - __main__ - Step 3115: {'lr': 0.0004996775433842712, 'samples': 149520, 'steps': 3114, 'loss/train': 2.6527156829833984} +07/25/2024 11:39:57 - INFO - __main__ - Step 3116: {'lr': 0.0004996772762310019, 'samples': 149568, 'steps': 3115, 'loss/train': 3.04748797416687} +07/25/2024 11:39:58 - INFO - __main__ - Step 3117: {'lr': 0.0004996770089671826, 'samples': 149616, 'steps': 3116, 'loss/train': 3.2801384925842285} +07/25/2024 11:39:58 - INFO - __main__ - Step 3118: {'lr': 0.0004996767415928132, 'samples': 149664, 'steps': 3117, 'loss/train': 2.6428654193878174} +07/25/2024 11:39:58 - INFO - __main__ - Step 3119: {'lr': 0.000499676474107894, 'samples': 149712, 'steps': 3118, 'loss/train': 2.0540924072265625} +07/25/2024 11:39:59 - INFO - __main__ - Step 3120: {'lr': 0.000499676206512425, 'samples': 149760, 'steps': 3119, 'loss/train': 2.773655891418457} +07/25/2024 11:39:59 - INFO - __main__ - Step 3121: {'lr': 0.0004996759388064063, 'samples': 149808, 'steps': 3120, 'loss/train': 2.7827048301696777} +07/25/2024 11:39:59 - INFO - __main__ - Step 3122: {'lr': 0.0004996756709898382, 'samples': 149856, 'steps': 3121, 'loss/train': 2.298048496246338} +07/25/2024 11:39:59 - INFO - __main__ - Step 3123: {'lr': 0.0004996754030627207, 'samples': 149904, 'steps': 3122, 'loss/train': 1.9433379173278809} +07/25/2024 11:40:00 - INFO - __main__ - Step 3124: {'lr': 0.000499675135025054, 'samples': 149952, 'steps': 3123, 'loss/train': 2.9699816703796387} +07/25/2024 11:40:00 - INFO - __main__ - Step 3125: {'lr': 0.000499674866876838, 'samples': 150000, 'steps': 3124, 'loss/train': 1.5000503063201904} +07/25/2024 11:40:00 - INFO - __main__ - Step 3126: {'lr': 0.0004996745986180729, 'samples': 150048, 'steps': 3125, 'loss/train': 2.6355807781219482} +07/25/2024 11:40:01 - INFO - __main__ - Step 3127: {'lr': 0.0004996743302487591, 'samples': 150096, 'steps': 3126, 'loss/train': 2.685044765472412} +07/25/2024 11:40:01 - INFO - __main__ - Step 3128: {'lr': 0.0004996740617688964, 'samples': 150144, 'steps': 3127, 'loss/train': 1.7368754148483276} +07/25/2024 11:40:01 - INFO - __main__ - Step 3129: {'lr': 0.0004996737931784851, 'samples': 150192, 'steps': 3128, 'loss/train': 1.3929861783981323} +07/25/2024 11:40:01 - INFO - __main__ - Step 3130: {'lr': 0.0004996735244775252, 'samples': 150240, 'steps': 3129, 'loss/train': 2.5290772914886475} +07/25/2024 11:40:02 - INFO - __main__ - Step 3131: {'lr': 0.0004996732556660167, 'samples': 150288, 'steps': 3130, 'loss/train': 2.831843852996826} +07/25/2024 11:40:02 - INFO - __main__ - Step 3132: {'lr': 0.00049967298674396, 'samples': 150336, 'steps': 3131, 'loss/train': 6.745789051055908} +07/25/2024 11:40:02 - INFO - __main__ - Step 3133: {'lr': 0.0004996727177113551, 'samples': 150384, 'steps': 3132, 'loss/train': 1.9293659925460815} +07/25/2024 11:40:02 - INFO - __main__ - Step 3134: {'lr': 0.0004996724485682022, 'samples': 150432, 'steps': 3133, 'loss/train': 2.7628729343414307} +07/25/2024 11:40:03 - INFO - __main__ - Step 3135: {'lr': 0.0004996721793145012, 'samples': 150480, 'steps': 3134, 'loss/train': 2.6574931144714355} +07/25/2024 11:40:03 - INFO - __main__ - Step 3136: {'lr': 0.0004996719099502524, 'samples': 150528, 'steps': 3135, 'loss/train': 3.048948049545288} +07/25/2024 11:40:03 - INFO - __main__ - Step 3137: {'lr': 0.0004996716404754559, 'samples': 150576, 'steps': 3136, 'loss/train': 2.4062340259552} +07/25/2024 11:40:04 - INFO - __main__ - Step 3138: {'lr': 0.0004996713708901118, 'samples': 150624, 'steps': 3137, 'loss/train': 2.9466514587402344} +07/25/2024 11:40:04 - INFO - __main__ - Step 3139: {'lr': 0.0004996711011942202, 'samples': 150672, 'steps': 3138, 'loss/train': 3.4063355922698975} +07/25/2024 11:40:04 - INFO - __main__ - Step 3140: {'lr': 0.0004996708313877812, 'samples': 150720, 'steps': 3139, 'loss/train': 2.727051258087158} +07/25/2024 11:40:04 - INFO - __main__ - Step 3141: {'lr': 0.000499670561470795, 'samples': 150768, 'steps': 3140, 'loss/train': 4.050637722015381} +07/25/2024 11:40:05 - INFO - __main__ - Step 3142: {'lr': 0.0004996702914432617, 'samples': 150816, 'steps': 3141, 'loss/train': 1.1681638956069946} +07/25/2024 11:40:05 - INFO - __main__ - Step 3143: {'lr': 0.0004996700213051813, 'samples': 150864, 'steps': 3142, 'loss/train': 2.381251811981201} +07/25/2024 11:40:05 - INFO - __main__ - Step 3144: {'lr': 0.0004996697510565541, 'samples': 150912, 'steps': 3143, 'loss/train': 2.8101797103881836} +07/25/2024 11:40:06 - INFO - __main__ - Step 3145: {'lr': 0.00049966948069738, 'samples': 150960, 'steps': 3144, 'loss/train': 3.1219418048858643} +07/25/2024 11:40:06 - INFO - __main__ - Step 3146: {'lr': 0.0004996692102276593, 'samples': 151008, 'steps': 3145, 'loss/train': 2.468967914581299} +07/25/2024 11:40:06 - INFO - __main__ - Step 3147: {'lr': 0.0004996689396473921, 'samples': 151056, 'steps': 3146, 'loss/train': 0.6362808346748352} +07/25/2024 11:40:06 - INFO - __main__ - Step 3148: {'lr': 0.0004996686689565785, 'samples': 151104, 'steps': 3147, 'loss/train': 3.1060218811035156} +07/25/2024 11:40:07 - INFO - __main__ - Step 3149: {'lr': 0.0004996683981552186, 'samples': 151152, 'steps': 3148, 'loss/train': 2.51173996925354} +07/25/2024 11:40:07 - INFO - __main__ - Step 3150: {'lr': 0.0004996681272433126, 'samples': 151200, 'steps': 3149, 'loss/train': 2.9654476642608643} +07/25/2024 11:40:07 - INFO - __main__ - Step 3151: {'lr': 0.0004996678562208605, 'samples': 151248, 'steps': 3150, 'loss/train': 2.877579927444458} +07/25/2024 11:40:08 - INFO - __main__ - Step 3152: {'lr': 0.0004996675850878624, 'samples': 151296, 'steps': 3151, 'loss/train': 1.6030387878417969} +07/25/2024 11:40:08 - INFO - __main__ - Step 3153: {'lr': 0.0004996673138443186, 'samples': 151344, 'steps': 3152, 'loss/train': 0.5915510058403015} +07/25/2024 11:40:08 - INFO - __main__ - Step 3154: {'lr': 0.000499667042490229, 'samples': 151392, 'steps': 3153, 'loss/train': 2.71513295173645} +07/25/2024 11:40:08 - INFO - __main__ - Step 3155: {'lr': 0.0004996667710255938, 'samples': 151440, 'steps': 3154, 'loss/train': 2.9905872344970703} +07/25/2024 11:40:09 - INFO - __main__ - Step 3156: {'lr': 0.0004996664994504132, 'samples': 151488, 'steps': 3155, 'loss/train': 6.801745414733887} +07/25/2024 11:40:09 - INFO - __main__ - Step 3157: {'lr': 0.0004996662277646873, 'samples': 151536, 'steps': 3156, 'loss/train': 2.686572790145874} +07/25/2024 11:40:09 - INFO - __main__ - Step 3158: {'lr': 0.0004996659559684162, 'samples': 151584, 'steps': 3157, 'loss/train': 2.71642804145813} +07/25/2024 11:40:10 - INFO - __main__ - Step 3159: {'lr': 0.0004996656840616, 'samples': 151632, 'steps': 3158, 'loss/train': 3.213873863220215} +07/25/2024 11:40:10 - INFO - __main__ - Step 3160: {'lr': 0.0004996654120442387, 'samples': 151680, 'steps': 3159, 'loss/train': 2.748352289199829} +07/25/2024 11:40:10 - INFO - __main__ - Step 3161: {'lr': 0.0004996651399163328, 'samples': 151728, 'steps': 3160, 'loss/train': 2.364757776260376} +07/25/2024 11:40:10 - INFO - __main__ - Step 3162: {'lr': 0.0004996648676778819, 'samples': 151776, 'steps': 3161, 'loss/train': 3.3050193786621094} +07/25/2024 11:40:11 - INFO - __main__ - Step 3163: {'lr': 0.0004996645953288866, 'samples': 151824, 'steps': 3162, 'loss/train': 3.0901529788970947} +07/25/2024 11:40:11 - INFO - __main__ - Step 3164: {'lr': 0.0004996643228693469, 'samples': 151872, 'steps': 3163, 'loss/train': 3.553701400756836} +07/25/2024 11:40:11 - INFO - __main__ - Step 3165: {'lr': 0.0004996640502992625, 'samples': 151920, 'steps': 3164, 'loss/train': 3.2493155002593994} +07/25/2024 11:40:12 - INFO - __main__ - Step 3166: {'lr': 0.0004996637776186341, 'samples': 151968, 'steps': 3165, 'loss/train': 2.916222095489502} +07/25/2024 11:40:12 - INFO - __main__ - Step 3167: {'lr': 0.0004996635048274614, 'samples': 152016, 'steps': 3166, 'loss/train': 3.0736281871795654} +07/25/2024 11:40:12 - INFO - __main__ - Step 3168: {'lr': 0.0004996632319257449, 'samples': 152064, 'steps': 3167, 'loss/train': 2.927002191543579} +07/25/2024 11:40:12 - INFO - __main__ - Step 3169: {'lr': 0.0004996629589134844, 'samples': 152112, 'steps': 3168, 'loss/train': 2.339751720428467} +07/25/2024 11:40:13 - INFO - __main__ - Step 3170: {'lr': 0.0004996626857906802, 'samples': 152160, 'steps': 3169, 'loss/train': 2.417407989501953} +07/25/2024 11:40:13 - INFO - __main__ - Step 3171: {'lr': 0.0004996624125573323, 'samples': 152208, 'steps': 3170, 'loss/train': 0.5803000330924988} +07/25/2024 11:40:13 - INFO - __main__ - Step 3172: {'lr': 0.0004996621392134409, 'samples': 152256, 'steps': 3171, 'loss/train': 2.7740886211395264} +07/25/2024 11:40:14 - INFO - __main__ - Step 3173: {'lr': 0.000499661865759006, 'samples': 152304, 'steps': 3172, 'loss/train': 2.470471143722534} +07/25/2024 11:40:14 - INFO - __main__ - Step 3174: {'lr': 0.0004996615921940281, 'samples': 152352, 'steps': 3173, 'loss/train': 2.7057807445526123} +07/25/2024 11:40:14 - INFO - __main__ - Step 3175: {'lr': 0.0004996613185185068, 'samples': 152400, 'steps': 3174, 'loss/train': 2.911698818206787} +07/25/2024 11:40:14 - INFO - __main__ - Step 3176: {'lr': 0.0004996610447324425, 'samples': 152448, 'steps': 3175, 'loss/train': 1.715712308883667} +07/25/2024 11:40:15 - INFO - __main__ - Step 3177: {'lr': 0.0004996607708358352, 'samples': 152496, 'steps': 3176, 'loss/train': 0.481123685836792} +07/25/2024 11:40:15 - INFO - __main__ - Step 3178: {'lr': 0.0004996604968286853, 'samples': 152544, 'steps': 3177, 'loss/train': 2.4048538208007812} +07/25/2024 11:40:15 - INFO - __main__ - Step 3179: {'lr': 0.0004996602227109926, 'samples': 152592, 'steps': 3178, 'loss/train': 2.7755372524261475} +07/25/2024 11:40:16 - INFO - __main__ - Step 3180: {'lr': 0.0004996599484827575, 'samples': 152640, 'steps': 3179, 'loss/train': 6.790804862976074} +07/25/2024 11:40:16 - INFO - __main__ - Step 3181: {'lr': 0.0004996596741439799, 'samples': 152688, 'steps': 3180, 'loss/train': 2.7877817153930664} +07/25/2024 11:40:16 - INFO - __main__ - Step 3182: {'lr': 0.0004996593996946598, 'samples': 152736, 'steps': 3181, 'loss/train': 2.942878484725952} +07/25/2024 11:40:16 - INFO - __main__ - Step 3183: {'lr': 0.0004996591251347978, 'samples': 152784, 'steps': 3182, 'loss/train': 3.046743869781494} +07/25/2024 11:40:17 - INFO - __main__ - Step 3184: {'lr': 0.0004996588504643935, 'samples': 152832, 'steps': 3183, 'loss/train': 2.7470202445983887} +07/25/2024 11:40:17 - INFO - __main__ - Step 3185: {'lr': 0.0004996585756834475, 'samples': 152880, 'steps': 3184, 'loss/train': 2.502253770828247} +07/25/2024 11:40:17 - INFO - __main__ - Step 3186: {'lr': 0.0004996583007919594, 'samples': 152928, 'steps': 3185, 'loss/train': 3.416080951690674} +07/25/2024 11:40:18 - INFO - __main__ - Step 3187: {'lr': 0.0004996580257899297, 'samples': 152976, 'steps': 3186, 'loss/train': 2.634843111038208} +07/25/2024 11:40:18 - INFO - __main__ - Step 3188: {'lr': 0.0004996577506773585, 'samples': 153024, 'steps': 3187, 'loss/train': 3.8578124046325684} +07/25/2024 11:40:18 - INFO - __main__ - Step 3189: {'lr': 0.0004996574754542458, 'samples': 153072, 'steps': 3188, 'loss/train': 2.865341901779175} +07/25/2024 11:40:18 - INFO - __main__ - Step 3190: {'lr': 0.0004996572001205918, 'samples': 153120, 'steps': 3189, 'loss/train': 1.7939039468765259} +07/25/2024 11:40:19 - INFO - __main__ - Step 3191: {'lr': 0.0004996569246763966, 'samples': 153168, 'steps': 3190, 'loss/train': 2.8862502574920654} +07/25/2024 11:40:19 - INFO - __main__ - Step 3192: {'lr': 0.0004996566491216602, 'samples': 153216, 'steps': 3191, 'loss/train': 3.719895839691162} +07/25/2024 11:40:19 - INFO - __main__ - Step 3193: {'lr': 0.0004996563734563828, 'samples': 153264, 'steps': 3192, 'loss/train': 2.516961097717285} +07/25/2024 11:40:20 - INFO - __main__ - Step 3194: {'lr': 0.0004996560976805647, 'samples': 153312, 'steps': 3193, 'loss/train': 1.990061640739441} +07/25/2024 11:40:20 - INFO - __main__ - Step 3195: {'lr': 0.0004996558217942058, 'samples': 153360, 'steps': 3194, 'loss/train': 0.5279544591903687} +07/25/2024 11:40:20 - INFO - __main__ - Step 3196: {'lr': 0.0004996555457973063, 'samples': 153408, 'steps': 3195, 'loss/train': 3.3243632316589355} +07/25/2024 11:40:20 - INFO - __main__ - Step 3197: {'lr': 0.0004996552696898664, 'samples': 153456, 'steps': 3196, 'loss/train': 2.4923369884490967} +07/25/2024 11:40:21 - INFO - __main__ - Step 3198: {'lr': 0.000499654993471886, 'samples': 153504, 'steps': 3197, 'loss/train': 2.8266427516937256} +07/25/2024 11:40:21 - INFO - __main__ - Step 3199: {'lr': 0.0004996547171433655, 'samples': 153552, 'steps': 3198, 'loss/train': 2.785576343536377} +07/25/2024 11:40:21 - INFO - __main__ - Step 3200: {'lr': 0.0004996544407043048, 'samples': 153600, 'steps': 3199, 'loss/train': 1.5892800092697144} +07/25/2024 11:40:22 - INFO - __main__ - Step 3201: {'lr': 0.0004996541641547041, 'samples': 153648, 'steps': 3200, 'loss/train': 0.5164067149162292} +07/25/2024 11:40:22 - INFO - __main__ - Step 3202: {'lr': 0.0004996538874945636, 'samples': 153696, 'steps': 3201, 'loss/train': 1.9521632194519043} +07/25/2024 11:40:22 - INFO - __main__ - Step 3203: {'lr': 0.0004996536107238833, 'samples': 153744, 'steps': 3202, 'loss/train': 2.6806700229644775} +07/25/2024 11:40:22 - INFO - __main__ - Step 3204: {'lr': 0.0004996533338426634, 'samples': 153792, 'steps': 3203, 'loss/train': 6.165155410766602} +07/25/2024 11:40:23 - INFO - __main__ - Step 3205: {'lr': 0.000499653056850904, 'samples': 153840, 'steps': 3204, 'loss/train': 2.9259769916534424} +07/25/2024 11:40:23 - INFO - __main__ - Step 3206: {'lr': 0.0004996527797486053, 'samples': 153888, 'steps': 3205, 'loss/train': 2.7734453678131104} +07/25/2024 11:40:23 - INFO - __main__ - Step 3207: {'lr': 0.0004996525025357672, 'samples': 153936, 'steps': 3206, 'loss/train': 3.0425992012023926} +07/25/2024 11:40:24 - INFO - __main__ - Step 3208: {'lr': 0.00049965222521239, 'samples': 153984, 'steps': 3207, 'loss/train': 3.080709457397461} +07/25/2024 11:40:24 - INFO - __main__ - Step 3209: {'lr': 0.0004996519477784739, 'samples': 154032, 'steps': 3208, 'loss/train': 2.6679627895355225} +07/25/2024 11:40:24 - INFO - __main__ - Step 3210: {'lr': 0.0004996516702340188, 'samples': 154080, 'steps': 3209, 'loss/train': 3.125135660171509} +07/25/2024 11:40:24 - INFO - __main__ - Step 3211: {'lr': 0.000499651392579025, 'samples': 154128, 'steps': 3210, 'loss/train': 3.2758843898773193} +07/25/2024 11:40:25 - INFO - __main__ - Step 3212: {'lr': 0.0004996511148134925, 'samples': 154176, 'steps': 3211, 'loss/train': 2.440352439880371} +07/25/2024 11:40:25 - INFO - __main__ - Step 3213: {'lr': 0.0004996508369374215, 'samples': 154224, 'steps': 3212, 'loss/train': 2.279177665710449} +07/25/2024 11:40:25 - INFO - __main__ - Step 3214: {'lr': 0.0004996505589508121, 'samples': 154272, 'steps': 3213, 'loss/train': 2.3957393169403076} +07/25/2024 11:40:25 - INFO - __main__ - Step 3215: {'lr': 0.0004996502808536645, 'samples': 154320, 'steps': 3214, 'loss/train': 2.7117629051208496} +07/25/2024 11:40:26 - INFO - __main__ - Step 3216: {'lr': 0.0004996500026459786, 'samples': 154368, 'steps': 3215, 'loss/train': 3.153938055038452} +07/25/2024 11:40:26 - INFO - __main__ - Step 3217: {'lr': 0.0004996497243277548, 'samples': 154416, 'steps': 3216, 'loss/train': 3.024894952774048} +07/25/2024 11:40:26 - INFO - __main__ - Step 3218: {'lr': 0.0004996494458989931, 'samples': 154464, 'steps': 3217, 'loss/train': 2.63775634765625} +07/25/2024 11:40:27 - INFO - __main__ - Step 3219: {'lr': 0.0004996491673596936, 'samples': 154512, 'steps': 3218, 'loss/train': 0.584536075592041} +07/25/2024 11:40:27 - INFO - __main__ - Step 3220: {'lr': 0.0004996488887098564, 'samples': 154560, 'steps': 3219, 'loss/train': 2.5353500843048096} +07/25/2024 11:40:27 - INFO - __main__ - Step 3221: {'lr': 0.0004996486099494817, 'samples': 154608, 'steps': 3220, 'loss/train': 2.6715078353881836} +07/25/2024 11:40:27 - INFO - __main__ - Step 3222: {'lr': 0.0004996483310785695, 'samples': 154656, 'steps': 3221, 'loss/train': 3.065610647201538} +07/25/2024 11:40:28 - INFO - __main__ - Step 3223: {'lr': 0.0004996480520971201, 'samples': 154704, 'steps': 3222, 'loss/train': 2.8633735179901123} +07/25/2024 11:40:28 - INFO - __main__ - Step 3224: {'lr': 0.0004996477730051336, 'samples': 154752, 'steps': 3223, 'loss/train': 1.450046181678772} +07/25/2024 11:40:28 - INFO - __main__ - Step 3225: {'lr': 0.00049964749380261, 'samples': 154800, 'steps': 3224, 'loss/train': 0.4642537534236908} +07/25/2024 11:40:29 - INFO - __main__ - Step 3226: {'lr': 0.0004996472144895495, 'samples': 154848, 'steps': 3225, 'loss/train': 2.565671920776367} +07/25/2024 11:40:29 - INFO - __main__ - Step 3227: {'lr': 0.0004996469350659522, 'samples': 154896, 'steps': 3226, 'loss/train': 2.562000274658203} +07/25/2024 11:40:29 - INFO - __main__ - Step 3228: {'lr': 0.0004996466555318182, 'samples': 154944, 'steps': 3227, 'loss/train': 6.097886085510254} +07/25/2024 11:40:29 - INFO - __main__ - Step 3229: {'lr': 0.0004996463758871477, 'samples': 154992, 'steps': 3228, 'loss/train': 3.2313499450683594} +07/25/2024 11:40:30 - INFO - __main__ - Step 3230: {'lr': 0.0004996460961319407, 'samples': 155040, 'steps': 3229, 'loss/train': 2.673428773880005} +07/25/2024 11:40:30 - INFO - __main__ - Step 3231: {'lr': 0.0004996458162661975, 'samples': 155088, 'steps': 3230, 'loss/train': 3.719724416732788} +07/25/2024 11:40:30 - INFO - __main__ - Step 3232: {'lr': 0.000499645536289918, 'samples': 155136, 'steps': 3231, 'loss/train': 2.3775887489318848} +07/25/2024 11:40:31 - INFO - __main__ - Step 3233: {'lr': 0.0004996452562031026, 'samples': 155184, 'steps': 3232, 'loss/train': 2.613370418548584} +07/25/2024 11:40:31 - INFO - __main__ - Step 3234: {'lr': 0.0004996449760057512, 'samples': 155232, 'steps': 3233, 'loss/train': 2.915695905685425} +07/25/2024 11:40:31 - INFO - __main__ - Step 3235: {'lr': 0.000499644695697864, 'samples': 155280, 'steps': 3234, 'loss/train': 3.233177900314331} +07/25/2024 11:40:31 - INFO - __main__ - Step 3236: {'lr': 0.0004996444152794412, 'samples': 155328, 'steps': 3235, 'loss/train': 2.9219794273376465} +07/25/2024 11:40:32 - INFO - __main__ - Step 3237: {'lr': 0.0004996441347504827, 'samples': 155376, 'steps': 3236, 'loss/train': 2.890073299407959} +07/25/2024 11:40:32 - INFO - __main__ - Step 3238: {'lr': 0.000499643854110989, 'samples': 155424, 'steps': 3237, 'loss/train': 2.8040692806243896} +07/25/2024 11:40:32 - INFO - __main__ - Step 3239: {'lr': 0.0004996435733609598, 'samples': 155472, 'steps': 3238, 'loss/train': 3.0921409130096436} +07/25/2024 11:40:33 - INFO - __main__ - Step 3240: {'lr': 0.0004996432925003955, 'samples': 155520, 'steps': 3239, 'loss/train': 2.7469189167022705} +07/25/2024 11:40:33 - INFO - __main__ - Step 3241: {'lr': 0.0004996430115292961, 'samples': 155568, 'steps': 3240, 'loss/train': 2.6420738697052} +07/25/2024 11:40:33 - INFO - __main__ - Step 3242: {'lr': 0.0004996427304476618, 'samples': 155616, 'steps': 3241, 'loss/train': 2.7029967308044434} +07/25/2024 11:40:33 - INFO - __main__ - Step 3243: {'lr': 0.0004996424492554926, 'samples': 155664, 'steps': 3242, 'loss/train': 0.48742732405662537} +07/25/2024 11:40:34 - INFO - __main__ - Step 3244: {'lr': 0.0004996421679527888, 'samples': 155712, 'steps': 3243, 'loss/train': 3.06724214553833} +07/25/2024 11:40:34 - INFO - __main__ - Step 3245: {'lr': 0.0004996418865395505, 'samples': 155760, 'steps': 3244, 'loss/train': 2.927217721939087} +07/25/2024 11:40:34 - INFO - __main__ - Step 3246: {'lr': 0.0004996416050157777, 'samples': 155808, 'steps': 3245, 'loss/train': 3.272406578063965} +07/25/2024 11:40:35 - INFO - __main__ - Step 3247: {'lr': 0.0004996413233814706, 'samples': 155856, 'steps': 3246, 'loss/train': 2.4627394676208496} +07/25/2024 11:40:35 - INFO - __main__ - Step 3248: {'lr': 0.0004996410416366294, 'samples': 155904, 'steps': 3247, 'loss/train': 1.304236650466919} +07/25/2024 11:40:35 - INFO - __main__ - Step 3249: {'lr': 0.0004996407597812541, 'samples': 155952, 'steps': 3248, 'loss/train': 0.4590834975242615} +07/25/2024 11:40:35 - INFO - __main__ - Step 3250: {'lr': 0.0004996404778153447, 'samples': 156000, 'steps': 3249, 'loss/train': 2.363123655319214} +07/25/2024 11:40:36 - INFO - __main__ - Step 3251: {'lr': 0.0004996401957389016, 'samples': 156048, 'steps': 3250, 'loss/train': 3.2086212635040283} +07/25/2024 11:40:36 - INFO - __main__ - Step 3252: {'lr': 0.0004996399135519249, 'samples': 156096, 'steps': 3251, 'loss/train': 6.065637588500977} +07/25/2024 11:40:36 - INFO - __main__ - Step 3253: {'lr': 0.0004996396312544146, 'samples': 156144, 'steps': 3252, 'loss/train': 3.5116400718688965} +07/25/2024 11:40:37 - INFO - __main__ - Step 3254: {'lr': 0.0004996393488463708, 'samples': 156192, 'steps': 3253, 'loss/train': 2.2119903564453125} +07/25/2024 11:40:37 - INFO - __main__ - Step 3255: {'lr': 0.0004996390663277938, 'samples': 156240, 'steps': 3254, 'loss/train': 2.43831467628479} +07/25/2024 11:40:37 - INFO - __main__ - Step 3256: {'lr': 0.0004996387836986836, 'samples': 156288, 'steps': 3255, 'loss/train': 2.6487462520599365} +07/25/2024 11:40:37 - INFO - __main__ - Step 3257: {'lr': 0.0004996385009590403, 'samples': 156336, 'steps': 3256, 'loss/train': 2.7925913333892822} +07/25/2024 11:40:38 - INFO - __main__ - Step 3258: {'lr': 0.000499638218108864, 'samples': 156384, 'steps': 3257, 'loss/train': 3.0324766635894775} +07/25/2024 11:40:38 - INFO - __main__ - Step 3259: {'lr': 0.0004996379351481549, 'samples': 156432, 'steps': 3258, 'loss/train': 2.8525125980377197} +07/25/2024 11:40:38 - INFO - __main__ - Step 3260: {'lr': 0.0004996376520769132, 'samples': 156480, 'steps': 3259, 'loss/train': 3.036897659301758} +07/25/2024 11:40:39 - INFO - __main__ - Step 3261: {'lr': 0.0004996373688951389, 'samples': 156528, 'steps': 3260, 'loss/train': 2.7119626998901367} +07/25/2024 11:40:39 - INFO - __main__ - Step 3262: {'lr': 0.0004996370856028322, 'samples': 156576, 'steps': 3261, 'loss/train': 2.165654420852661} +07/25/2024 11:40:39 - INFO - __main__ - Step 3263: {'lr': 0.0004996368021999932, 'samples': 156624, 'steps': 3262, 'loss/train': 1.8469018936157227} +07/25/2024 11:40:39 - INFO - __main__ - Step 3264: {'lr': 0.000499636518686622, 'samples': 156672, 'steps': 3263, 'loss/train': 2.713407278060913} +07/25/2024 11:40:40 - INFO - __main__ - Step 3265: {'lr': 0.0004996362350627187, 'samples': 156720, 'steps': 3264, 'loss/train': 3.018503427505493} +07/25/2024 11:40:40 - INFO - __main__ - Step 3266: {'lr': 0.0004996359513282835, 'samples': 156768, 'steps': 3265, 'loss/train': 2.645033121109009} +07/25/2024 11:40:40 - INFO - __main__ - Step 3267: {'lr': 0.0004996356674833166, 'samples': 156816, 'steps': 3266, 'loss/train': 0.44328778982162476} +07/25/2024 11:40:41 - INFO - __main__ - Step 3268: {'lr': 0.0004996353835278178, 'samples': 156864, 'steps': 3267, 'loss/train': 2.966421127319336} +07/25/2024 11:40:41 - INFO - __main__ - Step 3269: {'lr': 0.0004996350994617876, 'samples': 156912, 'steps': 3268, 'loss/train': 2.7623536586761475} +07/25/2024 11:40:41 - INFO - __main__ - Step 3270: {'lr': 0.0004996348152852259, 'samples': 156960, 'steps': 3269, 'loss/train': 3.069911479949951} +07/25/2024 11:40:41 - INFO - __main__ - Step 3271: {'lr': 0.0004996345309981329, 'samples': 157008, 'steps': 3270, 'loss/train': 1.4926667213439941} +07/25/2024 11:40:42 - INFO - __main__ - Step 3272: {'lr': 0.0004996342466005088, 'samples': 157056, 'steps': 3271, 'loss/train': 1.284309983253479} +07/25/2024 11:40:42 - INFO - __main__ - Step 3273: {'lr': 0.0004996339620923537, 'samples': 157104, 'steps': 3272, 'loss/train': 0.5475984811782837} +07/25/2024 11:40:42 - INFO - __main__ - Step 3274: {'lr': 0.0004996336774736675, 'samples': 157152, 'steps': 3273, 'loss/train': 2.209216594696045} +07/25/2024 11:40:43 - INFO - __main__ - Step 3275: {'lr': 0.0004996333927444507, 'samples': 157200, 'steps': 3274, 'loss/train': 3.2878551483154297} +07/25/2024 11:40:43 - INFO - __main__ - Step 3276: {'lr': 0.0004996331079047031, 'samples': 157248, 'steps': 3275, 'loss/train': 6.048747539520264} +07/25/2024 11:40:43 - INFO - __main__ - Step 3277: {'lr': 0.000499632822954425, 'samples': 157296, 'steps': 3276, 'loss/train': 2.5000600814819336} +07/25/2024 11:40:43 - INFO - __main__ - Step 3278: {'lr': 0.0004996325378936164, 'samples': 157344, 'steps': 3277, 'loss/train': 2.6272685527801514} +07/25/2024 11:40:44 - INFO - __main__ - Step 3279: {'lr': 0.0004996322527222775, 'samples': 157392, 'steps': 3278, 'loss/train': 2.3961217403411865} +07/25/2024 11:40:44 - INFO - __main__ - Step 3280: {'lr': 0.0004996319674404085, 'samples': 157440, 'steps': 3279, 'loss/train': 2.588078498840332} +07/25/2024 11:40:44 - INFO - __main__ - Step 3281: {'lr': 0.0004996316820480095, 'samples': 157488, 'steps': 3280, 'loss/train': 2.4453067779541016} +07/25/2024 11:40:44 - INFO - __main__ - Step 3282: {'lr': 0.0004996313965450806, 'samples': 157536, 'steps': 3281, 'loss/train': 2.8710110187530518} +07/25/2024 11:40:45 - INFO - __main__ - Step 3283: {'lr': 0.0004996311109316218, 'samples': 157584, 'steps': 3282, 'loss/train': 2.451164484024048} +07/25/2024 11:40:45 - INFO - __main__ - Step 3284: {'lr': 0.0004996308252076334, 'samples': 157632, 'steps': 3283, 'loss/train': 2.5265114307403564} +07/25/2024 11:40:45 - INFO - __main__ - Step 3285: {'lr': 0.0004996305393731154, 'samples': 157680, 'steps': 3284, 'loss/train': 2.5712544918060303} +07/25/2024 11:40:46 - INFO - __main__ - Step 3286: {'lr': 0.0004996302534280681, 'samples': 157728, 'steps': 3285, 'loss/train': 2.519580602645874} +07/25/2024 11:40:46 - INFO - __main__ - Step 3287: {'lr': 0.0004996299673724915, 'samples': 157776, 'steps': 3286, 'loss/train': 3.3826310634613037} +07/25/2024 11:40:46 - INFO - __main__ - Step 3288: {'lr': 0.0004996296812063857, 'samples': 157824, 'steps': 3287, 'loss/train': 2.6518845558166504} +07/25/2024 11:40:46 - INFO - __main__ - Step 3289: {'lr': 0.0004996293949297509, 'samples': 157872, 'steps': 3288, 'loss/train': 2.699622869491577} +07/25/2024 11:40:47 - INFO - __main__ - Step 3290: {'lr': 0.0004996291085425872, 'samples': 157920, 'steps': 3289, 'loss/train': 2.634824752807617} +07/25/2024 11:40:47 - INFO - __main__ - Step 3291: {'lr': 0.0004996288220448947, 'samples': 157968, 'steps': 3290, 'loss/train': 0.4262307584285736} +07/25/2024 11:40:47 - INFO - __main__ - Step 3292: {'lr': 0.0004996285354366735, 'samples': 158016, 'steps': 3291, 'loss/train': 2.6947953701019287} +07/25/2024 11:40:48 - INFO - __main__ - Step 3293: {'lr': 0.0004996282487179239, 'samples': 158064, 'steps': 3292, 'loss/train': 2.5812828540802} +07/25/2024 11:40:48 - INFO - __main__ - Step 3294: {'lr': 0.0004996279618886458, 'samples': 158112, 'steps': 3293, 'loss/train': 3.2110915184020996} +07/25/2024 11:40:48 - INFO - __main__ - Step 3295: {'lr': 0.0004996276749488394, 'samples': 158160, 'steps': 3294, 'loss/train': 2.9597442150115967} +07/25/2024 11:40:48 - INFO - __main__ - Step 3296: {'lr': 0.0004996273878985051, 'samples': 158208, 'steps': 3295, 'loss/train': 1.285544753074646} +07/25/2024 11:40:49 - INFO - __main__ - Step 3297: {'lr': 0.0004996271007376426, 'samples': 158256, 'steps': 3296, 'loss/train': 0.5006058812141418} +07/25/2024 11:40:49 - INFO - __main__ - Step 3298: {'lr': 0.0004996268134662521, 'samples': 158304, 'steps': 3297, 'loss/train': 3.162834882736206} +07/25/2024 11:40:49 - INFO - __main__ - Step 3299: {'lr': 0.0004996265260843341, 'samples': 158352, 'steps': 3298, 'loss/train': 3.0177273750305176} +07/25/2024 11:40:50 - INFO - __main__ - Step 3300: {'lr': 0.0004996262385918883, 'samples': 158400, 'steps': 3299, 'loss/train': 6.082427501678467} +07/25/2024 11:40:50 - INFO - __main__ - Step 3301: {'lr': 0.0004996259509889151, 'samples': 158448, 'steps': 3300, 'loss/train': 1.7672934532165527} +07/25/2024 11:40:50 - INFO - __main__ - Step 3302: {'lr': 0.0004996256632754145, 'samples': 158496, 'steps': 3301, 'loss/train': 2.113180160522461} +07/25/2024 11:40:50 - INFO - __main__ - Step 3303: {'lr': 0.0004996253754513865, 'samples': 158544, 'steps': 3302, 'loss/train': 2.481933832168579} +07/25/2024 11:40:51 - INFO - __main__ - Step 3304: {'lr': 0.0004996250875168315, 'samples': 158592, 'steps': 3303, 'loss/train': 2.919105291366577} +07/25/2024 11:40:51 - INFO - __main__ - Step 3305: {'lr': 0.0004996247994717496, 'samples': 158640, 'steps': 3304, 'loss/train': 2.4226136207580566} +07/25/2024 11:40:51 - INFO - __main__ - Step 3306: {'lr': 0.0004996245113161407, 'samples': 158688, 'steps': 3305, 'loss/train': 3.3219311237335205} +07/25/2024 11:40:52 - INFO - __main__ - Step 3307: {'lr': 0.0004996242230500051, 'samples': 158736, 'steps': 3306, 'loss/train': 2.4521872997283936} +07/25/2024 11:40:52 - INFO - __main__ - Step 3308: {'lr': 0.0004996239346733429, 'samples': 158784, 'steps': 3307, 'loss/train': 3.063145875930786} +07/25/2024 11:40:52 - INFO - __main__ - Step 3309: {'lr': 0.0004996236461861542, 'samples': 158832, 'steps': 3308, 'loss/train': 2.447134256362915} +07/25/2024 11:40:52 - INFO - __main__ - Step 3310: {'lr': 0.0004996233575884391, 'samples': 158880, 'steps': 3309, 'loss/train': 2.4923126697540283} +07/25/2024 11:40:53 - INFO - __main__ - Step 3311: {'lr': 0.0004996230688801978, 'samples': 158928, 'steps': 3310, 'loss/train': 2.8657970428466797} +07/25/2024 11:40:53 - INFO - __main__ - Step 3312: {'lr': 0.0004996227800614305, 'samples': 158976, 'steps': 3311, 'loss/train': 2.4733877182006836} +07/25/2024 11:40:53 - INFO - __main__ - Step 3313: {'lr': 0.000499622491132137, 'samples': 159024, 'steps': 3312, 'loss/train': 2.6874217987060547} +07/25/2024 11:40:54 - INFO - __main__ - Step 3314: {'lr': 0.0004996222020923179, 'samples': 159072, 'steps': 3313, 'loss/train': 1.9402538537979126} +07/25/2024 11:40:54 - INFO - __main__ - Step 3315: {'lr': 0.000499621912941973, 'samples': 159120, 'steps': 3314, 'loss/train': 0.4686526656150818} +07/25/2024 11:40:54 - INFO - __main__ - Step 3316: {'lr': 0.0004996216236811024, 'samples': 159168, 'steps': 3315, 'loss/train': 2.789992570877075} +07/25/2024 11:40:54 - INFO - __main__ - Step 3317: {'lr': 0.0004996213343097066, 'samples': 159216, 'steps': 3316, 'loss/train': 2.422919511795044} +07/25/2024 11:40:55 - INFO - __main__ - Step 3318: {'lr': 0.0004996210448277853, 'samples': 159264, 'steps': 3317, 'loss/train': 3.152967691421509} +07/25/2024 11:40:55 - INFO - __main__ - Step 3319: {'lr': 0.0004996207552353387, 'samples': 159312, 'steps': 3318, 'loss/train': 2.599982976913452} +07/25/2024 11:40:55 - INFO - __main__ - Step 3320: {'lr': 0.0004996204655323671, 'samples': 159360, 'steps': 3319, 'loss/train': 1.3988194465637207} +07/25/2024 11:40:56 - INFO - __main__ - Step 3321: {'lr': 0.0004996201757188707, 'samples': 159408, 'steps': 3320, 'loss/train': 0.44202369451522827} +07/25/2024 11:40:56 - INFO - __main__ - Step 3322: {'lr': 0.0004996198857948493, 'samples': 159456, 'steps': 3321, 'loss/train': 2.7152109146118164} +07/25/2024 11:40:56 - INFO - __main__ - Step 3323: {'lr': 0.0004996195957603033, 'samples': 159504, 'steps': 3322, 'loss/train': 2.569396734237671} +07/25/2024 11:40:56 - INFO - __main__ - Step 3324: {'lr': 0.0004996193056152326, 'samples': 159552, 'steps': 3323, 'loss/train': 6.010128974914551} +07/25/2024 11:40:57 - INFO - __main__ - Step 3325: {'lr': 0.0004996190153596376, 'samples': 159600, 'steps': 3324, 'loss/train': 2.1841318607330322} +07/25/2024 11:40:57 - INFO - __main__ - Step 3326: {'lr': 0.0004996187249935182, 'samples': 159648, 'steps': 3325, 'loss/train': 1.850309133529663} +07/25/2024 11:40:57 - INFO - __main__ - Step 3327: {'lr': 0.0004996184345168747, 'samples': 159696, 'steps': 3326, 'loss/train': 2.5772430896759033} +07/25/2024 11:40:58 - INFO - __main__ - Step 3328: {'lr': 0.0004996181439297072, 'samples': 159744, 'steps': 3327, 'loss/train': 2.970872640609741} +07/25/2024 11:40:58 - INFO - __main__ - Step 3329: {'lr': 0.0004996178532320157, 'samples': 159792, 'steps': 3328, 'loss/train': 2.097419261932373} +07/25/2024 11:40:58 - INFO - __main__ - Step 3330: {'lr': 0.0004996175624238004, 'samples': 159840, 'steps': 3329, 'loss/train': 3.140658378601074} +07/25/2024 11:40:58 - INFO - __main__ - Step 3331: {'lr': 0.0004996172715050614, 'samples': 159888, 'steps': 3330, 'loss/train': 2.3079311847686768} +07/25/2024 11:40:59 - INFO - __main__ - Step 3332: {'lr': 0.000499616980475799, 'samples': 159936, 'steps': 3331, 'loss/train': 3.719452381134033} +07/25/2024 11:40:59 - INFO - __main__ - Step 3333: {'lr': 0.0004996166893360131, 'samples': 159984, 'steps': 3332, 'loss/train': 2.6313226222991943} +07/25/2024 11:40:59 - INFO - __main__ - Step 3334: {'lr': 0.0004996163980857039, 'samples': 160032, 'steps': 3333, 'loss/train': 2.6887335777282715} +07/25/2024 11:41:00 - INFO - __main__ - Step 3335: {'lr': 0.0004996161067248717, 'samples': 160080, 'steps': 3334, 'loss/train': 2.6425554752349854} +07/25/2024 11:41:00 - INFO - __main__ - Step 3336: {'lr': 0.0004996158152535163, 'samples': 160128, 'steps': 3335, 'loss/train': 1.8880527019500732} +07/25/2024 11:41:00 - INFO - __main__ - Step 3337: {'lr': 0.0004996155236716381, 'samples': 160176, 'steps': 3336, 'loss/train': 2.9741272926330566} +07/25/2024 11:41:00 - INFO - __main__ - Step 3338: {'lr': 0.0004996152319792372, 'samples': 160224, 'steps': 3337, 'loss/train': 2.345179796218872} +07/25/2024 11:41:01 - INFO - __main__ - Step 3339: {'lr': 0.0004996149401763135, 'samples': 160272, 'steps': 3338, 'loss/train': 0.40109702944755554} +07/25/2024 11:41:01 - INFO - __main__ - Step 3340: {'lr': 0.0004996146482628674, 'samples': 160320, 'steps': 3339, 'loss/train': 2.9881319999694824} +07/25/2024 11:41:01 - INFO - __main__ - Step 3341: {'lr': 0.000499614356238899, 'samples': 160368, 'steps': 3340, 'loss/train': 2.4959053993225098} +07/25/2024 11:41:02 - INFO - __main__ - Step 3342: {'lr': 0.0004996140641044082, 'samples': 160416, 'steps': 3341, 'loss/train': 2.952439308166504} +07/25/2024 11:41:02 - INFO - __main__ - Step 3343: {'lr': 0.0004996137718593954, 'samples': 160464, 'steps': 3342, 'loss/train': 2.837184190750122} +07/25/2024 11:41:02 - INFO - __main__ - Step 3344: {'lr': 0.0004996134795038607, 'samples': 160512, 'steps': 3343, 'loss/train': 1.1605697870254517} +07/25/2024 11:41:02 - INFO - __main__ - Step 3345: {'lr': 0.000499613187037804, 'samples': 160560, 'steps': 3344, 'loss/train': 0.36371034383773804} +07/25/2024 11:41:03 - INFO - __main__ - Step 3346: {'lr': 0.0004996128944612255, 'samples': 160608, 'steps': 3345, 'loss/train': 2.5823566913604736} +07/25/2024 11:41:03 - INFO - __main__ - Step 3347: {'lr': 0.0004996126017741256, 'samples': 160656, 'steps': 3346, 'loss/train': 2.749018430709839} +07/25/2024 11:41:03 - INFO - __main__ - Step 3348: {'lr': 0.0004996123089765041, 'samples': 160704, 'steps': 3347, 'loss/train': 6.026431083679199} +07/25/2024 11:41:04 - INFO - __main__ - Step 3349: {'lr': 0.0004996120160683613, 'samples': 160752, 'steps': 3348, 'loss/train': 2.421858787536621} +07/25/2024 11:41:04 - INFO - __main__ - Step 3350: {'lr': 0.0004996117230496973, 'samples': 160800, 'steps': 3349, 'loss/train': 1.8969717025756836} +07/25/2024 11:41:04 - INFO - __main__ - Step 3351: {'lr': 0.0004996114299205122, 'samples': 160848, 'steps': 3350, 'loss/train': 3.49814772605896} +07/25/2024 11:41:04 - INFO - __main__ - Step 3352: {'lr': 0.0004996111366808063, 'samples': 160896, 'steps': 3351, 'loss/train': 2.545368194580078} +07/25/2024 11:41:05 - INFO - __main__ - Step 3353: {'lr': 0.0004996108433305794, 'samples': 160944, 'steps': 3352, 'loss/train': 2.0943174362182617} +07/25/2024 11:41:05 - INFO - __main__ - Step 3354: {'lr': 0.0004996105498698319, 'samples': 160992, 'steps': 3353, 'loss/train': 2.8456015586853027} +07/25/2024 11:41:05 - INFO - __main__ - Step 3355: {'lr': 0.0004996102562985638, 'samples': 161040, 'steps': 3354, 'loss/train': 1.9758583307266235} +07/25/2024 11:41:05 - INFO - __main__ - Step 3356: {'lr': 0.0004996099626167753, 'samples': 161088, 'steps': 3355, 'loss/train': 3.4929873943328857} +07/25/2024 11:41:06 - INFO - __main__ - Step 3357: {'lr': 0.0004996096688244664, 'samples': 161136, 'steps': 3356, 'loss/train': 2.3461666107177734} +07/25/2024 11:41:06 - INFO - __main__ - Step 3358: {'lr': 0.0004996093749216375, 'samples': 161184, 'steps': 3357, 'loss/train': 2.401735782623291} +07/25/2024 11:41:06 - INFO - __main__ - Step 3359: {'lr': 0.0004996090809082885, 'samples': 161232, 'steps': 3358, 'loss/train': 2.938819646835327} +07/25/2024 11:41:07 - INFO - __main__ - Step 3360: {'lr': 0.0004996087867844195, 'samples': 161280, 'steps': 3359, 'loss/train': 2.2416200637817383} +07/25/2024 11:41:07 - INFO - __main__ - Step 3361: {'lr': 0.0004996084925500308, 'samples': 161328, 'steps': 3360, 'loss/train': 2.724693775177002} +07/25/2024 11:41:07 - INFO - __main__ - Step 3362: {'lr': 0.0004996081982051225, 'samples': 161376, 'steps': 3361, 'loss/train': 2.764556407928467} +07/25/2024 11:41:07 - INFO - __main__ - Step 3363: {'lr': 0.0004996079037496947, 'samples': 161424, 'steps': 3362, 'loss/train': 1.9072555303573608} +07/25/2024 11:41:08 - INFO - __main__ - Step 3364: {'lr': 0.0004996076091837474, 'samples': 161472, 'steps': 3363, 'loss/train': 2.4867823123931885} +07/25/2024 11:41:08 - INFO - __main__ - Step 3365: {'lr': 0.0004996073145072809, 'samples': 161520, 'steps': 3364, 'loss/train': 2.7982146739959717} +07/25/2024 11:41:08 - INFO - __main__ - Step 3366: {'lr': 0.0004996070197202953, 'samples': 161568, 'steps': 3365, 'loss/train': 2.8773601055145264} +07/25/2024 11:41:09 - INFO - __main__ - Step 3367: {'lr': 0.0004996067248227906, 'samples': 161616, 'steps': 3366, 'loss/train': 2.8915669918060303} +07/25/2024 11:41:09 - INFO - __main__ - Step 3368: {'lr': 0.0004996064298147672, 'samples': 161664, 'steps': 3367, 'loss/train': 1.0900969505310059} +07/25/2024 11:41:09 - INFO - __main__ - Step 3369: {'lr': 0.000499606134696225, 'samples': 161712, 'steps': 3368, 'loss/train': 1.9020038843154907} +07/25/2024 11:41:09 - INFO - __main__ - Step 3370: {'lr': 0.0004996058394671642, 'samples': 161760, 'steps': 3369, 'loss/train': 3.3419535160064697} +07/25/2024 11:41:10 - INFO - __main__ - Step 3371: {'lr': 0.0004996055441275849, 'samples': 161808, 'steps': 3370, 'loss/train': 3.3733623027801514} +07/25/2024 11:41:10 - INFO - __main__ - Step 3372: {'lr': 0.0004996052486774872, 'samples': 161856, 'steps': 3371, 'loss/train': 6.008428573608398} +07/25/2024 11:41:10 - INFO - __main__ - Step 3373: {'lr': 0.0004996049531168714, 'samples': 161904, 'steps': 3372, 'loss/train': 2.4910616874694824} +07/25/2024 11:41:11 - INFO - __main__ - Step 3374: {'lr': 0.0004996046574457375, 'samples': 161952, 'steps': 3373, 'loss/train': 2.795076847076416} +07/25/2024 11:41:11 - INFO - __main__ - Step 3375: {'lr': 0.0004996043616640856, 'samples': 162000, 'steps': 3374, 'loss/train': 1.7015935182571411} +07/25/2024 11:41:11 - INFO - __main__ - Step 3376: {'lr': 0.0004996040657719159, 'samples': 162048, 'steps': 3375, 'loss/train': 2.5883071422576904} +07/25/2024 11:41:11 - INFO - __main__ - Step 3377: {'lr': 0.0004996037697692286, 'samples': 162096, 'steps': 3376, 'loss/train': 1.9417903423309326} +07/25/2024 11:41:12 - INFO - __main__ - Step 3378: {'lr': 0.0004996034736560235, 'samples': 162144, 'steps': 3377, 'loss/train': 2.5827176570892334} +07/25/2024 11:41:12 - INFO - __main__ - Step 3379: {'lr': 0.0004996031774323012, 'samples': 162192, 'steps': 3378, 'loss/train': 2.4687137603759766} +07/25/2024 11:41:12 - INFO - __main__ - Step 3380: {'lr': 0.0004996028810980615, 'samples': 162240, 'steps': 3379, 'loss/train': 3.3416855335235596} +07/25/2024 11:41:13 - INFO - __main__ - Step 3381: {'lr': 0.0004996025846533048, 'samples': 162288, 'steps': 3380, 'loss/train': 2.400721549987793} +07/25/2024 11:41:13 - INFO - __main__ - Step 3382: {'lr': 0.0004996022880980309, 'samples': 162336, 'steps': 3381, 'loss/train': 2.7021610736846924} +07/25/2024 11:41:13 - INFO - __main__ - Step 3383: {'lr': 0.0004996019914322401, 'samples': 162384, 'steps': 3382, 'loss/train': 3.114673376083374} +07/25/2024 11:41:13 - INFO - __main__ - Step 3384: {'lr': 0.0004996016946559327, 'samples': 162432, 'steps': 3383, 'loss/train': 2.9395911693573} +07/25/2024 11:41:14 - INFO - __main__ - Step 3385: {'lr': 0.0004996013977691085, 'samples': 162480, 'steps': 3384, 'loss/train': 3.0964648723602295} +07/25/2024 11:41:14 - INFO - __main__ - Step 3386: {'lr': 0.0004996011007717679, 'samples': 162528, 'steps': 3385, 'loss/train': 2.83035945892334} +07/25/2024 11:41:14 - INFO - __main__ - Step 3387: {'lr': 0.0004996008036639108, 'samples': 162576, 'steps': 3386, 'loss/train': 2.425053358078003} +07/25/2024 11:41:15 - INFO - __main__ - Step 3388: {'lr': 0.0004996005064455375, 'samples': 162624, 'steps': 3387, 'loss/train': 2.2526657581329346} +07/25/2024 11:41:15 - INFO - __main__ - Step 3389: {'lr': 0.0004996002091166483, 'samples': 162672, 'steps': 3388, 'loss/train': 2.3394546508789062} +07/25/2024 11:41:15 - INFO - __main__ - Step 3390: {'lr': 0.0004995999116772428, 'samples': 162720, 'steps': 3389, 'loss/train': 2.631314992904663} +07/25/2024 11:41:15 - INFO - __main__ - Step 3391: {'lr': 0.0004995996141273217, 'samples': 162768, 'steps': 3390, 'loss/train': 2.536745309829712} +07/25/2024 11:41:16 - INFO - __main__ - Step 3392: {'lr': 0.0004995993164668847, 'samples': 162816, 'steps': 3391, 'loss/train': 1.015249252319336} +07/25/2024 11:41:16 - INFO - __main__ - Step 3393: {'lr': 0.0004995990186959323, 'samples': 162864, 'steps': 3392, 'loss/train': 2.7531216144561768} +07/25/2024 11:41:16 - INFO - __main__ - Step 3394: {'lr': 0.0004995987208144643, 'samples': 162912, 'steps': 3393, 'loss/train': 2.839256763458252} +07/25/2024 11:41:17 - INFO - __main__ - Step 3395: {'lr': 0.000499598422822481, 'samples': 162960, 'steps': 3394, 'loss/train': 2.973367214202881} +07/25/2024 11:41:17 - INFO - __main__ - Step 3396: {'lr': 0.0004995981247199826, 'samples': 163008, 'steps': 3395, 'loss/train': 2.9655396938323975} +07/25/2024 11:41:17 - INFO - __main__ - Step 3397: {'lr': 0.000499597826506969, 'samples': 163056, 'steps': 3396, 'loss/train': 2.435086250305176} +07/25/2024 11:41:17 - INFO - __main__ - Step 3398: {'lr': 0.0004995975281834406, 'samples': 163104, 'steps': 3397, 'loss/train': 2.3326516151428223} +07/25/2024 11:41:18 - INFO - __main__ - Step 3399: {'lr': 0.0004995972297493974, 'samples': 163152, 'steps': 3398, 'loss/train': 3.143099069595337} +07/25/2024 11:41:18 - INFO - __main__ - Step 3400: {'lr': 0.0004995969312048394, 'samples': 163200, 'steps': 3399, 'loss/train': 2.3933565616607666} +07/25/2024 11:41:18 - INFO - __main__ - Step 3401: {'lr': 0.0004995966325497671, 'samples': 163248, 'steps': 3400, 'loss/train': 2.1569809913635254} +07/25/2024 11:41:19 - INFO - __main__ - Step 3402: {'lr': 0.0004995963337841802, 'samples': 163296, 'steps': 3401, 'loss/train': 2.3329834938049316} +07/25/2024 11:41:19 - INFO - __main__ - Step 3403: {'lr': 0.0004995960349080792, 'samples': 163344, 'steps': 3402, 'loss/train': 2.437556505203247} +07/25/2024 11:41:19 - INFO - __main__ - Step 3404: {'lr': 0.000499595735921464, 'samples': 163392, 'steps': 3403, 'loss/train': 2.478877305984497} +07/25/2024 11:41:19 - INFO - __main__ - Step 3405: {'lr': 0.0004995954368243348, 'samples': 163440, 'steps': 3404, 'loss/train': 3.141655445098877} +07/25/2024 11:41:20 - INFO - __main__ - Step 3406: {'lr': 0.0004995951376166917, 'samples': 163488, 'steps': 3405, 'loss/train': 2.744507312774658} +07/25/2024 11:41:20 - INFO - __main__ - Step 3407: {'lr': 0.0004995948382985349, 'samples': 163536, 'steps': 3406, 'loss/train': 3.4251043796539307} +07/25/2024 11:41:20 - INFO - __main__ - Step 3408: {'lr': 0.0004995945388698644, 'samples': 163584, 'steps': 3407, 'loss/train': 2.7291252613067627} +07/25/2024 11:41:21 - INFO - __main__ - Step 3409: {'lr': 0.0004995942393306806, 'samples': 163632, 'steps': 3408, 'loss/train': 3.067128896713257} +07/25/2024 11:41:21 - INFO - __main__ - Step 3410: {'lr': 0.0004995939396809833, 'samples': 163680, 'steps': 3409, 'loss/train': 2.487455368041992} +07/25/2024 11:41:21 - INFO - __main__ - Step 3411: {'lr': 0.0004995936399207729, 'samples': 163728, 'steps': 3410, 'loss/train': 2.8756306171417236} +07/25/2024 11:41:21 - INFO - __main__ - Step 3412: {'lr': 0.0004995933400500494, 'samples': 163776, 'steps': 3411, 'loss/train': 2.302663564682007} +07/25/2024 11:41:22 - INFO - __main__ - Step 3413: {'lr': 0.000499593040068813, 'samples': 163824, 'steps': 3412, 'loss/train': 3.5250351428985596} +07/25/2024 11:41:22 - INFO - __main__ - Step 3414: {'lr': 0.0004995927399770638, 'samples': 163872, 'steps': 3413, 'loss/train': 2.316537618637085} +07/25/2024 11:41:22 - INFO - __main__ - Step 3415: {'lr': 0.000499592439774802, 'samples': 163920, 'steps': 3414, 'loss/train': 2.203124761581421} +07/25/2024 11:41:23 - INFO - __main__ - Step 3416: {'lr': 0.0004995921394620275, 'samples': 163968, 'steps': 3415, 'loss/train': 1.0911575555801392} +07/25/2024 11:41:23 - INFO - __main__ - Step 3417: {'lr': 0.0004995918390387406, 'samples': 164016, 'steps': 3416, 'loss/train': 2.454414129257202} +07/25/2024 11:41:23 - INFO - __main__ - Step 3418: {'lr': 0.0004995915385049416, 'samples': 164064, 'steps': 3417, 'loss/train': 2.407562494277954} +07/25/2024 11:41:23 - INFO - __main__ - Step 3419: {'lr': 0.0004995912378606302, 'samples': 164112, 'steps': 3418, 'loss/train': 2.733003854751587} +07/25/2024 11:41:24 - INFO - __main__ - Step 3420: {'lr': 0.000499590937105807, 'samples': 164160, 'steps': 3419, 'loss/train': 2.358140707015991} +07/25/2024 11:41:24 - INFO - __main__ - Step 3421: {'lr': 0.0004995906362404718, 'samples': 164208, 'steps': 3420, 'loss/train': 3.0522873401641846} +07/25/2024 11:41:24 - INFO - __main__ - Step 3422: {'lr': 0.000499590335264625, 'samples': 164256, 'steps': 3421, 'loss/train': 1.549270749092102} +07/25/2024 11:41:25 - INFO - __main__ - Step 3423: {'lr': 0.0004995900341782665, 'samples': 164304, 'steps': 3422, 'loss/train': 2.6232874393463135} +07/25/2024 11:41:25 - INFO - __main__ - Step 3424: {'lr': 0.0004995897329813965, 'samples': 164352, 'steps': 3423, 'loss/train': 2.416618585586548} +07/25/2024 11:41:25 - INFO - __main__ - Step 3425: {'lr': 0.0004995894316740153, 'samples': 164400, 'steps': 3424, 'loss/train': 2.3255724906921387} +07/25/2024 11:41:25 - INFO - __main__ - Step 3426: {'lr': 0.0004995891302561228, 'samples': 164448, 'steps': 3425, 'loss/train': 2.3941102027893066} +07/25/2024 11:41:26 - INFO - __main__ - Step 3427: {'lr': 0.0004995888287277192, 'samples': 164496, 'steps': 3426, 'loss/train': 2.220872163772583} +07/25/2024 11:41:26 - INFO - __main__ - Step 3428: {'lr': 0.0004995885270888046, 'samples': 164544, 'steps': 3427, 'loss/train': 2.3539223670959473} +07/25/2024 11:41:26 - INFO - __main__ - Step 3429: {'lr': 0.0004995882253393793, 'samples': 164592, 'steps': 3428, 'loss/train': 2.695064067840576} +07/25/2024 11:41:26 - INFO - __main__ - Step 3430: {'lr': 0.0004995879234794433, 'samples': 164640, 'steps': 3429, 'loss/train': 2.8224282264709473} +07/25/2024 11:41:27 - INFO - __main__ - Step 3431: {'lr': 0.0004995876215089967, 'samples': 164688, 'steps': 3430, 'loss/train': 2.8495142459869385} +07/25/2024 11:41:27 - INFO - __main__ - Step 3432: {'lr': 0.0004995873194280398, 'samples': 164736, 'steps': 3431, 'loss/train': 1.8899904489517212} +07/25/2024 11:41:27 - INFO - __main__ - Step 3433: {'lr': 0.0004995870172365726, 'samples': 164784, 'steps': 3432, 'loss/train': 2.1970651149749756} +07/25/2024 11:41:28 - INFO - __main__ - Step 3434: {'lr': 0.0004995867149345952, 'samples': 164832, 'steps': 3433, 'loss/train': 2.447120428085327} +07/25/2024 11:41:28 - INFO - __main__ - Step 3435: {'lr': 0.0004995864125221079, 'samples': 164880, 'steps': 3434, 'loss/train': 2.5096817016601562} +07/25/2024 11:41:28 - INFO - __main__ - Step 3436: {'lr': 0.0004995861099991107, 'samples': 164928, 'steps': 3435, 'loss/train': 2.7717912197113037} +07/25/2024 11:41:28 - INFO - __main__ - Step 3437: {'lr': 0.0004995858073656038, 'samples': 164976, 'steps': 3436, 'loss/train': 2.6857054233551025} +07/25/2024 11:41:29 - INFO - __main__ - Step 3438: {'lr': 0.0004995855046215871, 'samples': 165024, 'steps': 3437, 'loss/train': 1.8735190629959106} +07/25/2024 11:41:29 - INFO - __main__ - Step 3439: {'lr': 0.0004995852017670612, 'samples': 165072, 'steps': 3438, 'loss/train': 2.567662239074707} +07/25/2024 11:41:29 - INFO - __main__ - Step 3440: {'lr': 0.0004995848988020257, 'samples': 165120, 'steps': 3439, 'loss/train': 1.0283589363098145} +07/25/2024 11:41:30 - INFO - __main__ - Step 3441: {'lr': 0.0004995845957264811, 'samples': 165168, 'steps': 3440, 'loss/train': 2.348780393600464} +07/25/2024 11:41:30 - INFO - __main__ - Step 3442: {'lr': 0.0004995842925404276, 'samples': 165216, 'steps': 3441, 'loss/train': 2.198235034942627} +07/25/2024 11:41:30 - INFO - __main__ - Step 3443: {'lr': 0.000499583989243865, 'samples': 165264, 'steps': 3442, 'loss/train': 2.551309585571289} +07/25/2024 11:41:30 - INFO - __main__ - Step 3444: {'lr': 0.0004995836858367937, 'samples': 165312, 'steps': 3443, 'loss/train': 2.3609023094177246} +07/25/2024 11:41:31 - INFO - __main__ - Step 3445: {'lr': 0.0004995833823192137, 'samples': 165360, 'steps': 3444, 'loss/train': 2.644317388534546} +07/25/2024 11:41:31 - INFO - __main__ - Step 3446: {'lr': 0.0004995830786911251, 'samples': 165408, 'steps': 3445, 'loss/train': 2.1076831817626953} +07/25/2024 11:41:31 - INFO - __main__ - Step 3447: {'lr': 0.0004995827749525281, 'samples': 165456, 'steps': 3446, 'loss/train': 2.345639228820801} +07/25/2024 11:41:32 - INFO - __main__ - Step 3448: {'lr': 0.0004995824711034229, 'samples': 165504, 'steps': 3447, 'loss/train': 2.8984992504119873} +07/25/2024 11:41:32 - INFO - __main__ - Step 3449: {'lr': 0.0004995821671438096, 'samples': 165552, 'steps': 3448, 'loss/train': 2.8782057762145996} +07/25/2024 11:41:32 - INFO - __main__ - Step 3450: {'lr': 0.0004995818630736883, 'samples': 165600, 'steps': 3449, 'loss/train': 2.509155035018921} +07/25/2024 11:41:32 - INFO - __main__ - Step 3451: {'lr': 0.0004995815588930591, 'samples': 165648, 'steps': 3450, 'loss/train': 2.624021530151367} +07/25/2024 11:41:33 - INFO - __main__ - Step 3452: {'lr': 0.0004995812546019221, 'samples': 165696, 'steps': 3451, 'loss/train': 1.9233263731002808} +07/25/2024 11:41:33 - INFO - __main__ - Step 3453: {'lr': 0.0004995809502002777, 'samples': 165744, 'steps': 3452, 'loss/train': 2.0223631858825684} +07/25/2024 11:41:33 - INFO - __main__ - Step 3454: {'lr': 0.0004995806456881258, 'samples': 165792, 'steps': 3453, 'loss/train': 2.4820845127105713} +07/25/2024 11:41:34 - INFO - __main__ - Step 3455: {'lr': 0.0004995803410654665, 'samples': 165840, 'steps': 3454, 'loss/train': 2.8294150829315186} +07/25/2024 11:41:34 - INFO - __main__ - Step 3456: {'lr': 0.0004995800363323001, 'samples': 165888, 'steps': 3455, 'loss/train': 2.569091320037842} +07/25/2024 11:41:34 - INFO - __main__ - Step 3457: {'lr': 0.0004995797314886266, 'samples': 165936, 'steps': 3456, 'loss/train': 2.5933165550231934} +07/25/2024 11:41:34 - INFO - __main__ - Step 3458: {'lr': 0.0004995794265344462, 'samples': 165984, 'steps': 3457, 'loss/train': 2.503329277038574} +07/25/2024 11:41:35 - INFO - __main__ - Step 3459: {'lr': 0.000499579121469759, 'samples': 166032, 'steps': 3458, 'loss/train': 2.3691751956939697} +07/25/2024 11:41:35 - INFO - __main__ - Step 3460: {'lr': 0.0004995788162945652, 'samples': 166080, 'steps': 3459, 'loss/train': 3.131631374359131} +07/25/2024 11:41:35 - INFO - __main__ - Step 3461: {'lr': 0.0004995785110088649, 'samples': 166128, 'steps': 3460, 'loss/train': 2.3381807804107666} +07/25/2024 11:41:36 - INFO - __main__ - Step 3462: {'lr': 0.0004995782056126582, 'samples': 166176, 'steps': 3461, 'loss/train': 2.570391893386841} +07/25/2024 11:41:36 - INFO - __main__ - Step 3463: {'lr': 0.0004995779001059453, 'samples': 166224, 'steps': 3462, 'loss/train': 2.2792248725891113} +07/25/2024 11:41:36 - INFO - __main__ - Step 3464: {'lr': 0.0004995775944887263, 'samples': 166272, 'steps': 3463, 'loss/train': 0.9329222440719604} +07/25/2024 11:41:36 - INFO - __main__ - Step 3465: {'lr': 0.0004995772887610013, 'samples': 166320, 'steps': 3464, 'loss/train': 2.3220996856689453} +07/25/2024 11:41:37 - INFO - __main__ - Step 3466: {'lr': 0.0004995769829227705, 'samples': 166368, 'steps': 3465, 'loss/train': 2.5531327724456787} +07/25/2024 11:41:37 - INFO - __main__ - Step 3467: {'lr': 0.000499576676974034, 'samples': 166416, 'steps': 3466, 'loss/train': 2.143690347671509} +07/25/2024 11:41:37 - INFO - __main__ - Step 3468: {'lr': 0.0004995763709147919, 'samples': 166464, 'steps': 3467, 'loss/train': 2.53476619720459} +07/25/2024 11:41:38 - INFO - __main__ - Step 3469: {'lr': 0.0004995760647450443, 'samples': 166512, 'steps': 3468, 'loss/train': 2.508934497833252} +07/25/2024 11:41:38 - INFO - __main__ - Step 3470: {'lr': 0.0004995757584647916, 'samples': 166560, 'steps': 3469, 'loss/train': 2.4326398372650146} +07/25/2024 11:41:38 - INFO - __main__ - Step 3471: {'lr': 0.0004995754520740336, 'samples': 166608, 'steps': 3470, 'loss/train': 2.720240354537964} +07/25/2024 11:41:38 - INFO - __main__ - Step 3472: {'lr': 0.0004995751455727706, 'samples': 166656, 'steps': 3471, 'loss/train': 2.6654229164123535} +07/25/2024 11:41:39 - INFO - __main__ - Step 3473: {'lr': 0.0004995748389610028, 'samples': 166704, 'steps': 3472, 'loss/train': 2.4519224166870117} +07/25/2024 11:41:39 - INFO - __main__ - Step 3474: {'lr': 0.0004995745322387302, 'samples': 166752, 'steps': 3473, 'loss/train': 2.33034086227417} +07/25/2024 11:41:39 - INFO - __main__ - Step 3475: {'lr': 0.000499574225405953, 'samples': 166800, 'steps': 3474, 'loss/train': 2.367685556411743} +07/25/2024 11:41:40 - INFO - __main__ - Step 3476: {'lr': 0.0004995739184626714, 'samples': 166848, 'steps': 3475, 'loss/train': 2.566876173019409} +07/25/2024 11:41:40 - INFO - __main__ - Step 3477: {'lr': 0.0004995736114088853, 'samples': 166896, 'steps': 3476, 'loss/train': 1.9475321769714355} +07/25/2024 11:41:40 - INFO - __main__ - Step 3478: {'lr': 0.0004995733042445951, 'samples': 166944, 'steps': 3477, 'loss/train': 2.2776920795440674} +07/25/2024 11:41:40 - INFO - __main__ - Step 3479: {'lr': 0.0004995729969698009, 'samples': 166992, 'steps': 3478, 'loss/train': 1.7994376420974731} +07/25/2024 11:41:41 - INFO - __main__ - Step 3480: {'lr': 0.0004995726895845027, 'samples': 167040, 'steps': 3479, 'loss/train': 2.458975076675415} +07/25/2024 11:41:41 - INFO - __main__ - Step 3481: {'lr': 0.0004995723820887006, 'samples': 167088, 'steps': 3480, 'loss/train': 2.7328717708587646} +07/25/2024 11:41:41 - INFO - __main__ - Step 3482: {'lr': 0.000499572074482395, 'samples': 167136, 'steps': 3481, 'loss/train': 2.762251853942871} +07/25/2024 11:41:42 - INFO - __main__ - Step 3483: {'lr': 0.0004995717667655858, 'samples': 167184, 'steps': 3482, 'loss/train': 2.7954230308532715} +07/25/2024 11:41:42 - INFO - __main__ - Step 3484: {'lr': 0.0004995714589382733, 'samples': 167232, 'steps': 3483, 'loss/train': 2.6065125465393066} +07/25/2024 11:41:42 - INFO - __main__ - Step 3485: {'lr': 0.0004995711510004574, 'samples': 167280, 'steps': 3484, 'loss/train': 2.6814143657684326} +07/25/2024 11:41:42 - INFO - __main__ - Step 3486: {'lr': 0.0004995708429521385, 'samples': 167328, 'steps': 3485, 'loss/train': 2.696439504623413} +07/25/2024 11:41:43 - INFO - __main__ - Step 3487: {'lr': 0.0004995705347933166, 'samples': 167376, 'steps': 3486, 'loss/train': 2.441866636276245} +07/25/2024 11:41:43 - INFO - __main__ - Step 3488: {'lr': 0.0004995702265239919, 'samples': 167424, 'steps': 3487, 'loss/train': 0.8752374053001404} +07/25/2024 11:41:43 - INFO - __main__ - Step 3489: {'lr': 0.0004995699181441644, 'samples': 167472, 'steps': 3488, 'loss/train': 1.8553284406661987} +07/25/2024 11:41:44 - INFO - __main__ - Step 3490: {'lr': 0.0004995696096538343, 'samples': 167520, 'steps': 3489, 'loss/train': 2.479414463043213} +07/25/2024 11:41:44 - INFO - __main__ - Step 3491: {'lr': 0.0004995693010530019, 'samples': 167568, 'steps': 3490, 'loss/train': 2.4995148181915283} +07/25/2024 11:41:44 - INFO - __main__ - Step 3492: {'lr': 0.0004995689923416672, 'samples': 167616, 'steps': 3491, 'loss/train': 2.2801268100738525} +07/25/2024 11:41:44 - INFO - __main__ - Step 3493: {'lr': 0.0004995686835198303, 'samples': 167664, 'steps': 3492, 'loss/train': 2.4844179153442383} +07/25/2024 11:41:45 - INFO - __main__ - Step 3494: {'lr': 0.0004995683745874914, 'samples': 167712, 'steps': 3493, 'loss/train': 2.9126710891723633} +07/25/2024 11:41:45 - INFO - __main__ - Step 3495: {'lr': 0.0004995680655446506, 'samples': 167760, 'steps': 3494, 'loss/train': 2.1269748210906982} +07/25/2024 11:41:45 - INFO - __main__ - Step 3496: {'lr': 0.000499567756391308, 'samples': 167808, 'steps': 3495, 'loss/train': 2.254723072052002} +07/25/2024 11:41:45 - INFO - __main__ - Step 3497: {'lr': 0.0004995674471274639, 'samples': 167856, 'steps': 3496, 'loss/train': 2.3773577213287354} +07/25/2024 11:41:46 - INFO - __main__ - Step 3498: {'lr': 0.0004995671377531183, 'samples': 167904, 'steps': 3497, 'loss/train': 3.0991969108581543} +07/25/2024 11:41:46 - INFO - __main__ - Step 3499: {'lr': 0.0004995668282682713, 'samples': 167952, 'steps': 3498, 'loss/train': 2.7494540214538574} +07/25/2024 11:41:46 - INFO - __main__ - Step 3500: {'lr': 0.0004995665186729231, 'samples': 168000, 'steps': 3499, 'loss/train': 2.771595001220703} +07/25/2024 11:41:47 - INFO - __main__ - Step 3501: {'lr': 0.0004995662089670739, 'samples': 168048, 'steps': 3500, 'loss/train': 2.4004578590393066} +07/25/2024 11:41:47 - INFO - __main__ - Step 3502: {'lr': 0.0004995658991507237, 'samples': 168096, 'steps': 3501, 'loss/train': 1.8888072967529297} +07/25/2024 11:41:47 - INFO - __main__ - Step 3503: {'lr': 0.0004995655892238728, 'samples': 168144, 'steps': 3502, 'loss/train': 1.6412489414215088} +07/25/2024 11:41:47 - INFO - __main__ - Step 3504: {'lr': 0.0004995652791865212, 'samples': 168192, 'steps': 3503, 'loss/train': 2.7607240676879883} +07/25/2024 11:41:48 - INFO - __main__ - Step 3505: {'lr': 0.000499564969038669, 'samples': 168240, 'steps': 3504, 'loss/train': 2.6570565700531006} +07/25/2024 11:41:48 - INFO - __main__ - Step 3506: {'lr': 0.0004995646587803166, 'samples': 168288, 'steps': 3505, 'loss/train': 2.800358295440674} +07/25/2024 11:41:48 - INFO - __main__ - Step 3507: {'lr': 0.0004995643484114639, 'samples': 168336, 'steps': 3506, 'loss/train': 2.533646583557129} +07/25/2024 11:41:49 - INFO - __main__ - Step 3508: {'lr': 0.0004995640379321111, 'samples': 168384, 'steps': 3507, 'loss/train': 2.28568696975708} +07/25/2024 11:41:49 - INFO - __main__ - Step 3509: {'lr': 0.0004995637273422583, 'samples': 168432, 'steps': 3508, 'loss/train': 2.7927093505859375} +07/25/2024 11:41:49 - INFO - __main__ - Step 3510: {'lr': 0.0004995634166419057, 'samples': 168480, 'steps': 3509, 'loss/train': 2.6031744480133057} +07/25/2024 11:41:49 - INFO - __main__ - Step 3511: {'lr': 0.0004995631058310534, 'samples': 168528, 'steps': 3510, 'loss/train': 2.5652780532836914} +07/25/2024 11:41:50 - INFO - __main__ - Step 3512: {'lr': 0.0004995627949097016, 'samples': 168576, 'steps': 3511, 'loss/train': 0.9632576704025269} +07/25/2024 11:41:50 - INFO - __main__ - Step 3513: {'lr': 0.0004995624838778503, 'samples': 168624, 'steps': 3512, 'loss/train': 2.460876941680908} +07/25/2024 11:41:50 - INFO - __main__ - Step 3514: {'lr': 0.0004995621727354998, 'samples': 168672, 'steps': 3513, 'loss/train': 2.4429187774658203} +07/25/2024 11:41:51 - INFO - __main__ - Step 3515: {'lr': 0.0004995618614826501, 'samples': 168720, 'steps': 3514, 'loss/train': 2.4104654788970947} +07/25/2024 11:41:51 - INFO - __main__ - Step 3516: {'lr': 0.0004995615501193015, 'samples': 168768, 'steps': 3515, 'loss/train': 3.135831594467163} +07/25/2024 11:41:51 - INFO - __main__ - Step 3517: {'lr': 0.0004995612386454539, 'samples': 168816, 'steps': 3516, 'loss/train': 2.419018268585205} +07/25/2024 11:41:51 - INFO - __main__ - Step 3518: {'lr': 0.0004995609270611077, 'samples': 168864, 'steps': 3517, 'loss/train': 2.6948437690734863} +07/25/2024 11:41:52 - INFO - __main__ - Step 3519: {'lr': 0.0004995606153662629, 'samples': 168912, 'steps': 3518, 'loss/train': 2.054386854171753} +07/25/2024 11:41:52 - INFO - __main__ - Step 3520: {'lr': 0.0004995603035609195, 'samples': 168960, 'steps': 3519, 'loss/train': 2.682905912399292} +07/25/2024 11:41:52 - INFO - __main__ - Step 3521: {'lr': 0.0004995599916450779, 'samples': 169008, 'steps': 3520, 'loss/train': 1.9843872785568237} +07/25/2024 11:41:53 - INFO - __main__ - Step 3522: {'lr': 0.0004995596796187381, 'samples': 169056, 'steps': 3521, 'loss/train': 2.4221458435058594} +07/25/2024 11:41:53 - INFO - __main__ - Step 3523: {'lr': 0.0004995593674819004, 'samples': 169104, 'steps': 3522, 'loss/train': 2.9076480865478516} +07/25/2024 11:41:53 - INFO - __main__ - Step 3524: {'lr': 0.0004995590552345646, 'samples': 169152, 'steps': 3523, 'loss/train': 2.7492992877960205} +07/25/2024 11:41:53 - INFO - __main__ - Step 3525: {'lr': 0.0004995587428767311, 'samples': 169200, 'steps': 3524, 'loss/train': 2.8007984161376953} +07/25/2024 11:41:54 - INFO - __main__ - Step 3526: {'lr': 0.0004995584304084001, 'samples': 169248, 'steps': 3525, 'loss/train': 4.093027114868164} +07/25/2024 11:41:54 - INFO - __main__ - Step 3527: {'lr': 0.0004995581178295716, 'samples': 169296, 'steps': 3526, 'loss/train': 2.5160398483276367} +07/25/2024 11:41:54 - INFO - __main__ - Step 3528: {'lr': 0.0004995578051402457, 'samples': 169344, 'steps': 3527, 'loss/train': 2.582390069961548} +07/25/2024 11:41:55 - INFO - __main__ - Step 3529: {'lr': 0.0004995574923404225, 'samples': 169392, 'steps': 3528, 'loss/train': 2.2350540161132812} +07/25/2024 11:41:55 - INFO - __main__ - Step 3530: {'lr': 0.0004995571794301025, 'samples': 169440, 'steps': 3529, 'loss/train': 2.3860862255096436} +07/25/2024 11:41:55 - INFO - __main__ - Step 3531: {'lr': 0.0004995568664092853, 'samples': 169488, 'steps': 3530, 'loss/train': 2.9227263927459717} +07/25/2024 11:41:55 - INFO - __main__ - Step 3532: {'lr': 0.0004995565532779715, 'samples': 169536, 'steps': 3531, 'loss/train': 2.9621193408966064} +07/25/2024 11:41:56 - INFO - __main__ - Step 3533: {'lr': 0.0004995562400361609, 'samples': 169584, 'steps': 3532, 'loss/train': 2.254399299621582} +07/25/2024 11:41:56 - INFO - __main__ - Step 3534: {'lr': 0.0004995559266838538, 'samples': 169632, 'steps': 3533, 'loss/train': 1.2192113399505615} +07/25/2024 11:41:56 - INFO - __main__ - Step 3535: {'lr': 0.0004995556132210505, 'samples': 169680, 'steps': 3534, 'loss/train': 3.6228671073913574} +07/25/2024 11:41:57 - INFO - __main__ - Step 3536: {'lr': 0.0004995552996477508, 'samples': 169728, 'steps': 3535, 'loss/train': 0.9198848605155945} +07/25/2024 11:41:57 - INFO - __main__ - Step 3537: {'lr': 0.000499554985963955, 'samples': 169776, 'steps': 3536, 'loss/train': 2.1287307739257812} +07/25/2024 11:41:57 - INFO - __main__ - Step 3538: {'lr': 0.0004995546721696634, 'samples': 169824, 'steps': 3537, 'loss/train': 2.8781158924102783} +07/25/2024 11:41:57 - INFO - __main__ - Step 3539: {'lr': 0.0004995543582648758, 'samples': 169872, 'steps': 3538, 'loss/train': 2.387512683868408} +07/25/2024 11:41:58 - INFO - __main__ - Step 3540: {'lr': 0.0004995540442495927, 'samples': 169920, 'steps': 3539, 'loss/train': 2.4666879177093506} +07/25/2024 11:41:58 - INFO - __main__ - Step 3541: {'lr': 0.000499553730123814, 'samples': 169968, 'steps': 3540, 'loss/train': 2.5696043968200684} +07/25/2024 11:41:58 - INFO - __main__ - Step 3542: {'lr': 0.0004995534158875398, 'samples': 170016, 'steps': 3541, 'loss/train': 2.331414222717285} +07/25/2024 11:41:59 - INFO - __main__ - Step 3543: {'lr': 0.0004995531015407704, 'samples': 170064, 'steps': 3542, 'loss/train': 2.8448634147644043} +07/25/2024 11:41:59 - INFO - __main__ - Step 3544: {'lr': 0.0004995527870835059, 'samples': 170112, 'steps': 3543, 'loss/train': 2.209989309310913} +07/25/2024 11:41:59 - INFO - __main__ - Step 3545: {'lr': 0.0004995524725157465, 'samples': 170160, 'steps': 3544, 'loss/train': 2.1390504837036133} +07/25/2024 11:41:59 - INFO - __main__ - Step 3546: {'lr': 0.0004995521578374921, 'samples': 170208, 'steps': 3545, 'loss/train': 2.6386070251464844} +07/25/2024 11:42:00 - INFO - __main__ - Step 3547: {'lr': 0.0004995518430487431, 'samples': 170256, 'steps': 3546, 'loss/train': 2.712733030319214} +07/25/2024 11:42:00 - INFO - __main__ - Step 3548: {'lr': 0.0004995515281494995, 'samples': 170304, 'steps': 3547, 'loss/train': 2.8147575855255127} +07/25/2024 11:42:00 - INFO - __main__ - Step 3549: {'lr': 0.0004995512131397615, 'samples': 170352, 'steps': 3548, 'loss/train': 2.3845202922821045} +07/25/2024 11:42:01 - INFO - __main__ - Step 3550: {'lr': 0.0004995508980195292, 'samples': 170400, 'steps': 3549, 'loss/train': 4.106863498687744} +07/25/2024 11:42:01 - INFO - __main__ - Step 3551: {'lr': 0.0004995505827888028, 'samples': 170448, 'steps': 3550, 'loss/train': 1.6702966690063477} +07/25/2024 11:42:01 - INFO - __main__ - Step 3552: {'lr': 0.0004995502674475823, 'samples': 170496, 'steps': 3551, 'loss/train': 2.5689916610717773} +07/25/2024 11:42:01 - INFO - __main__ - Step 3553: {'lr': 0.0004995499519958679, 'samples': 170544, 'steps': 3552, 'loss/train': 2.5718345642089844} +07/25/2024 11:42:02 - INFO - __main__ - Step 3554: {'lr': 0.0004995496364336599, 'samples': 170592, 'steps': 3553, 'loss/train': 2.789763927459717} +07/25/2024 11:42:02 - INFO - __main__ - Step 3555: {'lr': 0.0004995493207609582, 'samples': 170640, 'steps': 3554, 'loss/train': 3.0311434268951416} +07/25/2024 11:42:02 - INFO - __main__ - Step 3556: {'lr': 0.0004995490049777632, 'samples': 170688, 'steps': 3555, 'loss/train': 2.8310673236846924} +07/25/2024 11:42:03 - INFO - __main__ - Step 3557: {'lr': 0.0004995486890840748, 'samples': 170736, 'steps': 3556, 'loss/train': 1.9007519483566284} +07/25/2024 11:42:03 - INFO - __main__ - Step 3558: {'lr': 0.0004995483730798932, 'samples': 170784, 'steps': 3557, 'loss/train': 2.5931994915008545} +07/25/2024 11:42:03 - INFO - __main__ - Step 3559: {'lr': 0.0004995480569652186, 'samples': 170832, 'steps': 3558, 'loss/train': 3.232172727584839} +07/25/2024 11:42:03 - INFO - __main__ - Step 3560: {'lr': 0.0004995477407400511, 'samples': 170880, 'steps': 3559, 'loss/train': 0.927837073802948} +07/25/2024 11:42:04 - INFO - __main__ - Step 3561: {'lr': 0.0004995474244043909, 'samples': 170928, 'steps': 3560, 'loss/train': 2.6520419120788574} +07/25/2024 11:42:04 - INFO - __main__ - Step 3562: {'lr': 0.0004995471079582381, 'samples': 170976, 'steps': 3561, 'loss/train': 2.4712491035461426} +07/25/2024 11:42:04 - INFO - __main__ - Step 3563: {'lr': 0.0004995467914015927, 'samples': 171024, 'steps': 3562, 'loss/train': 3.086530923843384} +07/25/2024 11:42:04 - INFO - __main__ - Step 3564: {'lr': 0.0004995464747344551, 'samples': 171072, 'steps': 3563, 'loss/train': 2.6977503299713135} +07/25/2024 11:42:05 - INFO - __main__ - Step 3565: {'lr': 0.0004995461579568254, 'samples': 171120, 'steps': 3564, 'loss/train': 2.407656669616699} +07/25/2024 11:42:05 - INFO - __main__ - Step 3566: {'lr': 0.0004995458410687035, 'samples': 171168, 'steps': 3565, 'loss/train': 2.833559274673462} +07/25/2024 11:42:05 - INFO - __main__ - Step 3567: {'lr': 0.0004995455240700897, 'samples': 171216, 'steps': 3566, 'loss/train': 2.768393039703369} +07/25/2024 11:42:06 - INFO - __main__ - Step 3568: {'lr': 0.0004995452069609842, 'samples': 171264, 'steps': 3567, 'loss/train': 2.6359643936157227} +07/25/2024 11:42:06 - INFO - __main__ - Step 3569: {'lr': 0.000499544889741387, 'samples': 171312, 'steps': 3568, 'loss/train': 2.169245481491089} +07/25/2024 11:42:06 - INFO - __main__ - Step 3570: {'lr': 0.0004995445724112985, 'samples': 171360, 'steps': 3569, 'loss/train': 2.435492992401123} +07/25/2024 11:42:06 - INFO - __main__ - Step 3571: {'lr': 0.0004995442549707185, 'samples': 171408, 'steps': 3570, 'loss/train': 2.818592071533203} +07/25/2024 11:42:07 - INFO - __main__ - Step 3572: {'lr': 0.0004995439374196472, 'samples': 171456, 'steps': 3571, 'loss/train': 2.5109636783599854} +07/25/2024 11:42:07 - INFO - __main__ - Step 3573: {'lr': 0.000499543619758085, 'samples': 171504, 'steps': 3572, 'loss/train': 2.843233346939087} +07/25/2024 11:42:07 - INFO - __main__ - Step 3574: {'lr': 0.0004995433019860319, 'samples': 171552, 'steps': 3573, 'loss/train': 3.0137200355529785} +07/25/2024 11:42:08 - INFO - __main__ - Step 3575: {'lr': 0.0004995429841034879, 'samples': 171600, 'steps': 3574, 'loss/train': 2.6808905601501465} +07/25/2024 11:42:08 - INFO - __main__ - Step 3576: {'lr': 0.0004995426661104533, 'samples': 171648, 'steps': 3575, 'loss/train': 2.956315279006958} +07/25/2024 11:42:08 - INFO - __main__ - Step 3577: {'lr': 0.0004995423480069281, 'samples': 171696, 'steps': 3576, 'loss/train': 2.7196967601776123} +07/25/2024 11:42:08 - INFO - __main__ - Step 3578: {'lr': 0.0004995420297929127, 'samples': 171744, 'steps': 3577, 'loss/train': 2.307039499282837} +07/25/2024 11:42:09 - INFO - __main__ - Step 3579: {'lr': 0.000499541711468407, 'samples': 171792, 'steps': 3578, 'loss/train': 2.6799802780151367} +07/25/2024 11:42:09 - INFO - __main__ - Step 3580: {'lr': 0.0004995413930334113, 'samples': 171840, 'steps': 3579, 'loss/train': 2.0640041828155518} +07/25/2024 11:42:09 - INFO - __main__ - Step 3581: {'lr': 0.0004995410744879255, 'samples': 171888, 'steps': 3580, 'loss/train': 1.9003596305847168} +07/25/2024 11:42:10 - INFO - __main__ - Step 3582: {'lr': 0.0004995407558319501, 'samples': 171936, 'steps': 3581, 'loss/train': 2.8261117935180664} +07/25/2024 11:42:10 - INFO - __main__ - Step 3583: {'lr': 0.000499540437065485, 'samples': 171984, 'steps': 3582, 'loss/train': 2.4634451866149902} +07/25/2024 11:42:10 - INFO - __main__ - Step 3584: {'lr': 0.0004995401181885303, 'samples': 172032, 'steps': 3583, 'loss/train': 0.9148027896881104} +07/25/2024 11:42:10 - INFO - __main__ - Step 3585: {'lr': 0.0004995397992010863, 'samples': 172080, 'steps': 3584, 'loss/train': 2.8851566314697266} +07/25/2024 11:42:11 - INFO - __main__ - Step 3586: {'lr': 0.000499539480103153, 'samples': 172128, 'steps': 3585, 'loss/train': 2.7021799087524414} +07/25/2024 11:42:11 - INFO - __main__ - Step 3587: {'lr': 0.0004995391608947307, 'samples': 172176, 'steps': 3586, 'loss/train': 2.2081921100616455} +07/25/2024 11:42:11 - INFO - __main__ - Step 3588: {'lr': 0.0004995388415758194, 'samples': 172224, 'steps': 3587, 'loss/train': 2.696930170059204} +07/25/2024 11:42:12 - INFO - __main__ - Step 3589: {'lr': 0.0004995385221464193, 'samples': 172272, 'steps': 3588, 'loss/train': 2.7174901962280273} +07/25/2024 11:42:12 - INFO - __main__ - Step 3590: {'lr': 0.0004995382026065305, 'samples': 172320, 'steps': 3589, 'loss/train': 2.8325045108795166} +07/25/2024 11:42:12 - INFO - __main__ - Step 3591: {'lr': 0.0004995378829561532, 'samples': 172368, 'steps': 3590, 'loss/train': 2.305424690246582} +07/25/2024 11:42:12 - INFO - __main__ - Step 3592: {'lr': 0.0004995375631952875, 'samples': 172416, 'steps': 3591, 'loss/train': 2.886065721511841} +07/25/2024 11:42:13 - INFO - __main__ - Step 3593: {'lr': 0.0004995372433239337, 'samples': 172464, 'steps': 3592, 'loss/train': 2.4323439598083496} +07/25/2024 11:42:13 - INFO - __main__ - Step 3594: {'lr': 0.0004995369233420916, 'samples': 172512, 'steps': 3593, 'loss/train': 2.2379658222198486} +07/25/2024 11:42:13 - INFO - __main__ - Step 3595: {'lr': 0.0004995366032497616, 'samples': 172560, 'steps': 3594, 'loss/train': 2.751370906829834} +07/25/2024 11:42:14 - INFO - __main__ - Step 3596: {'lr': 0.0004995362830469439, 'samples': 172608, 'steps': 3595, 'loss/train': 2.7380433082580566} +07/25/2024 11:42:14 - INFO - __main__ - Step 3597: {'lr': 0.0004995359627336384, 'samples': 172656, 'steps': 3596, 'loss/train': 3.084564208984375} +07/25/2024 11:42:14 - INFO - __main__ - Step 3598: {'lr': 0.0004995356423098453, 'samples': 172704, 'steps': 3597, 'loss/train': 3.14180064201355} +07/25/2024 11:42:14 - INFO - __main__ - Step 3599: {'lr': 0.000499535321775565, 'samples': 172752, 'steps': 3598, 'loss/train': 2.7653567790985107} +07/25/2024 11:42:15 - INFO - __main__ - Step 3600: {'lr': 0.0004995350011307973, 'samples': 172800, 'steps': 3599, 'loss/train': 2.397016763687134} +07/25/2024 11:42:15 - INFO - __main__ - Step 3601: {'lr': 0.0004995346803755426, 'samples': 172848, 'steps': 3600, 'loss/train': 3.0707201957702637} +07/25/2024 11:42:15 - INFO - __main__ - Step 3602: {'lr': 0.0004995343595098009, 'samples': 172896, 'steps': 3601, 'loss/train': 1.4025685787200928} +07/25/2024 11:42:16 - INFO - __main__ - Step 3603: {'lr': 0.0004995340385335724, 'samples': 172944, 'steps': 3602, 'loss/train': 2.2566115856170654} +07/25/2024 11:42:16 - INFO - __main__ - Step 3604: {'lr': 0.0004995337174468572, 'samples': 172992, 'steps': 3603, 'loss/train': 2.8057656288146973} +07/25/2024 11:42:16 - INFO - __main__ - Step 3605: {'lr': 0.0004995333962496554, 'samples': 173040, 'steps': 3604, 'loss/train': 2.4660987854003906} +07/25/2024 11:42:16 - INFO - __main__ - Step 3606: {'lr': 0.0004995330749419673, 'samples': 173088, 'steps': 3605, 'loss/train': 2.018171787261963} +07/25/2024 11:42:17 - INFO - __main__ - Step 3607: {'lr': 0.0004995327535237929, 'samples': 173136, 'steps': 3606, 'loss/train': 2.204676628112793} +07/25/2024 11:42:17 - INFO - __main__ - Step 3608: {'lr': 0.0004995324319951324, 'samples': 173184, 'steps': 3607, 'loss/train': 0.9225022792816162} +07/25/2024 11:42:17 - INFO - __main__ - Step 3609: {'lr': 0.0004995321103559859, 'samples': 173232, 'steps': 3608, 'loss/train': 2.811715602874756} +07/25/2024 11:42:18 - INFO - __main__ - Step 3610: {'lr': 0.0004995317886063536, 'samples': 173280, 'steps': 3609, 'loss/train': 2.9247236251831055} +07/25/2024 11:42:18 - INFO - __main__ - Step 3611: {'lr': 0.0004995314667462356, 'samples': 173328, 'steps': 3610, 'loss/train': 3.0227601528167725} +07/25/2024 11:42:18 - INFO - __main__ - Step 3612: {'lr': 0.0004995311447756321, 'samples': 173376, 'steps': 3611, 'loss/train': 3.238783359527588} +07/25/2024 11:42:18 - INFO - __main__ - Step 3613: {'lr': 0.0004995308226945431, 'samples': 173424, 'steps': 3612, 'loss/train': 2.859452724456787} +07/25/2024 11:42:19 - INFO - __main__ - Step 3614: {'lr': 0.000499530500502969, 'samples': 173472, 'steps': 3613, 'loss/train': 2.5045082569122314} +07/25/2024 11:42:19 - INFO - __main__ - Step 3615: {'lr': 0.0004995301782009096, 'samples': 173520, 'steps': 3614, 'loss/train': 1.9211286306381226} +07/25/2024 11:42:19 - INFO - __main__ - Step 3616: {'lr': 0.0004995298557883654, 'samples': 173568, 'steps': 3615, 'loss/train': 2.808140993118286} +07/25/2024 11:42:20 - INFO - __main__ - Step 3617: {'lr': 0.0004995295332653363, 'samples': 173616, 'steps': 3616, 'loss/train': 2.3912460803985596} +07/25/2024 11:42:20 - INFO - __main__ - Step 3618: {'lr': 0.0004995292106318225, 'samples': 173664, 'steps': 3617, 'loss/train': 2.467527389526367} +07/25/2024 11:42:20 - INFO - __main__ - Step 3619: {'lr': 0.0004995288878878242, 'samples': 173712, 'steps': 3618, 'loss/train': 2.7377893924713135} +07/25/2024 11:42:20 - INFO - __main__ - Step 3620: {'lr': 0.0004995285650333415, 'samples': 173760, 'steps': 3619, 'loss/train': 2.1430413722991943} +07/25/2024 11:42:21 - INFO - __main__ - Step 3621: {'lr': 0.0004995282420683744, 'samples': 173808, 'steps': 3620, 'loss/train': 1.9854294061660767} +07/25/2024 11:42:21 - INFO - __main__ - Step 3622: {'lr': 0.0004995279189929234, 'samples': 173856, 'steps': 3621, 'loss/train': 2.783413887023926} +07/25/2024 11:42:21 - INFO - __main__ - Step 3623: {'lr': 0.0004995275958069883, 'samples': 173904, 'steps': 3622, 'loss/train': 2.830099105834961} +07/25/2024 11:42:22 - INFO - __main__ - Step 3624: {'lr': 0.0004995272725105694, 'samples': 173952, 'steps': 3623, 'loss/train': 2.2480545043945312} +07/25/2024 11:42:22 - INFO - __main__ - Step 3625: {'lr': 0.0004995269491036668, 'samples': 174000, 'steps': 3624, 'loss/train': 2.586876392364502} +07/25/2024 11:42:22 - INFO - __main__ - Step 3626: {'lr': 0.0004995266255862807, 'samples': 174048, 'steps': 3625, 'loss/train': 2.2680583000183105} +07/25/2024 11:42:22 - INFO - __main__ - Step 3627: {'lr': 0.0004995263019584112, 'samples': 174096, 'steps': 3626, 'loss/train': 3.159064292907715} +07/25/2024 11:42:23 - INFO - __main__ - Step 3628: {'lr': 0.0004995259782200584, 'samples': 174144, 'steps': 3627, 'loss/train': 2.5053908824920654} +07/25/2024 11:42:23 - INFO - __main__ - Step 3629: {'lr': 0.0004995256543712226, 'samples': 174192, 'steps': 3628, 'loss/train': 3.0215232372283936} +07/25/2024 11:42:23 - INFO - __main__ - Step 3630: {'lr': 0.0004995253304119036, 'samples': 174240, 'steps': 3629, 'loss/train': 1.5816352367401123} +07/25/2024 11:42:24 - INFO - __main__ - Step 3631: {'lr': 0.000499525006342102, 'samples': 174288, 'steps': 3630, 'loss/train': 3.51804780960083} +07/25/2024 11:42:24 - INFO - __main__ - Step 3632: {'lr': 0.0004995246821618176, 'samples': 174336, 'steps': 3631, 'loss/train': 0.8909224271774292} +07/25/2024 11:42:24 - INFO - __main__ - Step 3633: {'lr': 0.0004995243578710507, 'samples': 174384, 'steps': 3632, 'loss/train': 2.4148173332214355} +07/25/2024 11:42:24 - INFO - __main__ - Step 3634: {'lr': 0.0004995240334698014, 'samples': 174432, 'steps': 3633, 'loss/train': 2.750173807144165} +07/25/2024 11:42:25 - INFO - __main__ - Step 3635: {'lr': 0.0004995237089580699, 'samples': 174480, 'steps': 3634, 'loss/train': 1.7035974264144897} +07/25/2024 11:42:25 - INFO - __main__ - Step 3636: {'lr': 0.0004995233843358562, 'samples': 174528, 'steps': 3635, 'loss/train': 2.761680841445923} +07/25/2024 11:42:25 - INFO - __main__ - Step 3637: {'lr': 0.0004995230596031606, 'samples': 174576, 'steps': 3636, 'loss/train': 1.951760172843933} +07/25/2024 11:42:25 - INFO - __main__ - Step 3638: {'lr': 0.0004995227347599832, 'samples': 174624, 'steps': 3637, 'loss/train': 2.8749337196350098} +07/25/2024 11:42:26 - INFO - __main__ - Step 3639: {'lr': 0.000499522409806324, 'samples': 174672, 'steps': 3638, 'loss/train': 3.329972505569458} +07/25/2024 11:42:26 - INFO - __main__ - Step 3640: {'lr': 0.0004995220847421834, 'samples': 174720, 'steps': 3639, 'loss/train': 1.820510745048523} +07/25/2024 11:42:26 - INFO - __main__ - Step 3641: {'lr': 0.0004995217595675614, 'samples': 174768, 'steps': 3640, 'loss/train': 2.600139617919922} +07/25/2024 11:42:27 - INFO - __main__ - Step 3642: {'lr': 0.0004995214342824581, 'samples': 174816, 'steps': 3641, 'loss/train': 2.1182026863098145} +07/25/2024 11:42:27 - INFO - __main__ - Step 3643: {'lr': 0.0004995211088868738, 'samples': 174864, 'steps': 3642, 'loss/train': 1.6961487531661987} +07/25/2024 11:42:27 - INFO - __main__ - Step 3644: {'lr': 0.0004995207833808084, 'samples': 174912, 'steps': 3643, 'loss/train': 2.8284049034118652} +07/25/2024 11:42:27 - INFO - __main__ - Step 3645: {'lr': 0.0004995204577642623, 'samples': 174960, 'steps': 3644, 'loss/train': 3.03855299949646} +07/25/2024 11:42:28 - INFO - __main__ - Step 3646: {'lr': 0.0004995201320372355, 'samples': 175008, 'steps': 3645, 'loss/train': 2.7288105487823486} +07/25/2024 11:42:28 - INFO - __main__ - Step 3647: {'lr': 0.0004995198061997281, 'samples': 175056, 'steps': 3646, 'loss/train': 2.4680819511413574} +07/25/2024 11:42:28 - INFO - __main__ - Step 3648: {'lr': 0.0004995194802517404, 'samples': 175104, 'steps': 3647, 'loss/train': 2.3709187507629395} +07/25/2024 11:42:29 - INFO - __main__ - Step 3649: {'lr': 0.0004995191541932725, 'samples': 175152, 'steps': 3648, 'loss/train': 2.5988359451293945} +07/25/2024 11:42:29 - INFO - __main__ - Step 3650: {'lr': 0.0004995188280243245, 'samples': 175200, 'steps': 3649, 'loss/train': 2.8381590843200684} +07/25/2024 11:42:29 - INFO - __main__ - Step 3651: {'lr': 0.0004995185017448965, 'samples': 175248, 'steps': 3650, 'loss/train': 2.700979709625244} +07/25/2024 11:42:29 - INFO - __main__ - Step 3652: {'lr': 0.0004995181753549887, 'samples': 175296, 'steps': 3651, 'loss/train': 2.359575033187866} +07/25/2024 11:42:30 - INFO - __main__ - Step 3653: {'lr': 0.0004995178488546013, 'samples': 175344, 'steps': 3652, 'loss/train': 2.676621913909912} +07/25/2024 11:42:30 - INFO - __main__ - Step 3654: {'lr': 0.0004995175222437344, 'samples': 175392, 'steps': 3653, 'loss/train': 3.213270664215088} +07/25/2024 11:42:30 - INFO - __main__ - Step 3655: {'lr': 0.0004995171955223881, 'samples': 175440, 'steps': 3654, 'loss/train': 3.2517826557159424} +07/25/2024 11:42:31 - INFO - __main__ - Step 3656: {'lr': 0.0004995168686905625, 'samples': 175488, 'steps': 3655, 'loss/train': 0.8449878692626953} +07/25/2024 11:42:31 - INFO - __main__ - Step 3657: {'lr': 0.0004995165417482579, 'samples': 175536, 'steps': 3656, 'loss/train': 1.1757028102874756} +07/25/2024 11:42:31 - INFO - __main__ - Step 3658: {'lr': 0.0004995162146954744, 'samples': 175584, 'steps': 3657, 'loss/train': 2.6007487773895264} +07/25/2024 11:42:31 - INFO - __main__ - Step 3659: {'lr': 0.000499515887532212, 'samples': 175632, 'steps': 3658, 'loss/train': 1.7027084827423096} +07/25/2024 11:42:32 - INFO - __main__ - Step 3660: {'lr': 0.000499515560258471, 'samples': 175680, 'steps': 3659, 'loss/train': 2.711261034011841} +07/25/2024 11:42:32 - INFO - __main__ - Step 3661: {'lr': 0.0004995152328742515, 'samples': 175728, 'steps': 3660, 'loss/train': 3.0629751682281494} +07/25/2024 11:42:32 - INFO - __main__ - Step 3662: {'lr': 0.0004995149053795537, 'samples': 175776, 'steps': 3661, 'loss/train': 2.2052793502807617} +07/25/2024 11:42:33 - INFO - __main__ - Step 3663: {'lr': 0.0004995145777743777, 'samples': 175824, 'steps': 3662, 'loss/train': 2.946174144744873} +07/25/2024 11:42:33 - INFO - __main__ - Step 3664: {'lr': 0.0004995142500587236, 'samples': 175872, 'steps': 3663, 'loss/train': 1.4788475036621094} +07/25/2024 11:42:33 - INFO - __main__ - Step 3665: {'lr': 0.0004995139222325915, 'samples': 175920, 'steps': 3664, 'loss/train': 1.7974791526794434} +07/25/2024 11:42:33 - INFO - __main__ - Step 3666: {'lr': 0.0004995135942959817, 'samples': 175968, 'steps': 3665, 'loss/train': 2.3268470764160156} +07/25/2024 11:42:34 - INFO - __main__ - Step 3667: {'lr': 0.0004995132662488943, 'samples': 176016, 'steps': 3666, 'loss/train': 2.845412492752075} +07/25/2024 11:42:34 - INFO - __main__ - Step 3668: {'lr': 0.0004995129380913295, 'samples': 176064, 'steps': 3667, 'loss/train': 3.723599433898926} +07/25/2024 11:42:34 - INFO - __main__ - Step 3669: {'lr': 0.0004995126098232872, 'samples': 176112, 'steps': 3668, 'loss/train': 2.6188697814941406} +07/25/2024 11:42:35 - INFO - __main__ - Step 3670: {'lr': 0.0004995122814447677, 'samples': 176160, 'steps': 3669, 'loss/train': 2.4893319606781006} +07/25/2024 11:42:35 - INFO - __main__ - Step 3671: {'lr': 0.0004995119529557713, 'samples': 176208, 'steps': 3670, 'loss/train': 2.8276593685150146} +07/25/2024 11:42:35 - INFO - __main__ - Step 3672: {'lr': 0.0004995116243562978, 'samples': 176256, 'steps': 3671, 'loss/train': 2.663024425506592} +07/25/2024 11:42:35 - INFO - __main__ - Step 3673: {'lr': 0.0004995112956463477, 'samples': 176304, 'steps': 3672, 'loss/train': 2.3615071773529053} +07/25/2024 11:42:36 - INFO - __main__ - Step 3674: {'lr': 0.000499510966825921, 'samples': 176352, 'steps': 3673, 'loss/train': 2.5865321159362793} +07/25/2024 11:42:36 - INFO - __main__ - Step 3675: {'lr': 0.0004995106378950177, 'samples': 176400, 'steps': 3674, 'loss/train': 2.3117198944091797} +07/25/2024 11:42:36 - INFO - __main__ - Step 3676: {'lr': 0.0004995103088536382, 'samples': 176448, 'steps': 3675, 'loss/train': 2.1084201335906982} +07/25/2024 11:42:37 - INFO - __main__ - Step 3677: {'lr': 0.0004995099797017825, 'samples': 176496, 'steps': 3676, 'loss/train': 2.5702662467956543} +07/25/2024 11:42:37 - INFO - __main__ - Step 3678: {'lr': 0.0004995096504394508, 'samples': 176544, 'steps': 3677, 'loss/train': 3.047494649887085} +07/25/2024 11:42:37 - INFO - __main__ - Step 3679: {'lr': 0.0004995093210666432, 'samples': 176592, 'steps': 3678, 'loss/train': 2.016536235809326} +07/25/2024 11:42:37 - INFO - __main__ - Step 3680: {'lr': 0.0004995089915833598, 'samples': 176640, 'steps': 3679, 'loss/train': 0.9602656960487366} +07/25/2024 11:42:38 - INFO - __main__ - Step 3681: {'lr': 0.0004995086619896008, 'samples': 176688, 'steps': 3680, 'loss/train': 2.256436824798584} +07/25/2024 11:42:38 - INFO - __main__ - Step 3682: {'lr': 0.0004995083322853664, 'samples': 176736, 'steps': 3681, 'loss/train': 2.9081249237060547} +07/25/2024 11:42:38 - INFO - __main__ - Step 3683: {'lr': 0.0004995080024706568, 'samples': 176784, 'steps': 3682, 'loss/train': 2.380352020263672} +07/25/2024 11:42:39 - INFO - __main__ - Step 3684: {'lr': 0.000499507672545472, 'samples': 176832, 'steps': 3683, 'loss/train': 2.5686569213867188} +07/25/2024 11:42:39 - INFO - __main__ - Step 3685: {'lr': 0.000499507342509812, 'samples': 176880, 'steps': 3684, 'loss/train': 3.2072527408599854} +07/25/2024 11:42:39 - INFO - __main__ - Step 3686: {'lr': 0.0004995070123636773, 'samples': 176928, 'steps': 3685, 'loss/train': 2.0860509872436523} +07/25/2024 11:42:39 - INFO - __main__ - Step 3687: {'lr': 0.0004995066821070679, 'samples': 176976, 'steps': 3686, 'loss/train': 3.2232608795166016} +07/25/2024 11:42:40 - INFO - __main__ - Step 3688: {'lr': 0.000499506351739984, 'samples': 177024, 'steps': 3687, 'loss/train': 2.581434726715088} +07/25/2024 11:42:40 - INFO - __main__ - Step 3689: {'lr': 0.0004995060212624255, 'samples': 177072, 'steps': 3688, 'loss/train': 2.5333175659179688} +07/25/2024 11:42:40 - INFO - __main__ - Step 3690: {'lr': 0.0004995056906743928, 'samples': 177120, 'steps': 3689, 'loss/train': 2.679018020629883} +07/25/2024 11:42:41 - INFO - __main__ - Step 3691: {'lr': 0.0004995053599758859, 'samples': 177168, 'steps': 3690, 'loss/train': 2.824357509613037} +07/25/2024 11:42:41 - INFO - __main__ - Step 3692: {'lr': 0.0004995050291669052, 'samples': 177216, 'steps': 3691, 'loss/train': 3.8413496017456055} +07/25/2024 11:42:41 - INFO - __main__ - Step 3693: {'lr': 0.0004995046982474506, 'samples': 177264, 'steps': 3692, 'loss/train': 2.705901622772217} +07/25/2024 11:42:41 - INFO - __main__ - Step 3694: {'lr': 0.0004995043672175222, 'samples': 177312, 'steps': 3693, 'loss/train': 2.272002935409546} +07/25/2024 11:42:42 - INFO - __main__ - Step 3695: {'lr': 0.0004995040360771204, 'samples': 177360, 'steps': 3694, 'loss/train': 2.928483486175537} +07/25/2024 11:42:42 - INFO - __main__ - Step 3696: {'lr': 0.0004995037048262451, 'samples': 177408, 'steps': 3695, 'loss/train': 2.070612907409668} +07/25/2024 11:42:42 - INFO - __main__ - Step 3697: {'lr': 0.0004995033734648966, 'samples': 177456, 'steps': 3696, 'loss/train': 1.9715114831924438} +07/25/2024 11:42:43 - INFO - __main__ - Step 3698: {'lr': 0.0004995030419930749, 'samples': 177504, 'steps': 3697, 'loss/train': 2.5379059314727783} +07/25/2024 11:42:43 - INFO - __main__ - Step 3699: {'lr': 0.0004995027104107804, 'samples': 177552, 'steps': 3698, 'loss/train': 3.0400702953338623} +07/25/2024 11:42:43 - INFO - __main__ - Step 3700: {'lr': 0.000499502378718013, 'samples': 177600, 'steps': 3699, 'loss/train': 1.9070889949798584} +07/25/2024 11:42:43 - INFO - __main__ - Step 3701: {'lr': 0.0004995020469147729, 'samples': 177648, 'steps': 3700, 'loss/train': 2.699854850769043} +07/25/2024 11:42:44 - INFO - __main__ - Step 3702: {'lr': 0.0004995017150010605, 'samples': 177696, 'steps': 3701, 'loss/train': 3.1903164386749268} +07/25/2024 11:42:44 - INFO - __main__ - Step 3703: {'lr': 0.0004995013829768755, 'samples': 177744, 'steps': 3702, 'loss/train': 1.7550581693649292} +07/25/2024 11:42:44 - INFO - __main__ - Step 3704: {'lr': 0.0004995010508422183, 'samples': 177792, 'steps': 3703, 'loss/train': 0.9283621907234192} +07/25/2024 11:42:44 - INFO - __main__ - Step 3705: {'lr': 0.0004995007185970891, 'samples': 177840, 'steps': 3704, 'loss/train': 1.8892511129379272} +07/25/2024 11:42:45 - INFO - __main__ - Step 3706: {'lr': 0.000499500386241488, 'samples': 177888, 'steps': 3705, 'loss/train': 3.3891255855560303} +07/25/2024 11:42:45 - INFO - __main__ - Step 3707: {'lr': 0.0004995000537754151, 'samples': 177936, 'steps': 3706, 'loss/train': 2.543170213699341} +07/25/2024 11:42:45 - INFO - __main__ - Step 3708: {'lr': 0.0004994997211988704, 'samples': 177984, 'steps': 3707, 'loss/train': 2.189469575881958} +07/25/2024 11:42:46 - INFO - __main__ - Step 3709: {'lr': 0.0004994993885118543, 'samples': 178032, 'steps': 3708, 'loss/train': 2.539255380630493} +07/25/2024 11:42:46 - INFO - __main__ - Step 3710: {'lr': 0.0004994990557143671, 'samples': 178080, 'steps': 3709, 'loss/train': 2.237471103668213} +07/25/2024 11:42:46 - INFO - __main__ - Step 3711: {'lr': 0.0004994987228064085, 'samples': 178128, 'steps': 3710, 'loss/train': 2.98549485206604} +07/25/2024 11:42:46 - INFO - __main__ - Step 3712: {'lr': 0.0004994983897879788, 'samples': 178176, 'steps': 3711, 'loss/train': 2.7787725925445557} +07/25/2024 11:42:47 - INFO - __main__ - Step 3713: {'lr': 0.0004994980566590783, 'samples': 178224, 'steps': 3712, 'loss/train': 2.3669276237487793} +07/25/2024 11:42:47 - INFO - __main__ - Step 3714: {'lr': 0.0004994977234197071, 'samples': 178272, 'steps': 3713, 'loss/train': 2.473996877670288} +07/25/2024 11:42:47 - INFO - __main__ - Step 3715: {'lr': 0.0004994973900698652, 'samples': 178320, 'steps': 3714, 'loss/train': 2.8066959381103516} +07/25/2024 11:42:48 - INFO - __main__ - Step 3716: {'lr': 0.0004994970566095529, 'samples': 178368, 'steps': 3715, 'loss/train': 3.6934242248535156} +07/25/2024 11:42:48 - INFO - __main__ - Step 3717: {'lr': 0.0004994967230387702, 'samples': 178416, 'steps': 3716, 'loss/train': 2.6651713848114014} +07/25/2024 11:42:48 - INFO - __main__ - Step 3718: {'lr': 0.0004994963893575175, 'samples': 178464, 'steps': 3717, 'loss/train': 2.4312994480133057} +07/25/2024 11:42:48 - INFO - __main__ - Step 3719: {'lr': 0.0004994960555657947, 'samples': 178512, 'steps': 3718, 'loss/train': 3.061842203140259} +07/25/2024 11:42:49 - INFO - __main__ - Step 3720: {'lr': 0.000499495721663602, 'samples': 178560, 'steps': 3719, 'loss/train': 3.00909423828125} +07/25/2024 11:42:49 - INFO - __main__ - Step 3721: {'lr': 0.0004994953876509396, 'samples': 178608, 'steps': 3720, 'loss/train': 2.7723655700683594} +07/25/2024 11:42:49 - INFO - __main__ - Step 3722: {'lr': 0.0004994950535278077, 'samples': 178656, 'steps': 3721, 'loss/train': 2.2235770225524902} +07/25/2024 11:42:50 - INFO - __main__ - Step 3723: {'lr': 0.0004994947192942063, 'samples': 178704, 'steps': 3722, 'loss/train': 2.4680092334747314} +07/25/2024 11:42:50 - INFO - __main__ - Step 3724: {'lr': 0.0004994943849501356, 'samples': 178752, 'steps': 3723, 'loss/train': 2.5461549758911133} +07/25/2024 11:42:50 - INFO - __main__ - Step 3725: {'lr': 0.0004994940504955959, 'samples': 178800, 'steps': 3724, 'loss/train': 2.1660044193267822} +07/25/2024 11:42:50 - INFO - __main__ - Step 3726: {'lr': 0.0004994937159305872, 'samples': 178848, 'steps': 3725, 'loss/train': 3.2631630897521973} +07/25/2024 11:42:51 - INFO - __main__ - Step 3727: {'lr': 0.0004994933812551097, 'samples': 178896, 'steps': 3726, 'loss/train': 1.9466984272003174} +07/25/2024 11:42:51 - INFO - __main__ - Step 3728: {'lr': 0.0004994930464691634, 'samples': 178944, 'steps': 3727, 'loss/train': 0.8906803727149963} +07/25/2024 11:42:51 - INFO - __main__ - Step 3729: {'lr': 0.0004994927115727487, 'samples': 178992, 'steps': 3728, 'loss/train': 1.9187735319137573} +07/25/2024 11:42:52 - INFO - __main__ - Step 3730: {'lr': 0.0004994923765658655, 'samples': 179040, 'steps': 3729, 'loss/train': 3.2525177001953125} +07/25/2024 11:42:52 - INFO - __main__ - Step 3731: {'lr': 0.0004994920414485141, 'samples': 179088, 'steps': 3730, 'loss/train': 2.483705759048462} +07/25/2024 11:42:52 - INFO - __main__ - Step 3732: {'lr': 0.0004994917062206947, 'samples': 179136, 'steps': 3731, 'loss/train': 2.373425245285034} +07/25/2024 11:42:52 - INFO - __main__ - Step 3733: {'lr': 0.0004994913708824073, 'samples': 179184, 'steps': 3732, 'loss/train': 2.4704813957214355} +07/25/2024 11:42:53 - INFO - __main__ - Step 3734: {'lr': 0.0004994910354336521, 'samples': 179232, 'steps': 3733, 'loss/train': 2.151580333709717} +07/25/2024 11:42:53 - INFO - __main__ - Step 3735: {'lr': 0.0004994906998744292, 'samples': 179280, 'steps': 3734, 'loss/train': 2.7548487186431885} +07/25/2024 11:42:53 - INFO - __main__ - Step 3736: {'lr': 0.000499490364204739, 'samples': 179328, 'steps': 3735, 'loss/train': 2.47170090675354} +07/25/2024 11:42:54 - INFO - __main__ - Step 3737: {'lr': 0.0004994900284245813, 'samples': 179376, 'steps': 3736, 'loss/train': 1.1666885614395142} +07/25/2024 11:42:54 - INFO - __main__ - Step 3738: {'lr': 0.0004994896925339564, 'samples': 179424, 'steps': 3737, 'loss/train': 2.586296319961548} +07/25/2024 11:42:54 - INFO - __main__ - Step 3739: {'lr': 0.0004994893565328645, 'samples': 179472, 'steps': 3738, 'loss/train': 2.6084001064300537} +07/25/2024 11:42:54 - INFO - __main__ - Step 3740: {'lr': 0.0004994890204213057, 'samples': 179520, 'steps': 3739, 'loss/train': 3.6742072105407715} +07/25/2024 11:42:55 - INFO - __main__ - Step 3741: {'lr': 0.0004994886841992802, 'samples': 179568, 'steps': 3740, 'loss/train': 3.029897928237915} +07/25/2024 11:42:55 - INFO - __main__ - Step 3742: {'lr': 0.0004994883478667881, 'samples': 179616, 'steps': 3741, 'loss/train': 1.9914661645889282} +07/25/2024 11:42:55 - INFO - __main__ - Step 3743: {'lr': 0.0004994880114238294, 'samples': 179664, 'steps': 3742, 'loss/train': 2.33762788772583} +07/25/2024 11:42:56 - INFO - __main__ - Step 3744: {'lr': 0.0004994876748704045, 'samples': 179712, 'steps': 3743, 'loss/train': 2.7388718128204346} +07/25/2024 11:42:56 - INFO - __main__ - Step 3745: {'lr': 0.0004994873382065135, 'samples': 179760, 'steps': 3744, 'loss/train': 2.3338639736175537} +07/25/2024 11:42:56 - INFO - __main__ - Step 3746: {'lr': 0.0004994870014321564, 'samples': 179808, 'steps': 3745, 'loss/train': 2.871382713317871} +07/25/2024 11:42:56 - INFO - __main__ - Step 3747: {'lr': 0.0004994866645473334, 'samples': 179856, 'steps': 3746, 'loss/train': 2.6398983001708984} +07/25/2024 11:42:57 - INFO - __main__ - Step 3748: {'lr': 0.0004994863275520449, 'samples': 179904, 'steps': 3747, 'loss/train': 2.507105827331543} +07/25/2024 11:42:57 - INFO - __main__ - Step 3749: {'lr': 0.0004994859904462908, 'samples': 179952, 'steps': 3748, 'loss/train': 2.504281520843506} +07/25/2024 11:42:57 - INFO - __main__ - Step 3750: {'lr': 0.0004994856532300712, 'samples': 180000, 'steps': 3749, 'loss/train': 1.3494024276733398} +07/25/2024 11:42:58 - INFO - __main__ - Step 3751: {'lr': 0.0004994853159033864, 'samples': 180048, 'steps': 3750, 'loss/train': 1.7235363721847534} +07/25/2024 11:42:58 - INFO - __main__ - Step 3752: {'lr': 0.0004994849784662364, 'samples': 180096, 'steps': 3751, 'loss/train': 0.8500077724456787} +07/25/2024 11:42:58 - INFO - __main__ - Step 3753: {'lr': 0.0004994846409186216, 'samples': 180144, 'steps': 3752, 'loss/train': 1.8901249170303345} +07/25/2024 11:42:58 - INFO - __main__ - Step 3754: {'lr': 0.0004994843032605418, 'samples': 180192, 'steps': 3753, 'loss/train': 2.29056715965271} +07/25/2024 11:42:59 - INFO - __main__ - Step 3755: {'lr': 0.0004994839654919974, 'samples': 180240, 'steps': 3754, 'loss/train': 2.6281254291534424} +07/25/2024 11:42:59 - INFO - __main__ - Step 3756: {'lr': 0.0004994836276129886, 'samples': 180288, 'steps': 3755, 'loss/train': 2.1428380012512207} +07/25/2024 11:42:59 - INFO - __main__ - Step 3757: {'lr': 0.0004994832896235154, 'samples': 180336, 'steps': 3756, 'loss/train': 2.716488838195801} +07/25/2024 11:43:00 - INFO - __main__ - Step 3758: {'lr': 0.0004994829515235779, 'samples': 180384, 'steps': 3757, 'loss/train': 2.453198194503784} +07/25/2024 11:43:00 - INFO - __main__ - Step 3759: {'lr': 0.0004994826133131765, 'samples': 180432, 'steps': 3758, 'loss/train': 3.487170696258545} +07/25/2024 11:43:00 - INFO - __main__ - Step 3760: {'lr': 0.000499482274992311, 'samples': 180480, 'steps': 3759, 'loss/train': 2.8966946601867676} +07/25/2024 11:43:00 - INFO - __main__ - Step 3761: {'lr': 0.0004994819365609818, 'samples': 180528, 'steps': 3760, 'loss/train': 2.273653030395508} +07/25/2024 11:43:01 - INFO - __main__ - Step 3762: {'lr': 0.000499481598019189, 'samples': 180576, 'steps': 3761, 'loss/train': 2.4379756450653076} +07/25/2024 11:43:01 - INFO - __main__ - Step 3763: {'lr': 0.0004994812593669328, 'samples': 180624, 'steps': 3762, 'loss/train': 2.4156064987182617} +07/25/2024 11:43:01 - INFO - __main__ - Step 3764: {'lr': 0.0004994809206042133, 'samples': 180672, 'steps': 3763, 'loss/train': 3.5076045989990234} +07/25/2024 11:43:02 - INFO - __main__ - Step 3765: {'lr': 0.0004994805817310306, 'samples': 180720, 'steps': 3764, 'loss/train': 2.6697189807891846} +07/25/2024 11:43:02 - INFO - __main__ - Step 3766: {'lr': 0.0004994802427473848, 'samples': 180768, 'steps': 3765, 'loss/train': 2.3735830783843994} +07/25/2024 11:43:02 - INFO - __main__ - Step 3767: {'lr': 0.0004994799036532763, 'samples': 180816, 'steps': 3766, 'loss/train': 2.2191078662872314} +07/25/2024 11:43:02 - INFO - __main__ - Step 3768: {'lr': 0.0004994795644487049, 'samples': 180864, 'steps': 3767, 'loss/train': 2.660961389541626} +07/25/2024 11:43:03 - INFO - __main__ - Step 3769: {'lr': 0.0004994792251336711, 'samples': 180912, 'steps': 3768, 'loss/train': 2.397897958755493} +07/25/2024 11:43:03 - INFO - __main__ - Step 3770: {'lr': 0.0004994788857081748, 'samples': 180960, 'steps': 3769, 'loss/train': 2.7997019290924072} +07/25/2024 11:43:03 - INFO - __main__ - Step 3771: {'lr': 0.0004994785461722163, 'samples': 181008, 'steps': 3770, 'loss/train': 2.054739475250244} +07/25/2024 11:43:04 - INFO - __main__ - Step 3772: {'lr': 0.0004994782065257956, 'samples': 181056, 'steps': 3771, 'loss/train': 2.6474106311798096} +07/25/2024 11:43:04 - INFO - __main__ - Step 3773: {'lr': 0.0004994778667689131, 'samples': 181104, 'steps': 3772, 'loss/train': 2.7278635501861572} +07/25/2024 11:43:04 - INFO - __main__ - Step 3774: {'lr': 0.0004994775269015686, 'samples': 181152, 'steps': 3773, 'loss/train': 2.0034937858581543} +07/25/2024 11:43:04 - INFO - __main__ - Step 3775: {'lr': 0.0004994771869237625, 'samples': 181200, 'steps': 3774, 'loss/train': 1.6932710409164429} +07/25/2024 11:43:05 - INFO - __main__ - Step 3776: {'lr': 0.000499476846835495, 'samples': 181248, 'steps': 3775, 'loss/train': 0.8805445432662964} +07/25/2024 11:43:05 - INFO - __main__ - Step 3777: {'lr': 0.0004994765066367661, 'samples': 181296, 'steps': 3776, 'loss/train': 1.7947285175323486} +07/25/2024 11:43:05 - INFO - __main__ - Step 3778: {'lr': 0.0004994761663275758, 'samples': 181344, 'steps': 3777, 'loss/train': 3.093369483947754} +07/25/2024 11:43:05 - INFO - __main__ - Step 3779: {'lr': 0.0004994758259079247, 'samples': 181392, 'steps': 3778, 'loss/train': 2.4701616764068604} +07/25/2024 11:43:06 - INFO - __main__ - Step 3780: {'lr': 0.0004994754853778126, 'samples': 181440, 'steps': 3779, 'loss/train': 2.651665210723877} +07/25/2024 11:43:06 - INFO - __main__ - Step 3781: {'lr': 0.0004994751447372398, 'samples': 181488, 'steps': 3780, 'loss/train': 2.5985023975372314} +07/25/2024 11:43:06 - INFO - __main__ - Step 3782: {'lr': 0.0004994748039862062, 'samples': 181536, 'steps': 3781, 'loss/train': 2.3103561401367188} +07/25/2024 11:43:07 - INFO - __main__ - Step 3783: {'lr': 0.0004994744631247124, 'samples': 181584, 'steps': 3782, 'loss/train': 2.321051597595215} +07/25/2024 11:43:07 - INFO - __main__ - Step 3784: {'lr': 0.0004994741221527581, 'samples': 181632, 'steps': 3783, 'loss/train': 2.6781575679779053} +07/25/2024 11:43:07 - INFO - __main__ - Step 3785: {'lr': 0.0004994737810703438, 'samples': 181680, 'steps': 3784, 'loss/train': 2.458789587020874} +07/25/2024 11:43:07 - INFO - __main__ - Step 3786: {'lr': 0.0004994734398774694, 'samples': 181728, 'steps': 3785, 'loss/train': 2.6121528148651123} +07/25/2024 11:43:08 - INFO - __main__ - Step 3787: {'lr': 0.0004994730985741352, 'samples': 181776, 'steps': 3786, 'loss/train': 1.5266438722610474} +07/25/2024 11:43:08 - INFO - __main__ - Step 3788: {'lr': 0.0004994727571603412, 'samples': 181824, 'steps': 3787, 'loss/train': 3.4185471534729004} +07/25/2024 11:43:08 - INFO - __main__ - Step 3789: {'lr': 0.0004994724156360878, 'samples': 181872, 'steps': 3788, 'loss/train': 2.596578598022461} +07/25/2024 11:43:09 - INFO - __main__ - Step 3790: {'lr': 0.000499472074001375, 'samples': 181920, 'steps': 3789, 'loss/train': 3.7350711822509766} +07/25/2024 11:43:09 - INFO - __main__ - Step 3791: {'lr': 0.0004994717322562029, 'samples': 181968, 'steps': 3790, 'loss/train': 2.042241096496582} +07/25/2024 11:43:09 - INFO - __main__ - Step 3792: {'lr': 0.0004994713904005717, 'samples': 182016, 'steps': 3791, 'loss/train': 2.2401976585388184} +07/25/2024 11:43:09 - INFO - __main__ - Step 3793: {'lr': 0.0004994710484344815, 'samples': 182064, 'steps': 3792, 'loss/train': 2.455958366394043} +07/25/2024 11:43:10 - INFO - __main__ - Step 3794: {'lr': 0.0004994707063579326, 'samples': 182112, 'steps': 3793, 'loss/train': 2.8223414421081543} +07/25/2024 11:43:10 - INFO - __main__ - Step 3795: {'lr': 0.0004994703641709251, 'samples': 182160, 'steps': 3794, 'loss/train': 2.946564197540283} +07/25/2024 11:43:10 - INFO - __main__ - Step 3796: {'lr': 0.000499470021873459, 'samples': 182208, 'steps': 3795, 'loss/train': 3.002896308898926} +07/25/2024 11:43:11 - INFO - __main__ - Step 3797: {'lr': 0.0004994696794655346, 'samples': 182256, 'steps': 3796, 'loss/train': 2.7590718269348145} +07/25/2024 11:43:11 - INFO - __main__ - Step 3798: {'lr': 0.000499469336947152, 'samples': 182304, 'steps': 3797, 'loss/train': 2.369051218032837} +07/25/2024 11:43:11 - INFO - __main__ - Step 3799: {'lr': 0.0004994689943183114, 'samples': 182352, 'steps': 3798, 'loss/train': 1.7553220987319946} +07/25/2024 11:43:11 - INFO - __main__ - Step 3800: {'lr': 0.000499468651579013, 'samples': 182400, 'steps': 3799, 'loss/train': 0.8608822226524353} +07/25/2024 11:43:12 - INFO - __main__ - Step 3801: {'lr': 0.0004994683087292568, 'samples': 182448, 'steps': 3800, 'loss/train': 1.3394461870193481} +07/25/2024 11:43:12 - INFO - __main__ - Step 3802: {'lr': 0.0004994679657690429, 'samples': 182496, 'steps': 3801, 'loss/train': 2.4995946884155273} +07/25/2024 11:43:12 - INFO - __main__ - Step 3803: {'lr': 0.0004994676226983717, 'samples': 182544, 'steps': 3802, 'loss/train': 2.2567849159240723} +07/25/2024 11:43:13 - INFO - __main__ - Step 3804: {'lr': 0.0004994672795172432, 'samples': 182592, 'steps': 3803, 'loss/train': 2.6736695766448975} +07/25/2024 11:43:13 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489514 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:13 - INFO - __main__ - Step 3805: {'lr': 0.0004994669362256576, 'samples': 182640, 'steps': 3804, 'loss/train': 2.476327419281006} +07/25/2024 11:43:13 - INFO - __main__ - Step 3806: {'lr': 0.000499466592823615, 'samples': 182688, 'steps': 3805, 'loss/train': 2.824322462081909} +07/25/2024 11:43:13 - INFO - __main__ - Step 3807: {'lr': 0.0004994662493111156, 'samples': 182736, 'steps': 3806, 'loss/train': 2.598965883255005} +07/25/2024 11:43:14 - INFO - __main__ - Step 3808: {'lr': 0.0004994659056881596, 'samples': 182784, 'steps': 3807, 'loss/train': 2.702453851699829} +07/25/2024 11:43:14 - INFO - __main__ - Step 3809: {'lr': 0.000499465561954747, 'samples': 182832, 'steps': 3808, 'loss/train': 2.830897092819214} +07/25/2024 11:43:14 - INFO - __main__ - Step 3810: {'lr': 0.000499465218110878, 'samples': 182880, 'steps': 3809, 'loss/train': 2.781473398208618} +07/25/2024 11:43:15 - INFO - __main__ - Step 3811: {'lr': 0.0004994648741565529, 'samples': 182928, 'steps': 3810, 'loss/train': 1.604185938835144} +07/25/2024 11:43:15 - INFO - __main__ - Step 3812: {'lr': 0.0004994645300917717, 'samples': 182976, 'steps': 3811, 'loss/train': 3.631564140319824} +07/25/2024 11:43:15 - INFO - __main__ - Step 3813: {'lr': 0.0004994641859165347, 'samples': 183024, 'steps': 3812, 'loss/train': 2.050452709197998} +07/25/2024 11:43:15 - INFO - __main__ - Step 3814: {'lr': 0.0004994638416308417, 'samples': 183072, 'steps': 3813, 'loss/train': 3.434537172317505} +07/25/2024 11:43:16 - INFO - __main__ - Step 3815: {'lr': 0.0004994634972346933, 'samples': 183120, 'steps': 3814, 'loss/train': 2.602476119995117} +07/25/2024 11:43:16 - INFO - __main__ - Step 3816: {'lr': 0.0004994631527280894, 'samples': 183168, 'steps': 3815, 'loss/train': 2.724132776260376} +07/25/2024 11:43:16 - INFO - __main__ - Step 3817: {'lr': 0.0004994628081110301, 'samples': 183216, 'steps': 3816, 'loss/train': 2.1793620586395264} +07/25/2024 11:43:16 - DEBUG - datasets.packaged_modules.json.json - Batch of 10533372 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:17 - INFO - __main__ - Step 3818: {'lr': 0.0004994624633835159, 'samples': 183264, 'steps': 3817, 'loss/train': 3.0105841159820557} +07/25/2024 11:43:17 - INFO - __main__ - Step 3819: {'lr': 0.0004994621185455465, 'samples': 183312, 'steps': 3818, 'loss/train': 2.096404552459717} +07/25/2024 11:43:17 - INFO - __main__ - Step 3820: {'lr': 0.0004994617735971223, 'samples': 183360, 'steps': 3819, 'loss/train': 2.4151721000671387} +07/25/2024 11:43:17 - INFO - __main__ - Step 3821: {'lr': 0.0004994614285382435, 'samples': 183408, 'steps': 3820, 'loss/train': 2.970036268234253} +07/25/2024 11:43:17 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486259 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:18 - INFO - __main__ - Step 3822: {'lr': 0.0004994610833689102, 'samples': 183456, 'steps': 3821, 'loss/train': 2.117595672607422} +07/25/2024 11:43:18 - INFO - __main__ - Step 3823: {'lr': 0.0004994607380891224, 'samples': 183504, 'steps': 3822, 'loss/train': 1.6835442781448364} +07/25/2024 11:43:18 - INFO - __main__ - Step 3824: {'lr': 0.0004994603926988804, 'samples': 183552, 'steps': 3823, 'loss/train': 0.9540035724639893} +07/25/2024 11:43:19 - INFO - __main__ - Step 3825: {'lr': 0.0004994600471981843, 'samples': 183600, 'steps': 3824, 'loss/train': 2.3868017196655273} +07/25/2024 11:43:19 - INFO - __main__ - Step 3826: {'lr': 0.0004994597015870344, 'samples': 183648, 'steps': 3825, 'loss/train': 2.346362829208374} +07/25/2024 11:43:19 - INFO - __main__ - Step 3827: {'lr': 0.0004994593558654307, 'samples': 183696, 'steps': 3826, 'loss/train': 2.180529832839966} +07/25/2024 11:43:19 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487440 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:43:19 - INFO - __main__ - Step 3828: {'lr': 0.0004994590100333733, 'samples': 183744, 'steps': 3827, 'loss/train': 2.5777034759521484} +07/25/2024 11:43:20 - INFO - __main__ - Step 3829: {'lr': 0.0004994586640908625, 'samples': 183792, 'steps': 3828, 'loss/train': 2.0151631832122803} +07/25/2024 11:43:20 - INFO - __main__ - Step 3830: {'lr': 0.0004994583180378984, 'samples': 183840, 'steps': 3829, 'loss/train': 2.3433876037597656} +07/25/2024 11:43:20 - INFO - __main__ - Step 3831: {'lr': 0.0004994579718744811, 'samples': 183888, 'steps': 3830, 'loss/train': 2.7826220989227295} +07/25/2024 11:43:21 - INFO - __main__ - Step 3832: {'lr': 0.0004994576256006108, 'samples': 183936, 'steps': 3831, 'loss/train': 2.416983127593994} +07/25/2024 11:43:21 - INFO - __main__ - Step 3833: {'lr': 0.0004994572792162877, 'samples': 183984, 'steps': 3832, 'loss/train': 2.1882975101470947} +07/25/2024 11:43:21 - INFO - __main__ - Step 3834: {'lr': 0.0004994569327215119, 'samples': 184032, 'steps': 3833, 'loss/train': 3.004965305328369} +07/25/2024 11:43:21 - INFO - __main__ - Step 3835: {'lr': 0.0004994565861162836, 'samples': 184080, 'steps': 3834, 'loss/train': 2.499955415725708} +07/25/2024 11:43:22 - INFO - __main__ - Step 3836: {'lr': 0.0004994562394006028, 'samples': 184128, 'steps': 3835, 'loss/train': 3.665646553039551} +07/25/2024 11:43:22 - INFO - __main__ - Step 3837: {'lr': 0.0004994558925744697, 'samples': 184176, 'steps': 3836, 'loss/train': 2.307673692703247} +07/25/2024 11:43:22 - INFO - __main__ - Step 3838: {'lr': 0.0004994555456378848, 'samples': 184224, 'steps': 3837, 'loss/train': 2.9966955184936523} +07/25/2024 11:43:23 - INFO - __main__ - Step 3839: {'lr': 0.0004994551985908478, 'samples': 184272, 'steps': 3838, 'loss/train': 2.318330764770508} +07/25/2024 11:43:23 - INFO - __main__ - Step 3840: {'lr': 0.000499454851433359, 'samples': 184320, 'steps': 3839, 'loss/train': 2.5954270362854004} +07/25/2024 11:43:23 - INFO - __main__ - Step 3841: {'lr': 0.0004994545041654187, 'samples': 184368, 'steps': 3840, 'loss/train': 2.6581928730010986} +07/25/2024 11:43:23 - INFO - __main__ - Step 3842: {'lr': 0.0004994541567870268, 'samples': 184416, 'steps': 3841, 'loss/train': 2.6773900985717773} +07/25/2024 11:43:24 - DEBUG - datasets.packaged_modules.json.json - Batch of 10496408 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:43:24 - INFO - __main__ - Step 3843: {'lr': 0.0004994538092981837, 'samples': 184464, 'steps': 3842, 'loss/train': 2.580338478088379} +07/25/2024 11:43:24 - INFO - __main__ - Step 3844: {'lr': 0.0004994534616988894, 'samples': 184512, 'steps': 3843, 'loss/train': 3.397965669631958} +07/25/2024 11:43:24 - INFO - __main__ - Step 3845: {'lr': 0.000499453113989144, 'samples': 184560, 'steps': 3844, 'loss/train': 2.513498067855835} +07/25/2024 11:43:25 - INFO - __main__ - Step 3846: {'lr': 0.0004994527661689479, 'samples': 184608, 'steps': 3845, 'loss/train': 2.486363172531128} +07/25/2024 11:43:25 - INFO - __main__ - Step 3847: {'lr': 0.000499452418238301, 'samples': 184656, 'steps': 3846, 'loss/train': 1.573664903640747} +07/25/2024 11:43:25 - INFO - __main__ - Step 3848: {'lr': 0.0004994520701972037, 'samples': 184704, 'steps': 3847, 'loss/train': 0.9282979965209961} +07/25/2024 11:43:25 - INFO - __main__ - Step 3849: {'lr': 0.0004994517220456559, 'samples': 184752, 'steps': 3848, 'loss/train': 2.571040630340576} +07/25/2024 11:43:26 - INFO - __main__ - Step 3850: {'lr': 0.0004994513737836579, 'samples': 184800, 'steps': 3849, 'loss/train': 2.3205318450927734} +07/25/2024 11:43:26 - INFO - __main__ - Step 3851: {'lr': 0.0004994510254112097, 'samples': 184848, 'steps': 3850, 'loss/train': 2.4992613792419434} +07/25/2024 11:43:26 - INFO - __main__ - Step 3852: {'lr': 0.0004994506769283118, 'samples': 184896, 'steps': 3851, 'loss/train': 2.754502773284912} +07/25/2024 11:43:27 - INFO - __main__ - Step 3853: {'lr': 0.0004994503283349641, 'samples': 184944, 'steps': 3852, 'loss/train': 2.4970035552978516} +07/25/2024 11:43:27 - INFO - __main__ - Step 3854: {'lr': 0.0004994499796311667, 'samples': 184992, 'steps': 3853, 'loss/train': 1.9282610416412354} +07/25/2024 11:43:27 - INFO - __main__ - Step 3855: {'lr': 0.0004994496308169198, 'samples': 185040, 'steps': 3854, 'loss/train': 2.6228580474853516} +07/25/2024 11:43:27 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487177 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:27 - INFO - __main__ - Step 3856: {'lr': 0.0004994492818922237, 'samples': 185088, 'steps': 3855, 'loss/train': 2.3294079303741455} +07/25/2024 11:43:28 - INFO - __main__ - Step 3857: {'lr': 0.0004994489328570783, 'samples': 185136, 'steps': 3856, 'loss/train': 1.995585560798645} +07/25/2024 11:43:28 - INFO - __main__ - Step 3858: {'lr': 0.000499448583711484, 'samples': 185184, 'steps': 3857, 'loss/train': 2.699307918548584} +07/25/2024 11:43:28 - INFO - __main__ - Step 3859: {'lr': 0.0004994482344554407, 'samples': 185232, 'steps': 3858, 'loss/train': 2.1374547481536865} +07/25/2024 11:43:29 - INFO - __main__ - Step 3860: {'lr': 0.0004994478850889489, 'samples': 185280, 'steps': 3859, 'loss/train': 2.665416717529297} +07/25/2024 11:43:29 - INFO - __main__ - Step 3861: {'lr': 0.0004994475356120085, 'samples': 185328, 'steps': 3860, 'loss/train': 1.3095626831054688} +07/25/2024 11:43:29 - INFO - __main__ - Step 3862: {'lr': 0.0004994471860246198, 'samples': 185376, 'steps': 3861, 'loss/train': 2.661999225616455} +07/25/2024 11:43:29 - INFO - __main__ - Step 3863: {'lr': 0.0004994468363267827, 'samples': 185424, 'steps': 3862, 'loss/train': 3.04203200340271} +07/25/2024 11:43:30 - INFO - __main__ - Step 3864: {'lr': 0.0004994464865184978, 'samples': 185472, 'steps': 3863, 'loss/train': 2.4488890171051025} +07/25/2024 11:43:30 - INFO - __main__ - Step 3865: {'lr': 0.0004994461365997648, 'samples': 185520, 'steps': 3864, 'loss/train': 2.7782411575317383} +07/25/2024 11:43:30 - INFO - __main__ - Step 3866: {'lr': 0.000499445786570584, 'samples': 185568, 'steps': 3865, 'loss/train': 2.1791481971740723} +07/25/2024 11:43:31 - INFO - __main__ - Step 3867: {'lr': 0.0004994454364309557, 'samples': 185616, 'steps': 3866, 'loss/train': 2.395064353942871} +07/25/2024 11:43:31 - INFO - __main__ - Step 3868: {'lr': 0.0004994450861808799, 'samples': 185664, 'steps': 3867, 'loss/train': 2.7214365005493164} +07/25/2024 11:43:31 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488584 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:31 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488584 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:43:31 - INFO - __main__ - Step 3869: {'lr': 0.0004994447358203567, 'samples': 185712, 'steps': 3868, 'loss/train': 2.7145936489105225} +07/25/2024 11:43:31 - INFO - __main__ - Step 3870: {'lr': 0.0004994443853493866, 'samples': 185760, 'steps': 3869, 'loss/train': 2.035419464111328} +07/25/2024 11:43:32 - INFO - __main__ - Step 3871: {'lr': 0.0004994440347679694, 'samples': 185808, 'steps': 3870, 'loss/train': 2.389688730239868} +07/25/2024 11:43:32 - INFO - __main__ - Step 3872: {'lr': 0.0004994436840761052, 'samples': 185856, 'steps': 3871, 'loss/train': 0.7634067535400391} +07/25/2024 11:43:32 - INFO - __main__ - Step 3873: {'lr': 0.0004994433332737947, 'samples': 185904, 'steps': 3872, 'loss/train': 2.7999484539031982} +07/25/2024 11:43:33 - INFO - __main__ - Step 3874: {'lr': 0.0004994429823610374, 'samples': 185952, 'steps': 3873, 'loss/train': 2.33988356590271} +07/25/2024 11:43:33 - INFO - __main__ - Step 3875: {'lr': 0.0004994426313378338, 'samples': 186000, 'steps': 3874, 'loss/train': 2.2646918296813965} +07/25/2024 11:43:33 - INFO - __main__ - Step 3876: {'lr': 0.0004994422802041841, 'samples': 186048, 'steps': 3875, 'loss/train': 2.698319435119629} +07/25/2024 11:43:33 - INFO - __main__ - Step 3877: {'lr': 0.0004994419289600881, 'samples': 186096, 'steps': 3876, 'loss/train': 2.7667901515960693} +07/25/2024 11:43:34 - INFO - __main__ - Step 3878: {'lr': 0.0004994415776055464, 'samples': 186144, 'steps': 3877, 'loss/train': 2.59255313873291} +07/25/2024 11:43:34 - INFO - __main__ - Step 3879: {'lr': 0.0004994412261405589, 'samples': 186192, 'steps': 3878, 'loss/train': 2.281388521194458} +07/25/2024 11:43:34 - INFO - __main__ - Step 3880: {'lr': 0.0004994408745651258, 'samples': 186240, 'steps': 3879, 'loss/train': 2.2507643699645996} +07/25/2024 11:43:35 - INFO - __main__ - Step 3881: {'lr': 0.0004994405228792474, 'samples': 186288, 'steps': 3880, 'loss/train': 2.7327980995178223} +07/25/2024 11:43:35 - INFO - __main__ - Step 3882: {'lr': 0.0004994401710829235, 'samples': 186336, 'steps': 3881, 'loss/train': 3.1189799308776855} +07/25/2024 11:43:35 - INFO - __main__ - Step 3883: {'lr': 0.0004994398191761547, 'samples': 186384, 'steps': 3882, 'loss/train': 2.0002689361572266} +07/25/2024 11:43:35 - INFO - __main__ - Step 3884: {'lr': 0.0004994394671589408, 'samples': 186432, 'steps': 3883, 'loss/train': 2.678487777709961} +07/25/2024 11:43:36 - INFO - __main__ - Step 3885: {'lr': 0.0004994391150312821, 'samples': 186480, 'steps': 3884, 'loss/train': 2.775843858718872} +07/25/2024 11:43:36 - INFO - __main__ - Step 3886: {'lr': 0.0004994387627931788, 'samples': 186528, 'steps': 3885, 'loss/train': 3.1289730072021484} +07/25/2024 11:43:36 - INFO - __main__ - Step 3887: {'lr': 0.000499438410444631, 'samples': 186576, 'steps': 3886, 'loss/train': 2.342472553253174} +07/25/2024 11:43:37 - INFO - __main__ - Step 3888: {'lr': 0.0004994380579856388, 'samples': 186624, 'steps': 3887, 'loss/train': 2.563936471939087} +07/25/2024 11:43:37 - INFO - __main__ - Step 3889: {'lr': 0.0004994377054162025, 'samples': 186672, 'steps': 3888, 'loss/train': 2.784013509750366} +07/25/2024 11:43:37 - INFO - __main__ - Step 3890: {'lr': 0.0004994373527363222, 'samples': 186720, 'steps': 3889, 'loss/train': 2.7401299476623535} +07/25/2024 11:43:37 - INFO - __main__ - Step 3891: {'lr': 0.0004994369999459979, 'samples': 186768, 'steps': 3890, 'loss/train': 2.306135416030884} +07/25/2024 11:43:38 - INFO - __main__ - Step 3892: {'lr': 0.0004994366470452299, 'samples': 186816, 'steps': 3891, 'loss/train': 1.0020488500595093} +07/25/2024 11:43:38 - INFO - __main__ - Step 3893: {'lr': 0.0004994362940340184, 'samples': 186864, 'steps': 3892, 'loss/train': 2.459162473678589} +07/25/2024 11:43:38 - INFO - __main__ - Step 3894: {'lr': 0.0004994359409123635, 'samples': 186912, 'steps': 3893, 'loss/train': 2.6011433601379395} +07/25/2024 11:43:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10500774 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:43:38 - DEBUG - datasets.packaged_modules.json.json - Batch of 10563307 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:38 - INFO - __main__ - Step 3895: {'lr': 0.0004994355876802654, 'samples': 186960, 'steps': 3894, 'loss/train': 2.8232851028442383} +07/25/2024 11:43:39 - INFO - __main__ - Step 3896: {'lr': 0.000499435234337724, 'samples': 187008, 'steps': 3895, 'loss/train': 0.8468306064605713} +07/25/2024 11:43:39 - INFO - __main__ - Step 3897: {'lr': 0.0004994348808847399, 'samples': 187056, 'steps': 3896, 'loss/train': 1.9020169973373413} +07/25/2024 11:43:39 - INFO - __main__ - Step 3898: {'lr': 0.000499434527321313, 'samples': 187104, 'steps': 3897, 'loss/train': 1.6589299440383911} +07/25/2024 11:43:40 - INFO - __main__ - Step 3899: {'lr': 0.0004994341736474434, 'samples': 187152, 'steps': 3898, 'loss/train': 2.343111276626587} +07/25/2024 11:43:40 - INFO - __main__ - Step 3900: {'lr': 0.0004994338198631315, 'samples': 187200, 'steps': 3899, 'loss/train': 2.649456024169922} +07/25/2024 11:43:40 - INFO - __main__ - Step 3901: {'lr': 0.000499433465968377, 'samples': 187248, 'steps': 3900, 'loss/train': 2.4905951023101807} +07/25/2024 11:43:40 - INFO - __main__ - Step 3902: {'lr': 0.0004994331119631806, 'samples': 187296, 'steps': 3901, 'loss/train': 1.942293405532837} +07/25/2024 11:43:41 - INFO - __main__ - Step 3903: {'lr': 0.0004994327578475421, 'samples': 187344, 'steps': 3902, 'loss/train': 2.2086732387542725} +07/25/2024 11:43:41 - INFO - __main__ - Step 3904: {'lr': 0.0004994324036214617, 'samples': 187392, 'steps': 3903, 'loss/train': 2.28857159614563} +07/25/2024 11:43:41 - INFO - __main__ - Step 3905: {'lr': 0.0004994320492849397, 'samples': 187440, 'steps': 3904, 'loss/train': 3.0247421264648438} +07/25/2024 11:43:42 - INFO - __main__ - Step 3906: {'lr': 0.0004994316948379763, 'samples': 187488, 'steps': 3905, 'loss/train': 3.2449917793273926} +07/25/2024 11:43:42 - INFO - __main__ - Step 3907: {'lr': 0.0004994313402805713, 'samples': 187536, 'steps': 3906, 'loss/train': 2.569895029067993} +07/25/2024 11:43:42 - INFO - __main__ - Step 3908: {'lr': 0.0004994309856127253, 'samples': 187584, 'steps': 3907, 'loss/train': 2.331875801086426} +07/25/2024 11:43:42 - INFO - __main__ - Step 3909: {'lr': 0.0004994306308344381, 'samples': 187632, 'steps': 3908, 'loss/train': 3.153609275817871} +07/25/2024 11:43:43 - INFO - __main__ - Step 3910: {'lr': 0.00049943027594571, 'samples': 187680, 'steps': 3909, 'loss/train': 2.8305723667144775} +07/25/2024 11:43:43 - INFO - __main__ - Step 3911: {'lr': 0.0004994299209465413, 'samples': 187728, 'steps': 3910, 'loss/train': 2.4253089427948} +07/25/2024 11:43:43 - INFO - __main__ - Step 3912: {'lr': 0.0004994295658369319, 'samples': 187776, 'steps': 3911, 'loss/train': 1.8792599439620972} +07/25/2024 11:43:44 - INFO - __main__ - Step 3913: {'lr': 0.0004994292106168821, 'samples': 187824, 'steps': 3912, 'loss/train': 1.7842167615890503} +07/25/2024 11:43:44 - INFO - __main__ - Step 3914: {'lr': 0.0004994288552863921, 'samples': 187872, 'steps': 3913, 'loss/train': 2.337853193283081} +07/25/2024 11:43:44 - INFO - __main__ - Step 3915: {'lr': 0.0004994284998454619, 'samples': 187920, 'steps': 3914, 'loss/train': 2.0004093647003174} +07/25/2024 11:43:44 - INFO - __main__ - Step 3916: {'lr': 0.0004994281442940918, 'samples': 187968, 'steps': 3915, 'loss/train': 1.1348803043365479} +07/25/2024 11:43:45 - INFO - __main__ - Step 3917: {'lr': 0.0004994277886322819, 'samples': 188016, 'steps': 3916, 'loss/train': 2.5542571544647217} +07/25/2024 11:43:45 - INFO - __main__ - Step 3918: {'lr': 0.0004994274328600323, 'samples': 188064, 'steps': 3917, 'loss/train': 2.554544687271118} +07/25/2024 11:43:45 - INFO - __main__ - Step 3919: {'lr': 0.0004994270769773433, 'samples': 188112, 'steps': 3918, 'loss/train': 2.5326128005981445} +07/25/2024 11:43:46 - INFO - __main__ - Step 3920: {'lr': 0.000499426720984215, 'samples': 188160, 'steps': 3919, 'loss/train': 0.8415496349334717} +07/25/2024 11:43:46 - INFO - __main__ - Step 3921: {'lr': 0.0004994263648806475, 'samples': 188208, 'steps': 3920, 'loss/train': 2.0436394214630127} +07/25/2024 11:43:46 - INFO - __main__ - Step 3922: {'lr': 0.000499426008666641, 'samples': 188256, 'steps': 3921, 'loss/train': 1.3904979228973389} +07/25/2024 11:43:46 - INFO - __main__ - Step 3923: {'lr': 0.0004994256523421956, 'samples': 188304, 'steps': 3922, 'loss/train': 2.303971767425537} +07/25/2024 11:43:47 - INFO - __main__ - Step 3924: {'lr': 0.0004994252959073116, 'samples': 188352, 'steps': 3923, 'loss/train': 2.5797853469848633} +07/25/2024 11:43:47 - INFO - __main__ - Step 3925: {'lr': 0.0004994249393619891, 'samples': 188400, 'steps': 3924, 'loss/train': 2.6218655109405518} +07/25/2024 11:43:47 - INFO - __main__ - Step 3926: {'lr': 0.000499424582706228, 'samples': 188448, 'steps': 3925, 'loss/train': 2.814453125} +07/25/2024 11:43:48 - INFO - __main__ - Step 3927: {'lr': 0.0004994242259400289, 'samples': 188496, 'steps': 3926, 'loss/train': 2.409531831741333} +07/25/2024 11:43:48 - INFO - __main__ - Step 3928: {'lr': 0.0004994238690633917, 'samples': 188544, 'steps': 3927, 'loss/train': 1.4907013177871704} +07/25/2024 11:43:48 - INFO - __main__ - Step 3929: {'lr': 0.0004994235120763166, 'samples': 188592, 'steps': 3928, 'loss/train': 2.6707098484039307} +07/25/2024 11:43:48 - INFO - __main__ - Step 3930: {'lr': 0.0004994231549788038, 'samples': 188640, 'steps': 3929, 'loss/train': 2.52081561088562} +07/25/2024 11:43:49 - INFO - __main__ - Step 3931: {'lr': 0.0004994227977708533, 'samples': 188688, 'steps': 3930, 'loss/train': 2.4686784744262695} +07/25/2024 11:43:49 - INFO - __main__ - Step 3932: {'lr': 0.0004994224404524654, 'samples': 188736, 'steps': 3931, 'loss/train': 2.5412304401397705} +07/25/2024 11:43:49 - DEBUG - datasets.packaged_modules.json.json - Batch of 10502779 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:43:49 - INFO - __main__ - Step 3933: {'lr': 0.0004994220830236403, 'samples': 188784, 'steps': 3932, 'loss/train': 2.5406084060668945} +07/25/2024 11:43:50 - INFO - __main__ - Step 3934: {'lr': 0.000499421725484378, 'samples': 188832, 'steps': 3933, 'loss/train': 2.7313995361328125} +07/25/2024 11:43:50 - INFO - __main__ - Step 3935: {'lr': 0.0004994213678346787, 'samples': 188880, 'steps': 3934, 'loss/train': 2.070375680923462} +07/25/2024 11:43:50 - INFO - __main__ - Step 3936: {'lr': 0.0004994210100745428, 'samples': 188928, 'steps': 3935, 'loss/train': 2.094877004623413} +07/25/2024 11:43:50 - INFO - __main__ - Step 3937: {'lr': 0.0004994206522039702, 'samples': 188976, 'steps': 3936, 'loss/train': 1.918496012687683} +07/25/2024 11:43:51 - INFO - __main__ - Step 3938: {'lr': 0.0004994202942229611, 'samples': 189024, 'steps': 3937, 'loss/train': 2.2615063190460205} +07/25/2024 11:43:51 - INFO - __main__ - Step 3939: {'lr': 0.0004994199361315156, 'samples': 189072, 'steps': 3938, 'loss/train': 2.837360143661499} +07/25/2024 11:43:51 - INFO - __main__ - Step 3940: {'lr': 0.000499419577929634, 'samples': 189120, 'steps': 3939, 'loss/train': 0.4514876902103424} +07/25/2024 11:43:52 - INFO - __main__ - Step 3941: {'lr': 0.0004994192196173164, 'samples': 189168, 'steps': 3940, 'loss/train': 2.2691588401794434} +07/25/2024 11:43:52 - INFO - __main__ - Step 3942: {'lr': 0.000499418861194563, 'samples': 189216, 'steps': 3941, 'loss/train': 2.2991878986358643} +07/25/2024 11:43:52 - INFO - __main__ - Step 3943: {'lr': 0.0004994185026613739, 'samples': 189264, 'steps': 3942, 'loss/train': 2.8553216457366943} +07/25/2024 11:43:52 - INFO - __main__ - Step 3944: {'lr': 0.0004994181440177493, 'samples': 189312, 'steps': 3943, 'loss/train': 0.8672656416893005} +07/25/2024 11:43:53 - INFO - __main__ - Step 3945: {'lr': 0.0004994177852636893, 'samples': 189360, 'steps': 3944, 'loss/train': 2.222386598587036} +07/25/2024 11:43:53 - INFO - __main__ - Step 3946: {'lr': 0.0004994174263991941, 'samples': 189408, 'steps': 3945, 'loss/train': 1.4683501720428467} +07/25/2024 11:43:53 - INFO - __main__ - Step 3947: {'lr': 0.0004994170674242638, 'samples': 189456, 'steps': 3946, 'loss/train': 2.1312761306762695} +07/25/2024 11:43:54 - INFO - __main__ - Step 3948: {'lr': 0.0004994167083388986, 'samples': 189504, 'steps': 3947, 'loss/train': 2.3809821605682373} +07/25/2024 11:43:54 - INFO - __main__ - Step 3949: {'lr': 0.0004994163491430988, 'samples': 189552, 'steps': 3948, 'loss/train': 2.5698981285095215} +07/25/2024 11:43:54 - INFO - __main__ - Step 3950: {'lr': 0.0004994159898368643, 'samples': 189600, 'steps': 3949, 'loss/train': 1.9343997240066528} +07/25/2024 11:43:54 - INFO - __main__ - Step 3951: {'lr': 0.0004994156304201954, 'samples': 189648, 'steps': 3950, 'loss/train': 1.9688053131103516} +07/25/2024 11:43:55 - INFO - __main__ - Step 3952: {'lr': 0.0004994152708930924, 'samples': 189696, 'steps': 3951, 'loss/train': 2.4033563137054443} +07/25/2024 11:43:55 - INFO - __main__ - Step 3953: {'lr': 0.0004994149112555552, 'samples': 189744, 'steps': 3952, 'loss/train': 2.339702844619751} +07/25/2024 11:43:55 - INFO - __main__ - Step 3954: {'lr': 0.0004994145515075841, 'samples': 189792, 'steps': 3953, 'loss/train': 2.041099786758423} +07/25/2024 11:43:56 - INFO - __main__ - Step 3955: {'lr': 0.0004994141916491792, 'samples': 189840, 'steps': 3954, 'loss/train': 2.5325701236724854} +07/25/2024 11:43:56 - INFO - __main__ - Step 3956: {'lr': 0.0004994138316803406, 'samples': 189888, 'steps': 3955, 'loss/train': 2.559525489807129} +07/25/2024 11:43:56 - INFO - __main__ - Step 3957: {'lr': 0.0004994134716010686, 'samples': 189936, 'steps': 3956, 'loss/train': 2.407536745071411} +07/25/2024 11:43:56 - INFO - __main__ - Step 3958: {'lr': 0.0004994131114113633, 'samples': 189984, 'steps': 3957, 'loss/train': 1.826395869255066} +07/25/2024 11:43:57 - INFO - __main__ - Step 3959: {'lr': 0.0004994127511112248, 'samples': 190032, 'steps': 3958, 'loss/train': 2.2544355392456055} +07/25/2024 11:43:57 - INFO - __main__ - Step 3960: {'lr': 0.0004994123907006534, 'samples': 190080, 'steps': 3959, 'loss/train': 2.507251501083374} +07/25/2024 11:43:57 - INFO - __main__ - Step 3961: {'lr': 0.0004994120301796493, 'samples': 190128, 'steps': 3960, 'loss/train': 2.2937188148498535} +07/25/2024 11:43:58 - INFO - __main__ - Step 3962: {'lr': 0.0004994116695482124, 'samples': 190176, 'steps': 3961, 'loss/train': 2.9150943756103516} +07/25/2024 11:43:58 - INFO - __main__ - Step 3963: {'lr': 0.0004994113088063431, 'samples': 190224, 'steps': 3962, 'loss/train': 2.470123291015625} +07/25/2024 11:43:58 - INFO - __main__ - Step 3964: {'lr': 0.0004994109479540413, 'samples': 190272, 'steps': 3963, 'loss/train': 0.3366045653820038} +07/25/2024 11:43:58 - INFO - __main__ - Step 3965: {'lr': 0.0004994105869913075, 'samples': 190320, 'steps': 3964, 'loss/train': 2.2820608615875244} +07/25/2024 11:43:59 - INFO - __main__ - Step 3966: {'lr': 0.0004994102259181416, 'samples': 190368, 'steps': 3965, 'loss/train': 2.576035976409912} +07/25/2024 11:43:59 - INFO - __main__ - Step 3967: {'lr': 0.0004994098647345438, 'samples': 190416, 'steps': 3966, 'loss/train': 2.1536405086517334} +07/25/2024 11:43:59 - INFO - __main__ - Step 3968: {'lr': 0.0004994095034405142, 'samples': 190464, 'steps': 3967, 'loss/train': 0.8400634527206421} +07/25/2024 11:44:00 - INFO - __main__ - Step 3969: {'lr': 0.0004994091420360533, 'samples': 190512, 'steps': 3968, 'loss/train': 2.5633370876312256} +07/25/2024 11:44:00 - INFO - __main__ - Step 3970: {'lr': 0.0004994087805211608, 'samples': 190560, 'steps': 3969, 'loss/train': 2.3350045680999756} +07/25/2024 11:44:00 - INFO - __main__ - Step 3971: {'lr': 0.0004994084188958373, 'samples': 190608, 'steps': 3970, 'loss/train': 2.162202835083008} +07/25/2024 11:44:00 - INFO - __main__ - Step 3972: {'lr': 0.0004994080571600827, 'samples': 190656, 'steps': 3971, 'loss/train': 2.9186999797821045} +07/25/2024 11:44:01 - INFO - __main__ - Step 3973: {'lr': 0.0004994076953138971, 'samples': 190704, 'steps': 3972, 'loss/train': 2.779479503631592} +07/25/2024 11:44:01 - INFO - __main__ - Step 3974: {'lr': 0.0004994073333572807, 'samples': 190752, 'steps': 3973, 'loss/train': 1.767830729484558} +07/25/2024 11:44:01 - INFO - __main__ - Step 3975: {'lr': 0.0004994069712902338, 'samples': 190800, 'steps': 3974, 'loss/train': 1.8509985208511353} +07/25/2024 11:44:02 - INFO - __main__ - Step 3976: {'lr': 0.0004994066091127566, 'samples': 190848, 'steps': 3975, 'loss/train': 2.467649459838867} +07/25/2024 11:44:02 - INFO - __main__ - Step 3977: {'lr': 0.0004994062468248491, 'samples': 190896, 'steps': 3976, 'loss/train': 2.685195207595825} +07/25/2024 11:44:02 - INFO - __main__ - Step 3978: {'lr': 0.0004994058844265114, 'samples': 190944, 'steps': 3977, 'loss/train': 2.1745660305023193} +07/25/2024 11:44:02 - INFO - __main__ - Step 3979: {'lr': 0.0004994055219177437, 'samples': 190992, 'steps': 3978, 'loss/train': 2.4404537677764893} +07/25/2024 11:44:03 - INFO - __main__ - Step 3980: {'lr': 0.0004994051592985463, 'samples': 191040, 'steps': 3979, 'loss/train': 2.439316511154175} +07/25/2024 11:44:03 - INFO - __main__ - Step 3981: {'lr': 0.0004994047965689194, 'samples': 191088, 'steps': 3980, 'loss/train': 2.094463586807251} +07/25/2024 11:44:03 - INFO - __main__ - Step 3982: {'lr': 0.0004994044337288629, 'samples': 191136, 'steps': 3981, 'loss/train': 3.4185574054718018} +07/25/2024 11:44:04 - INFO - __main__ - Step 3983: {'lr': 0.0004994040707783771, 'samples': 191184, 'steps': 3982, 'loss/train': 2.3135995864868164} +07/25/2024 11:44:04 - DEBUG - datasets.packaged_modules.json.json - Batch of 10520236 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:44:04 - INFO - __main__ - Step 3984: {'lr': 0.0004994037077174624, 'samples': 191232, 'steps': 3983, 'loss/train': 2.5372071266174316} +07/25/2024 11:44:04 - INFO - __main__ - Step 3985: {'lr': 0.0004994033445461185, 'samples': 191280, 'steps': 3984, 'loss/train': 1.9403111934661865} +07/25/2024 11:44:04 - DEBUG - datasets.packaged_modules.json.json - Batch of 10510075 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:44:04 - INFO - __main__ - Step 3986: {'lr': 0.0004994029812643458, 'samples': 191328, 'steps': 3985, 'loss/train': 2.2246973514556885} +07/25/2024 11:44:05 - INFO - __main__ - Step 3987: {'lr': 0.0004994026178721444, 'samples': 191376, 'steps': 3986, 'loss/train': 0.8517030477523804} +07/25/2024 11:44:05 - INFO - __main__ - Step 3988: {'lr': 0.0004994022543695147, 'samples': 191424, 'steps': 3987, 'loss/train': 0.3450881242752075} +07/25/2024 11:44:05 - INFO - __main__ - Step 3989: {'lr': 0.0004994018907564566, 'samples': 191472, 'steps': 3988, 'loss/train': 1.6388287544250488} +07/25/2024 11:44:06 - INFO - __main__ - Step 3990: {'lr': 0.0004994015270329702, 'samples': 191520, 'steps': 3989, 'loss/train': 1.0203131437301636} +07/25/2024 11:44:06 - INFO - __main__ - Step 3991: {'lr': 0.000499401163199056, 'samples': 191568, 'steps': 3990, 'loss/train': 2.0322976112365723} +07/25/2024 11:44:06 - INFO - __main__ - Step 3992: {'lr': 0.0004994007992547137, 'samples': 191616, 'steps': 3991, 'loss/train': 0.8174245953559875} +07/25/2024 11:44:06 - INFO - __main__ - Step 3993: {'lr': 0.0004994004351999439, 'samples': 191664, 'steps': 3992, 'loss/train': 2.8378357887268066} +07/25/2024 11:44:07 - INFO - __main__ - Step 3994: {'lr': 0.0004994000710347465, 'samples': 191712, 'steps': 3993, 'loss/train': 2.7653825283050537} +07/25/2024 11:44:07 - INFO - __main__ - Step 3995: {'lr': 0.0004993997067591218, 'samples': 191760, 'steps': 3994, 'loss/train': 2.0679898262023926} +07/25/2024 11:44:07 - INFO - __main__ - Step 3996: {'lr': 0.0004993993423730699, 'samples': 191808, 'steps': 3995, 'loss/train': 2.867063045501709} +07/25/2024 11:44:07 - INFO - __main__ - Step 3997: {'lr': 0.000499398977876591, 'samples': 191856, 'steps': 3996, 'loss/train': 2.550776481628418} +07/25/2024 11:44:08 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488851 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:44:08 - INFO - __main__ - Step 3998: {'lr': 0.0004993986132696851, 'samples': 191904, 'steps': 3997, 'loss/train': 2.168668031692505} +07/25/2024 11:44:08 - INFO - __main__ - Step 3999: {'lr': 0.0004993982485523525, 'samples': 191952, 'steps': 3998, 'loss/train': 2.856213092803955} +07/25/2024 11:44:08 - INFO - __main__ - Step 4000: {'lr': 0.0004993978837245933, 'samples': 192000, 'steps': 3999, 'loss/train': 2.5290701389312744} +07/25/2024 11:44:09 - INFO - __main__ - Step 4001: {'lr': 0.0004993975187864078, 'samples': 192048, 'steps': 4000, 'loss/train': 2.3915367126464844} +07/25/2024 11:44:09 - INFO - __main__ - Step 4002: {'lr': 0.000499397153737796, 'samples': 192096, 'steps': 4001, 'loss/train': 1.902687430381775} +07/25/2024 11:44:09 - INFO - __main__ - Step 4003: {'lr': 0.0004993967885787582, 'samples': 192144, 'steps': 4002, 'loss/train': 2.3994812965393066} +07/25/2024 11:44:09 - INFO - __main__ - Step 4004: {'lr': 0.0004993964233092946, 'samples': 192192, 'steps': 4003, 'loss/train': 2.320565938949585} +07/25/2024 11:44:10 - INFO - __main__ - Step 4005: {'lr': 0.000499396057929405, 'samples': 192240, 'steps': 4004, 'loss/train': 2.3212783336639404} +07/25/2024 11:44:10 - INFO - __main__ - Step 4006: {'lr': 0.0004993956924390899, 'samples': 192288, 'steps': 4005, 'loss/train': 2.7089381217956543} +07/25/2024 11:44:10 - INFO - __main__ - Step 4007: {'lr': 0.0004993953268383494, 'samples': 192336, 'steps': 4006, 'loss/train': 2.003720760345459} +07/25/2024 11:44:11 - INFO - __main__ - Step 4008: {'lr': 0.0004993949611271837, 'samples': 192384, 'steps': 4007, 'loss/train': 1.8957616090774536} +07/25/2024 11:44:11 - INFO - __main__ - Step 4009: {'lr': 0.0004993945953055928, 'samples': 192432, 'steps': 4008, 'loss/train': 2.7090606689453125} +07/25/2024 11:44:11 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488358 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:44:11 - INFO - __main__ - Step 4010: {'lr': 0.000499394229373577, 'samples': 192480, 'steps': 4009, 'loss/train': 1.721059799194336} +07/25/2024 11:44:11 - INFO - __main__ - Step 4011: {'lr': 0.0004993938633311365, 'samples': 192528, 'steps': 4010, 'loss/train': 1.6494895219802856} +07/25/2024 11:44:12 - INFO - __main__ - Step 4012: {'lr': 0.0004993934971782713, 'samples': 192576, 'steps': 4011, 'loss/train': 0.526854395866394} +07/25/2024 11:44:12 - INFO - __main__ - Step 4013: {'lr': 0.0004993931309149816, 'samples': 192624, 'steps': 4012, 'loss/train': 2.515601634979248} +07/25/2024 11:44:12 - INFO - __main__ - Step 4014: {'lr': 0.0004993927645412677, 'samples': 192672, 'steps': 4013, 'loss/train': 1.8146823644638062} +07/25/2024 11:44:13 - INFO - __main__ - Step 4015: {'lr': 0.0004993923980571296, 'samples': 192720, 'steps': 4014, 'loss/train': 1.4679744243621826} +07/25/2024 11:44:13 - INFO - __main__ - Step 4016: {'lr': 0.0004993920314625677, 'samples': 192768, 'steps': 4015, 'loss/train': 0.9296603202819824} +07/25/2024 11:44:13 - INFO - __main__ - Step 4017: {'lr': 0.0004993916647575817, 'samples': 192816, 'steps': 4016, 'loss/train': 1.885560154914856} +07/25/2024 11:44:13 - INFO - __main__ - Step 4018: {'lr': 0.0004993912979421723, 'samples': 192864, 'steps': 4017, 'loss/train': 3.353346586227417} +07/25/2024 11:44:14 - INFO - __main__ - Step 4019: {'lr': 0.0004993909310163393, 'samples': 192912, 'steps': 4018, 'loss/train': 2.974470853805542} +07/25/2024 11:44:14 - INFO - __main__ - Step 4020: {'lr': 0.0004993905639800831, 'samples': 192960, 'steps': 4019, 'loss/train': 2.4973487854003906} +07/25/2024 11:44:14 - INFO - __main__ - Step 4021: {'lr': 0.0004993901968334036, 'samples': 193008, 'steps': 4020, 'loss/train': 2.2981274127960205} +07/25/2024 11:44:15 - INFO - __main__ - Step 4022: {'lr': 0.0004993898295763011, 'samples': 193056, 'steps': 4021, 'loss/train': 3.471933364868164} +07/25/2024 11:44:15 - INFO - __main__ - Step 4023: {'lr': 0.000499389462208776, 'samples': 193104, 'steps': 4022, 'loss/train': 2.5848450660705566} +07/25/2024 11:44:15 - INFO - __main__ - Step 4024: {'lr': 0.000499389094730828, 'samples': 193152, 'steps': 4023, 'loss/train': 1.9623281955718994} +07/25/2024 11:44:15 - INFO - __main__ - Step 4025: {'lr': 0.0004993887271424576, 'samples': 193200, 'steps': 4024, 'loss/train': 2.834892511367798} +07/25/2024 11:44:16 - INFO - __main__ - Step 4026: {'lr': 0.0004993883594436649, 'samples': 193248, 'steps': 4025, 'loss/train': 1.6416233777999878} +07/25/2024 11:44:16 - INFO - __main__ - Step 4027: {'lr': 0.0004993879916344499, 'samples': 193296, 'steps': 4026, 'loss/train': 2.1812944412231445} +07/25/2024 11:44:16 - INFO - __main__ - Step 4028: {'lr': 0.000499387623714813, 'samples': 193344, 'steps': 4027, 'loss/train': 2.1821253299713135} +07/25/2024 11:44:17 - INFO - __main__ - Step 4029: {'lr': 0.0004993872556847542, 'samples': 193392, 'steps': 4028, 'loss/train': 2.6583330631256104} +07/25/2024 11:44:17 - INFO - __main__ - Step 4030: {'lr': 0.0004993868875442737, 'samples': 193440, 'steps': 4029, 'loss/train': 2.7159156799316406} +07/25/2024 11:44:17 - INFO - __main__ - Step 4031: {'lr': 0.0004993865192933718, 'samples': 193488, 'steps': 4030, 'loss/train': 2.526489734649658} +07/25/2024 11:44:17 - INFO - __main__ - Step 4032: {'lr': 0.0004993861509320483, 'samples': 193536, 'steps': 4031, 'loss/train': 2.2746739387512207} +07/25/2024 11:44:18 - INFO - __main__ - Step 4033: {'lr': 0.0004993857824603038, 'samples': 193584, 'steps': 4032, 'loss/train': 2.5109338760375977} +07/25/2024 11:44:18 - INFO - __main__ - Step 4034: {'lr': 0.0004993854138781382, 'samples': 193632, 'steps': 4033, 'loss/train': 2.828665018081665} +07/25/2024 11:44:18 - INFO - __main__ - Step 4035: {'lr': 0.0004993850451855517, 'samples': 193680, 'steps': 4034, 'loss/train': 2.244917154312134} +07/25/2024 11:44:19 - INFO - __main__ - Step 4036: {'lr': 0.0004993846763825444, 'samples': 193728, 'steps': 4035, 'loss/train': 0.32587331533432007} +07/25/2024 11:44:19 - INFO - __main__ - Step 4037: {'lr': 0.0004993843074691168, 'samples': 193776, 'steps': 4036, 'loss/train': 2.481549024581909} +07/25/2024 11:44:19 - INFO - __main__ - Step 4038: {'lr': 0.0004993839384452686, 'samples': 193824, 'steps': 4037, 'loss/train': 2.2999491691589355} +07/25/2024 11:44:19 - INFO - __main__ - Step 4039: {'lr': 0.0004993835693110003, 'samples': 193872, 'steps': 4038, 'loss/train': 1.2446590662002563} +07/25/2024 11:44:19 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488214 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:44:19 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488214 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 11:44:20 - INFO - __main__ - Step 4040: {'lr': 0.0004993832000663119, 'samples': 193920, 'steps': 4039, 'loss/train': 0.8786347508430481} +07/25/2024 11:44:20 - INFO - __main__ - Step 4041: {'lr': 0.0004993828307112035, 'samples': 193968, 'steps': 4040, 'loss/train': 1.9392789602279663} +07/25/2024 11:44:20 - INFO - __main__ - Step 4042: {'lr': 0.0004993824612456756, 'samples': 194016, 'steps': 4041, 'loss/train': 3.1035773754119873} +07/25/2024 11:44:21 - INFO - __main__ - Step 4043: {'lr': 0.0004993820916697279, 'samples': 194064, 'steps': 4042, 'loss/train': 2.6916134357452393} +07/25/2024 11:44:21 - INFO - __main__ - Step 4044: {'lr': 0.0004993817219833609, 'samples': 194112, 'steps': 4043, 'loss/train': 2.5381550788879395} +07/25/2024 11:44:21 - INFO - __main__ - Step 4045: {'lr': 0.0004993813521865746, 'samples': 194160, 'steps': 4044, 'loss/train': 2.4076499938964844} +07/25/2024 11:44:21 - INFO - __main__ - Step 4046: {'lr': 0.0004993809822793694, 'samples': 194208, 'steps': 4045, 'loss/train': 2.832277536392212} +07/25/2024 11:44:22 - INFO - __main__ - Step 4047: {'lr': 0.0004993806122617451, 'samples': 194256, 'steps': 4046, 'loss/train': 2.7886505126953125} +07/25/2024 11:44:22 - INFO - __main__ - Step 4048: {'lr': 0.0004993802421337021, 'samples': 194304, 'steps': 4047, 'loss/train': 2.658658981323242} +07/25/2024 11:44:22 - INFO - __main__ - Step 4049: {'lr': 0.0004993798718952406, 'samples': 194352, 'steps': 4048, 'loss/train': 2.1667308807373047} +07/25/2024 11:44:23 - INFO - __main__ - Step 4050: {'lr': 0.0004993795015463606, 'samples': 194400, 'steps': 4049, 'loss/train': 1.6126019954681396} +07/25/2024 11:44:23 - INFO - __main__ - Step 4051: {'lr': 0.0004993791310870624, 'samples': 194448, 'steps': 4050, 'loss/train': 2.252687931060791} +07/25/2024 11:44:23 - INFO - __main__ - Step 4052: {'lr': 0.0004993787605173461, 'samples': 194496, 'steps': 4051, 'loss/train': 2.428487777709961} +07/25/2024 11:44:23 - INFO - __main__ - Step 4053: {'lr': 0.0004993783898372119, 'samples': 194544, 'steps': 4052, 'loss/train': 2.783052682876587} +07/25/2024 11:44:24 - INFO - __main__ - Step 4054: {'lr': 0.0004993780190466598, 'samples': 194592, 'steps': 4053, 'loss/train': 2.924306631088257} +07/25/2024 11:44:24 - INFO - __main__ - Step 4055: {'lr': 0.0004993776481456902, 'samples': 194640, 'steps': 4054, 'loss/train': 2.4031083583831787} +07/25/2024 11:44:24 - INFO - __main__ - Step 4056: {'lr': 0.0004993772771343031, 'samples': 194688, 'steps': 4055, 'loss/train': 2.640939950942993} +07/25/2024 11:44:25 - INFO - __main__ - Step 4057: {'lr': 0.0004993769060124989, 'samples': 194736, 'steps': 4056, 'loss/train': 2.4779231548309326} +07/25/2024 11:44:25 - INFO - __main__ - Step 4058: {'lr': 0.0004993765347802774, 'samples': 194784, 'steps': 4057, 'loss/train': 2.657998561859131} +07/25/2024 11:44:25 - INFO - __main__ - Step 4059: {'lr': 0.000499376163437639, 'samples': 194832, 'steps': 4058, 'loss/train': 2.4317264556884766} +07/25/2024 11:44:25 - INFO - __main__ - Step 4060: {'lr': 0.000499375791984584, 'samples': 194880, 'steps': 4059, 'loss/train': 0.3313402831554413} +07/25/2024 11:44:26 - INFO - __main__ - Step 4061: {'lr': 0.0004993754204211121, 'samples': 194928, 'steps': 4060, 'loss/train': 2.550886631011963} +07/25/2024 11:44:26 - INFO - __main__ - Step 4062: {'lr': 0.000499375048747224, 'samples': 194976, 'steps': 4061, 'loss/train': 2.116123914718628} +07/25/2024 11:44:26 - INFO - __main__ - Step 4063: {'lr': 0.0004993746769629194, 'samples': 195024, 'steps': 4062, 'loss/train': 2.4653890132904053} +07/25/2024 11:44:27 - INFO - __main__ - Step 4064: {'lr': 0.0004993743050681989, 'samples': 195072, 'steps': 4063, 'loss/train': 1.012247085571289} +07/25/2024 11:44:27 - INFO - __main__ - Step 4065: {'lr': 0.0004993739330630623, 'samples': 195120, 'steps': 4064, 'loss/train': 2.3399405479431152} +07/25/2024 11:44:27 - INFO - __main__ - Step 4066: {'lr': 0.00049937356094751, 'samples': 195168, 'steps': 4065, 'loss/train': 2.975707530975342} +07/25/2024 11:44:27 - INFO - __main__ - Step 4067: {'lr': 0.000499373188721542, 'samples': 195216, 'steps': 4066, 'loss/train': 2.656440496444702} +07/25/2024 11:44:28 - INFO - __main__ - Step 4068: {'lr': 0.0004993728163851586, 'samples': 195264, 'steps': 4067, 'loss/train': 2.3532536029815674} +07/25/2024 11:44:28 - INFO - __main__ - Step 4069: {'lr': 0.0004993724439383598, 'samples': 195312, 'steps': 4068, 'loss/train': 2.2893102169036865} +07/25/2024 11:44:28 - INFO - __main__ - Step 4070: {'lr': 0.0004993720713811459, 'samples': 195360, 'steps': 4069, 'loss/train': 2.722074031829834} +07/25/2024 11:44:29 - INFO - __main__ - Step 4071: {'lr': 0.000499371698713517, 'samples': 195408, 'steps': 4070, 'loss/train': 2.7159616947174072} +07/25/2024 11:44:29 - INFO - __main__ - Step 4072: {'lr': 0.0004993713259354735, 'samples': 195456, 'steps': 4071, 'loss/train': 2.469196081161499} +07/25/2024 11:44:29 - INFO - __main__ - Step 4073: {'lr': 0.0004993709530470152, 'samples': 195504, 'steps': 4072, 'loss/train': 2.4205288887023926} +07/25/2024 11:44:29 - INFO - __main__ - Step 4074: {'lr': 0.0004993705800481424, 'samples': 195552, 'steps': 4073, 'loss/train': 2.221895217895508} +07/25/2024 11:44:30 - INFO - __main__ - Step 4075: {'lr': 0.0004993702069388553, 'samples': 195600, 'steps': 4074, 'loss/train': 2.6408848762512207} +07/25/2024 11:44:30 - INFO - __main__ - Step 4076: {'lr': 0.0004993698337191542, 'samples': 195648, 'steps': 4075, 'loss/train': 2.254742383956909} +07/25/2024 11:44:30 - INFO - __main__ - Step 4077: {'lr': 0.0004993694603890391, 'samples': 195696, 'steps': 4076, 'loss/train': 2.6150152683258057} +07/25/2024 11:44:31 - INFO - __main__ - Step 4078: {'lr': 0.0004993690869485101, 'samples': 195744, 'steps': 4077, 'loss/train': 2.9090683460235596} +07/25/2024 11:44:31 - INFO - __main__ - Step 4079: {'lr': 0.0004993687133975674, 'samples': 195792, 'steps': 4078, 'loss/train': 2.7779712677001953} +07/25/2024 11:44:31 - INFO - __main__ - Step 4080: {'lr': 0.0004993683397362113, 'samples': 195840, 'steps': 4079, 'loss/train': 2.689649820327759} +07/25/2024 11:44:31 - INFO - __main__ - Step 4081: {'lr': 0.0004993679659644419, 'samples': 195888, 'steps': 4080, 'loss/train': 2.493683338165283} +07/25/2024 11:44:32 - INFO - __main__ - Step 4082: {'lr': 0.0004993675920822593, 'samples': 195936, 'steps': 4081, 'loss/train': 1.4683271646499634} +07/25/2024 11:44:32 - INFO - __main__ - Step 4083: {'lr': 0.0004993672180896639, 'samples': 195984, 'steps': 4082, 'loss/train': 2.93395733833313} +07/25/2024 11:44:32 - INFO - __main__ - Step 4084: {'lr': 0.0004993668439866555, 'samples': 196032, 'steps': 4083, 'loss/train': 0.3706550598144531} +07/25/2024 11:44:33 - INFO - __main__ - Step 4085: {'lr': 0.0004993664697732344, 'samples': 196080, 'steps': 4084, 'loss/train': 2.2787227630615234} +07/25/2024 11:44:33 - INFO - __main__ - Step 4086: {'lr': 0.000499366095449401, 'samples': 196128, 'steps': 4085, 'loss/train': 2.147397994995117} +07/25/2024 11:44:33 - INFO - __main__ - Step 4087: {'lr': 0.0004993657210151552, 'samples': 196176, 'steps': 4086, 'loss/train': 2.3419029712677} +07/25/2024 11:44:33 - INFO - __main__ - Step 4088: {'lr': 0.0004993653464704972, 'samples': 196224, 'steps': 4087, 'loss/train': 0.9555820226669312} +07/25/2024 11:44:34 - INFO - __main__ - Step 4089: {'lr': 0.0004993649718154273, 'samples': 196272, 'steps': 4088, 'loss/train': 2.029557704925537} +07/25/2024 11:44:34 - INFO - __main__ - Step 4090: {'lr': 0.0004993645970499455, 'samples': 196320, 'steps': 4089, 'loss/train': 3.0597100257873535} +07/25/2024 11:44:34 - INFO - __main__ - Step 4091: {'lr': 0.000499364222174052, 'samples': 196368, 'steps': 4090, 'loss/train': 2.0666894912719727} +07/25/2024 11:44:35 - INFO - __main__ - Step 4092: {'lr': 0.0004993638471877471, 'samples': 196416, 'steps': 4091, 'loss/train': 1.9124701023101807} +07/25/2024 11:44:35 - INFO - __main__ - Step 4093: {'lr': 0.0004993634720910308, 'samples': 196464, 'steps': 4092, 'loss/train': 2.2486963272094727} +07/25/2024 11:44:35 - INFO - __main__ - Step 4094: {'lr': 0.0004993630968839034, 'samples': 196512, 'steps': 4093, 'loss/train': 2.6682965755462646} +07/25/2024 11:44:35 - INFO - __main__ - Step 4095: {'lr': 0.0004993627215663651, 'samples': 196560, 'steps': 4094, 'loss/train': 2.891833543777466} +07/25/2024 11:44:36 - INFO - __main__ - Step 4096: {'lr': 0.0004993623461384158, 'samples': 196608, 'steps': 4095, 'loss/train': 2.8318874835968018} +07/25/2024 11:44:36 - INFO - __main__ - Step 4097: {'lr': 0.000499361970600056, 'samples': 196656, 'steps': 4096, 'loss/train': 2.7326228618621826} +07/25/2024 11:44:36 - INFO - __main__ - Step 4098: {'lr': 0.0004993615949512855, 'samples': 196704, 'steps': 4097, 'loss/train': 2.4361913204193115} +07/25/2024 11:44:36 - INFO - __main__ - Step 4099: {'lr': 0.0004993612191921048, 'samples': 196752, 'steps': 4098, 'loss/train': 1.9519349336624146} +07/25/2024 11:44:37 - INFO - __main__ - Step 4100: {'lr': 0.0004993608433225139, 'samples': 196800, 'steps': 4099, 'loss/train': 2.3020989894866943} +07/25/2024 11:44:37 - INFO - __main__ - Step 4101: {'lr': 0.0004993604673425131, 'samples': 196848, 'steps': 4100, 'loss/train': 2.6164159774780273} +07/25/2024 11:44:37 - INFO - __main__ - Step 4102: {'lr': 0.0004993600912521025, 'samples': 196896, 'steps': 4101, 'loss/train': 2.072788953781128} +07/25/2024 11:44:38 - INFO - __main__ - Step 4103: {'lr': 0.0004993597150512821, 'samples': 196944, 'steps': 4102, 'loss/train': 2.681243419647217} +07/25/2024 11:44:38 - INFO - __main__ - Step 4104: {'lr': 0.0004993593387400523, 'samples': 196992, 'steps': 4103, 'loss/train': 2.7531096935272217} +07/25/2024 11:44:38 - INFO - __main__ - Step 4105: {'lr': 0.0004993589623184131, 'samples': 197040, 'steps': 4104, 'loss/train': 2.365511178970337} +07/25/2024 11:44:38 - INFO - __main__ - Step 4106: {'lr': 0.0004993585857863647, 'samples': 197088, 'steps': 4105, 'loss/train': 2.604022264480591} +07/25/2024 11:44:39 - INFO - __main__ - Step 4107: {'lr': 0.0004993582091439074, 'samples': 197136, 'steps': 4106, 'loss/train': 2.503084421157837} +07/25/2024 11:44:39 - INFO - __main__ - Step 4108: {'lr': 0.0004993578323910413, 'samples': 197184, 'steps': 4107, 'loss/train': 0.44024187326431274} +07/25/2024 11:44:39 - INFO - __main__ - Step 4109: {'lr': 0.0004993574555277664, 'samples': 197232, 'steps': 4108, 'loss/train': 2.4738974571228027} +07/25/2024 11:44:40 - INFO - __main__ - Step 4110: {'lr': 0.0004993570785540831, 'samples': 197280, 'steps': 4109, 'loss/train': 2.224621534347534} +07/25/2024 11:44:40 - INFO - __main__ - Step 4111: {'lr': 0.0004993567014699914, 'samples': 197328, 'steps': 4110, 'loss/train': 2.5334110260009766} +07/25/2024 11:44:40 - INFO - __main__ - Step 4112: {'lr': 0.0004993563242754916, 'samples': 197376, 'steps': 4111, 'loss/train': 2.2926344871520996} +07/25/2024 11:44:40 - INFO - __main__ - Step 4113: {'lr': 0.0004993559469705838, 'samples': 197424, 'steps': 4112, 'loss/train': 2.1706631183624268} +07/25/2024 11:44:41 - INFO - __main__ - Step 4114: {'lr': 0.0004993555695552682, 'samples': 197472, 'steps': 4113, 'loss/train': 2.341495990753174} +07/25/2024 11:44:41 - INFO - __main__ - Step 4115: {'lr': 0.000499355192029545, 'samples': 197520, 'steps': 4114, 'loss/train': 2.622209310531616} +07/25/2024 11:44:41 - INFO - __main__ - Step 4116: {'lr': 0.0004993548143934142, 'samples': 197568, 'steps': 4115, 'loss/train': 2.4907147884368896} +07/25/2024 11:44:42 - INFO - __main__ - Step 4117: {'lr': 0.000499354436646876, 'samples': 197616, 'steps': 4116, 'loss/train': 2.049144744873047} +07/25/2024 11:44:42 - INFO - __main__ - Step 4118: {'lr': 0.0004993540587899307, 'samples': 197664, 'steps': 4117, 'loss/train': 3.129163980484009} +07/25/2024 11:44:42 - INFO - __main__ - Step 4119: {'lr': 0.0004993536808225786, 'samples': 197712, 'steps': 4118, 'loss/train': 2.650268316268921} +07/25/2024 11:44:42 - INFO - __main__ - Step 4120: {'lr': 0.0004993533027448195, 'samples': 197760, 'steps': 4119, 'loss/train': 2.5528078079223633} +07/25/2024 11:44:43 - INFO - __main__ - Step 4121: {'lr': 0.0004993529245566538, 'samples': 197808, 'steps': 4120, 'loss/train': 2.3918304443359375} +07/25/2024 11:44:43 - INFO - __main__ - Step 4122: {'lr': 0.0004993525462580816, 'samples': 197856, 'steps': 4121, 'loss/train': 2.195772171020508} +07/25/2024 11:44:43 - INFO - __main__ - Step 4123: {'lr': 0.000499352167849103, 'samples': 197904, 'steps': 4122, 'loss/train': 1.9284499883651733} +07/25/2024 11:44:44 - INFO - __main__ - Step 4124: {'lr': 0.0004993517893297183, 'samples': 197952, 'steps': 4123, 'loss/train': 2.5264062881469727} +07/25/2024 11:44:44 - INFO - __main__ - Step 4125: {'lr': 0.0004993514106999276, 'samples': 198000, 'steps': 4124, 'loss/train': 1.775503158569336} +07/25/2024 11:44:44 - INFO - __main__ - Step 4126: {'lr': 0.0004993510319597312, 'samples': 198048, 'steps': 4125, 'loss/train': 2.1854610443115234} +07/25/2024 11:44:44 - INFO - __main__ - Step 4127: {'lr': 0.0004993506531091289, 'samples': 198096, 'steps': 4126, 'loss/train': 2.601653814315796} +07/25/2024 11:44:45 - INFO - __main__ - Step 4128: {'lr': 0.0004993502741481214, 'samples': 198144, 'steps': 4127, 'loss/train': 2.6762564182281494} +07/25/2024 11:44:45 - INFO - __main__ - Step 4129: {'lr': 0.0004993498950767084, 'samples': 198192, 'steps': 4128, 'loss/train': 2.03108549118042} +07/25/2024 11:44:45 - INFO - __main__ - Step 4130: {'lr': 0.0004993495158948903, 'samples': 198240, 'steps': 4129, 'loss/train': 2.46513032913208} +07/25/2024 11:44:46 - INFO - __main__ - Step 4131: {'lr': 0.0004993491366026673, 'samples': 198288, 'steps': 4130, 'loss/train': 2.4372706413269043} +07/25/2024 11:44:46 - INFO - __main__ - Step 4132: {'lr': 0.0004993487572000394, 'samples': 198336, 'steps': 4131, 'loss/train': 0.3599218726158142} +07/25/2024 11:44:46 - INFO - __main__ - Step 4133: {'lr': 0.0004993483776870069, 'samples': 198384, 'steps': 4132, 'loss/train': 2.3124923706054688} +07/25/2024 11:44:46 - INFO - __main__ - Step 4134: {'lr': 0.0004993479980635699, 'samples': 198432, 'steps': 4133, 'loss/train': 2.1982738971710205} +07/25/2024 11:44:47 - INFO - __main__ - Step 4135: {'lr': 0.0004993476183297286, 'samples': 198480, 'steps': 4134, 'loss/train': 2.7606167793273926} +07/25/2024 11:44:47 - INFO - __main__ - Step 4136: {'lr': 0.0004993472384854831, 'samples': 198528, 'steps': 4135, 'loss/train': 3.0613038539886475} +07/25/2024 11:44:47 - INFO - __main__ - Step 4137: {'lr': 0.0004993468585308337, 'samples': 198576, 'steps': 4136, 'loss/train': 2.0057482719421387} +07/25/2024 11:44:48 - INFO - __main__ - Step 4138: {'lr': 0.0004993464784657806, 'samples': 198624, 'steps': 4137, 'loss/train': 2.32039737701416} +07/25/2024 11:44:48 - INFO - __main__ - Step 4139: {'lr': 0.0004993460982903237, 'samples': 198672, 'steps': 4138, 'loss/train': 2.782940626144409} +07/25/2024 11:44:48 - INFO - __main__ - Step 4140: {'lr': 0.0004993457180044634, 'samples': 198720, 'steps': 4139, 'loss/train': 1.3843215703964233} +07/25/2024 11:44:48 - INFO - __main__ - Step 4141: {'lr': 0.0004993453376081997, 'samples': 198768, 'steps': 4140, 'loss/train': 2.6198675632476807} +07/25/2024 11:44:49 - INFO - __main__ - Step 4142: {'lr': 0.0004993449571015331, 'samples': 198816, 'steps': 4141, 'loss/train': 2.2538840770721436} +07/25/2024 11:44:49 - INFO - __main__ - Step 4143: {'lr': 0.0004993445764844633, 'samples': 198864, 'steps': 4142, 'loss/train': 1.7781985998153687} +07/25/2024 11:44:49 - INFO - __main__ - Step 4144: {'lr': 0.0004993441957569909, 'samples': 198912, 'steps': 4143, 'loss/train': 2.327817916870117} +07/25/2024 11:44:50 - INFO - __main__ - Step 4145: {'lr': 0.0004993438149191158, 'samples': 198960, 'steps': 4144, 'loss/train': 2.494027614593506} +07/25/2024 11:44:50 - INFO - __main__ - Step 4146: {'lr': 0.0004993434339708382, 'samples': 199008, 'steps': 4145, 'loss/train': 2.389383316040039} +07/25/2024 11:44:50 - INFO - __main__ - Step 4147: {'lr': 0.0004993430529121583, 'samples': 199056, 'steps': 4146, 'loss/train': 1.809712529182434} +07/25/2024 11:44:50 - INFO - __main__ - Step 4148: {'lr': 0.0004993426717430764, 'samples': 199104, 'steps': 4147, 'loss/train': 1.5342870950698853} +07/25/2024 11:44:51 - INFO - __main__ - Step 4149: {'lr': 0.0004993422904635924, 'samples': 199152, 'steps': 4148, 'loss/train': 1.6308975219726562} +07/25/2024 11:44:51 - INFO - __main__ - Step 4150: {'lr': 0.0004993419090737067, 'samples': 199200, 'steps': 4149, 'loss/train': 1.9064011573791504} +07/25/2024 11:44:51 - INFO - __main__ - Step 4151: {'lr': 0.0004993415275734193, 'samples': 199248, 'steps': 4150, 'loss/train': 2.4017012119293213} +07/25/2024 11:44:52 - INFO - __main__ - Step 4152: {'lr': 0.0004993411459627307, 'samples': 199296, 'steps': 4151, 'loss/train': 2.6736397743225098} +07/25/2024 11:44:52 - INFO - __main__ - Step 4153: {'lr': 0.0004993407642416406, 'samples': 199344, 'steps': 4152, 'loss/train': 1.9094969034194946} +07/25/2024 11:44:52 - INFO - __main__ - Step 4154: {'lr': 0.0004993403824101495, 'samples': 199392, 'steps': 4153, 'loss/train': 3.1149206161499023} +07/25/2024 11:44:52 - INFO - __main__ - Step 4155: {'lr': 0.0004993400004682574, 'samples': 199440, 'steps': 4154, 'loss/train': 2.099217414855957} +07/25/2024 11:44:53 - INFO - __main__ - Step 4156: {'lr': 0.0004993396184159646, 'samples': 199488, 'steps': 4155, 'loss/train': 3.182431697845459} +07/25/2024 11:44:53 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487267 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:44:53 - INFO - __main__ - Step 4157: {'lr': 0.0004993392362532712, 'samples': 199536, 'steps': 4156, 'loss/train': 2.6365878582000732} +07/25/2024 11:44:53 - INFO - __main__ - Step 4158: {'lr': 0.0004993388539801773, 'samples': 199584, 'steps': 4157, 'loss/train': 2.7265233993530273} +07/25/2024 11:44:54 - INFO - __main__ - Step 4159: {'lr': 0.0004993384715966832, 'samples': 199632, 'steps': 4158, 'loss/train': 2.5410268306732178} +07/25/2024 11:44:54 - INFO - __main__ - Step 4160: {'lr': 0.0004993380891027889, 'samples': 199680, 'steps': 4159, 'loss/train': 2.2149524688720703} +07/25/2024 11:44:54 - INFO - __main__ - Step 4161: {'lr': 0.0004993377064984949, 'samples': 199728, 'steps': 4160, 'loss/train': 2.3493266105651855} +07/25/2024 11:44:54 - INFO - __main__ - Step 4162: {'lr': 0.000499337323783801, 'samples': 199776, 'steps': 4161, 'loss/train': 2.8862102031707764} +07/25/2024 11:44:55 - INFO - __main__ - Step 4163: {'lr': 0.0004993369409587075, 'samples': 199824, 'steps': 4162, 'loss/train': 1.9541131258010864} +07/25/2024 11:44:55 - INFO - __main__ - Step 4164: {'lr': 0.0004993365580232146, 'samples': 199872, 'steps': 4163, 'loss/train': 1.8984540700912476} +07/25/2024 11:44:55 - INFO - __main__ - Step 4165: {'lr': 0.0004993361749773225, 'samples': 199920, 'steps': 4164, 'loss/train': 2.261218309402466} +07/25/2024 11:44:56 - INFO - __main__ - Step 4166: {'lr': 0.0004993357918210314, 'samples': 199968, 'steps': 4165, 'loss/train': 2.2773149013519287} +07/25/2024 11:44:56 - INFO - __main__ - Step 4167: {'lr': 0.0004993354085543413, 'samples': 200016, 'steps': 4166, 'loss/train': 2.190098762512207} +07/25/2024 11:44:56 - INFO - __main__ - Step 4168: {'lr': 0.0004993350251772525, 'samples': 200064, 'steps': 4167, 'loss/train': 1.9576964378356934} +07/25/2024 11:44:56 - INFO - __main__ - Step 4169: {'lr': 0.0004993346416897651, 'samples': 200112, 'steps': 4168, 'loss/train': 1.9315770864486694} +07/25/2024 11:44:57 - INFO - __main__ - Step 4170: {'lr': 0.0004993342580918794, 'samples': 200160, 'steps': 4169, 'loss/train': 2.3262555599212646} +07/25/2024 11:44:57 - INFO - __main__ - Step 4171: {'lr': 0.0004993338743835954, 'samples': 200208, 'steps': 4170, 'loss/train': 2.128384828567505} +07/25/2024 11:44:57 - INFO - __main__ - Step 4172: {'lr': 0.0004993334905649133, 'samples': 200256, 'steps': 4171, 'loss/train': 1.9328382015228271} +07/25/2024 11:44:57 - INFO - __main__ - Step 4173: {'lr': 0.0004993331066358333, 'samples': 200304, 'steps': 4172, 'loss/train': 1.7228145599365234} +07/25/2024 11:44:58 - INFO - __main__ - Step 4174: {'lr': 0.0004993327225963558, 'samples': 200352, 'steps': 4173, 'loss/train': 2.4372034072875977} +07/25/2024 11:44:58 - DEBUG - datasets.packaged_modules.json.json - Batch of 10512417 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:44:58 - INFO - __main__ - Step 4175: {'lr': 0.0004993323384464805, 'samples': 200400, 'steps': 4174, 'loss/train': 2.427626132965088} +07/25/2024 11:44:58 - INFO - __main__ - Step 4176: {'lr': 0.000499331954186208, 'samples': 200448, 'steps': 4175, 'loss/train': 2.427145481109619} +07/25/2024 11:44:59 - INFO - __main__ - Step 4177: {'lr': 0.0004993315698155383, 'samples': 200496, 'steps': 4176, 'loss/train': 2.28497052192688} +07/25/2024 11:44:59 - INFO - __main__ - Step 4178: {'lr': 0.0004993311853344714, 'samples': 200544, 'steps': 4177, 'loss/train': 2.4098892211914062} +07/25/2024 11:44:59 - INFO - __main__ - Step 4179: {'lr': 0.0004993308007430078, 'samples': 200592, 'steps': 4178, 'loss/train': 2.2719156742095947} +07/25/2024 11:44:59 - INFO - __main__ - Step 4180: {'lr': 0.0004993304160411475, 'samples': 200640, 'steps': 4179, 'loss/train': 2.7807657718658447} +07/25/2024 11:45:00 - INFO - __main__ - Step 4181: {'lr': 0.0004993300312288905, 'samples': 200688, 'steps': 4180, 'loss/train': 1.9224132299423218} +07/25/2024 11:45:00 - INFO - __main__ - Step 4182: {'lr': 0.0004993296463062373, 'samples': 200736, 'steps': 4181, 'loss/train': 1.9911619424819946} +07/25/2024 11:45:00 - INFO - __main__ - Step 4183: {'lr': 0.0004993292612731879, 'samples': 200784, 'steps': 4182, 'loss/train': 2.725829601287842} +07/25/2024 11:45:01 - INFO - __main__ - Step 4184: {'lr': 0.0004993288761297425, 'samples': 200832, 'steps': 4183, 'loss/train': 2.347118854522705} +07/25/2024 11:45:01 - INFO - __main__ - Step 4185: {'lr': 0.0004993284908759013, 'samples': 200880, 'steps': 4184, 'loss/train': 2.0236480236053467} +07/25/2024 11:45:01 - INFO - __main__ - Step 4186: {'lr': 0.0004993281055116643, 'samples': 200928, 'steps': 4185, 'loss/train': 2.77817702293396} +07/25/2024 11:45:01 - INFO - __main__ - Step 4187: {'lr': 0.0004993277200370319, 'samples': 200976, 'steps': 4186, 'loss/train': 2.3380179405212402} +07/25/2024 11:45:02 - INFO - __main__ - Step 4188: {'lr': 0.0004993273344520042, 'samples': 201024, 'steps': 4187, 'loss/train': 1.9899202585220337} +07/25/2024 11:45:02 - INFO - __main__ - Step 4189: {'lr': 0.0004993269487565812, 'samples': 201072, 'steps': 4188, 'loss/train': 2.3377673625946045} +07/25/2024 11:45:02 - INFO - __main__ - Step 4190: {'lr': 0.0004993265629507634, 'samples': 201120, 'steps': 4189, 'loss/train': 2.478104591369629} +07/25/2024 11:45:03 - INFO - __main__ - Step 4191: {'lr': 0.0004993261770345507, 'samples': 201168, 'steps': 4190, 'loss/train': 3.28354811668396} +07/25/2024 11:45:03 - INFO - __main__ - Step 4192: {'lr': 0.0004993257910079433, 'samples': 201216, 'steps': 4191, 'loss/train': 2.6464309692382812} +07/25/2024 11:45:03 - INFO - __main__ - Step 4193: {'lr': 0.0004993254048709415, 'samples': 201264, 'steps': 4192, 'loss/train': 2.5580146312713623} +07/25/2024 11:45:03 - INFO - __main__ - Step 4194: {'lr': 0.0004993250186235453, 'samples': 201312, 'steps': 4193, 'loss/train': 2.94477915763855} +07/25/2024 11:45:04 - INFO - __main__ - Step 4195: {'lr': 0.000499324632265755, 'samples': 201360, 'steps': 4194, 'loss/train': 2.8157384395599365} +07/25/2024 11:45:04 - INFO - __main__ - Step 4196: {'lr': 0.0004993242457975708, 'samples': 201408, 'steps': 4195, 'loss/train': 2.585756301879883} +07/25/2024 11:45:04 - INFO - __main__ - Step 4197: {'lr': 0.0004993238592189928, 'samples': 201456, 'steps': 4196, 'loss/train': 1.7086182832717896} +07/25/2024 11:45:05 - INFO - __main__ - Step 4198: {'lr': 0.0004993234725300212, 'samples': 201504, 'steps': 4197, 'loss/train': 3.004218816757202} +07/25/2024 11:45:05 - INFO - __main__ - Step 4199: {'lr': 0.0004993230857306561, 'samples': 201552, 'steps': 4198, 'loss/train': 2.562262535095215} +07/25/2024 11:45:05 - INFO - __main__ - Step 4200: {'lr': 0.0004993226988208977, 'samples': 201600, 'steps': 4199, 'loss/train': 2.0022895336151123} +07/25/2024 11:45:05 - INFO - __main__ - Step 4201: {'lr': 0.0004993223118007462, 'samples': 201648, 'steps': 4200, 'loss/train': 1.2544910907745361} +07/25/2024 11:45:06 - INFO - __main__ - Step 4202: {'lr': 0.0004993219246702018, 'samples': 201696, 'steps': 4201, 'loss/train': 2.303770065307617} +07/25/2024 11:45:06 - INFO - __main__ - Step 4203: {'lr': 0.0004993215374292647, 'samples': 201744, 'steps': 4202, 'loss/train': 2.4371426105499268} +07/25/2024 11:45:06 - INFO - __main__ - Step 4204: {'lr': 0.0004993211500779348, 'samples': 201792, 'steps': 4203, 'loss/train': 2.6943492889404297} +07/25/2024 11:45:07 - INFO - __main__ - Step 4205: {'lr': 0.0004993207626162127, 'samples': 201840, 'steps': 4204, 'loss/train': 1.7797795534133911} +07/25/2024 11:45:07 - INFO - __main__ - Step 4206: {'lr': 0.0004993203750440984, 'samples': 201888, 'steps': 4205, 'loss/train': 2.041520833969116} +07/25/2024 11:45:07 - INFO - __main__ - Step 4207: {'lr': 0.0004993199873615918, 'samples': 201936, 'steps': 4206, 'loss/train': 2.3423986434936523} +07/25/2024 11:45:07 - INFO - __main__ - Step 4208: {'lr': 0.0004993195995686934, 'samples': 201984, 'steps': 4207, 'loss/train': 2.131133556365967} +07/25/2024 11:45:08 - INFO - __main__ - Step 4209: {'lr': 0.0004993192116654033, 'samples': 202032, 'steps': 4208, 'loss/train': 2.8144078254699707} +07/25/2024 11:45:08 - INFO - __main__ - Step 4210: {'lr': 0.0004993188236517216, 'samples': 202080, 'steps': 4209, 'loss/train': 2.125744104385376} +07/25/2024 11:45:08 - INFO - __main__ - Step 4211: {'lr': 0.0004993184355276485, 'samples': 202128, 'steps': 4210, 'loss/train': 2.545719861984253} +07/25/2024 11:45:09 - INFO - __main__ - Step 4212: {'lr': 0.0004993180472931842, 'samples': 202176, 'steps': 4211, 'loss/train': 1.896785855293274} +07/25/2024 11:45:09 - INFO - __main__ - Step 4213: {'lr': 0.0004993176589483288, 'samples': 202224, 'steps': 4212, 'loss/train': 2.6632258892059326} +07/25/2024 11:45:09 - INFO - __main__ - Step 4214: {'lr': 0.0004993172704930826, 'samples': 202272, 'steps': 4213, 'loss/train': 2.7375595569610596} +07/25/2024 11:45:09 - INFO - __main__ - Step 4215: {'lr': 0.0004993168819274458, 'samples': 202320, 'steps': 4214, 'loss/train': 2.2277872562408447} +07/25/2024 11:45:10 - INFO - __main__ - Step 4216: {'lr': 0.0004993164932514183, 'samples': 202368, 'steps': 4215, 'loss/train': 2.398463010787964} +07/25/2024 11:45:10 - INFO - __main__ - Step 4217: {'lr': 0.0004993161044650004, 'samples': 202416, 'steps': 4216, 'loss/train': 2.3047804832458496} +07/25/2024 11:45:10 - INFO - __main__ - Step 4218: {'lr': 0.0004993157155681924, 'samples': 202464, 'steps': 4217, 'loss/train': 2.7152860164642334} +07/25/2024 11:45:11 - INFO - __main__ - Step 4219: {'lr': 0.0004993153265609945, 'samples': 202512, 'steps': 4218, 'loss/train': 1.8714675903320312} +07/25/2024 11:45:11 - INFO - __main__ - Step 4220: {'lr': 0.0004993149374434066, 'samples': 202560, 'steps': 4219, 'loss/train': 2.4097983837127686} +07/25/2024 11:45:11 - INFO - __main__ - Step 4221: {'lr': 0.000499314548215429, 'samples': 202608, 'steps': 4220, 'loss/train': 2.536639451980591} +07/25/2024 11:45:11 - INFO - __main__ - Step 4222: {'lr': 0.0004993141588770621, 'samples': 202656, 'steps': 4221, 'loss/train': 2.1901321411132812} +07/25/2024 11:45:12 - INFO - __main__ - Step 4223: {'lr': 0.0004993137694283057, 'samples': 202704, 'steps': 4222, 'loss/train': 2.1203722953796387} +07/25/2024 11:45:12 - INFO - __main__ - Step 4224: {'lr': 0.0004993133798691603, 'samples': 202752, 'steps': 4223, 'loss/train': 2.1864752769470215} +07/25/2024 11:45:12 - INFO - __main__ - Step 4225: {'lr': 0.0004993129901996258, 'samples': 202800, 'steps': 4224, 'loss/train': 2.729058265686035} +07/25/2024 11:45:13 - INFO - __main__ - Step 4226: {'lr': 0.0004993126004197025, 'samples': 202848, 'steps': 4225, 'loss/train': 2.1359643936157227} +07/25/2024 11:45:13 - INFO - __main__ - Step 4227: {'lr': 0.0004993122105293906, 'samples': 202896, 'steps': 4226, 'loss/train': 2.8726046085357666} +07/25/2024 11:45:13 - INFO - __main__ - Step 4228: {'lr': 0.0004993118205286901, 'samples': 202944, 'steps': 4227, 'loss/train': 2.616323947906494} +07/25/2024 11:45:13 - INFO - __main__ - Step 4229: {'lr': 0.0004993114304176015, 'samples': 202992, 'steps': 4228, 'loss/train': 1.5519304275512695} +07/25/2024 11:45:14 - INFO - __main__ - Step 4230: {'lr': 0.0004993110401961248, 'samples': 203040, 'steps': 4229, 'loss/train': 2.2255356311798096} +07/25/2024 11:45:14 - INFO - __main__ - Step 4231: {'lr': 0.00049931064986426, 'samples': 203088, 'steps': 4230, 'loss/train': 2.2012014389038086} +07/25/2024 11:45:14 - INFO - __main__ - Step 4232: {'lr': 0.0004993102594220076, 'samples': 203136, 'steps': 4231, 'loss/train': 2.1695592403411865} +07/25/2024 11:45:15 - INFO - __main__ - Step 4233: {'lr': 0.0004993098688693675, 'samples': 203184, 'steps': 4232, 'loss/train': 3.1616108417510986} +07/25/2024 11:45:15 - INFO - __main__ - Step 4234: {'lr': 0.00049930947820634, 'samples': 203232, 'steps': 4233, 'loss/train': 2.571950912475586} +07/25/2024 11:45:15 - INFO - __main__ - Step 4235: {'lr': 0.0004993090874329252, 'samples': 203280, 'steps': 4234, 'loss/train': 2.690931558609009} +07/25/2024 11:45:15 - INFO - __main__ - Step 4236: {'lr': 0.0004993086965491233, 'samples': 203328, 'steps': 4235, 'loss/train': 2.4288036823272705} +07/25/2024 11:45:16 - INFO - __main__ - Step 4237: {'lr': 0.0004993083055549346, 'samples': 203376, 'steps': 4236, 'loss/train': 2.6115646362304688} +07/25/2024 11:45:16 - INFO - __main__ - Step 4238: {'lr': 0.0004993079144503591, 'samples': 203424, 'steps': 4237, 'loss/train': 2.306856632232666} +07/25/2024 11:45:16 - INFO - __main__ - Step 4239: {'lr': 0.0004993075232353971, 'samples': 203472, 'steps': 4238, 'loss/train': 2.3542113304138184} +07/25/2024 11:45:17 - INFO - __main__ - Step 4240: {'lr': 0.0004993071319100487, 'samples': 203520, 'steps': 4239, 'loss/train': 2.3797271251678467} +07/25/2024 11:45:17 - INFO - __main__ - Step 4241: {'lr': 0.0004993067404743141, 'samples': 203568, 'steps': 4240, 'loss/train': 2.0188825130462646} +07/25/2024 11:45:17 - INFO - __main__ - Step 4242: {'lr': 0.0004993063489281933, 'samples': 203616, 'steps': 4241, 'loss/train': 2.0118649005889893} +07/25/2024 11:45:17 - INFO - __main__ - Step 4243: {'lr': 0.0004993059572716869, 'samples': 203664, 'steps': 4242, 'loss/train': 2.5300025939941406} +07/25/2024 11:45:18 - INFO - __main__ - Step 4244: {'lr': 0.0004993055655047946, 'samples': 203712, 'steps': 4243, 'loss/train': 2.829883337020874} +07/25/2024 11:45:18 - INFO - __main__ - Step 4245: {'lr': 0.0004993051736275168, 'samples': 203760, 'steps': 4244, 'loss/train': 2.94853138923645} +07/25/2024 11:45:18 - INFO - __main__ - Step 4246: {'lr': 0.0004993047816398538, 'samples': 203808, 'steps': 4245, 'loss/train': 2.605856418609619} +07/25/2024 11:45:19 - INFO - __main__ - Step 4247: {'lr': 0.0004993043895418055, 'samples': 203856, 'steps': 4246, 'loss/train': 2.0945093631744385} +07/25/2024 11:45:19 - INFO - __main__ - Step 4248: {'lr': 0.0004993039973333721, 'samples': 203904, 'steps': 4247, 'loss/train': 2.268376588821411} +07/25/2024 11:45:19 - INFO - __main__ - Step 4249: {'lr': 0.0004993036050145542, 'samples': 203952, 'steps': 4248, 'loss/train': 2.2731528282165527} +07/25/2024 11:45:19 - INFO - __main__ - Step 4250: {'lr': 0.0004993032125853514, 'samples': 204000, 'steps': 4249, 'loss/train': 2.7105345726013184} +07/25/2024 11:45:20 - INFO - __main__ - Step 4251: {'lr': 0.0004993028200457641, 'samples': 204048, 'steps': 4250, 'loss/train': 1.6677980422973633} +07/25/2024 11:45:20 - INFO - __main__ - Step 4252: {'lr': 0.0004993024273957927, 'samples': 204096, 'steps': 4251, 'loss/train': 2.5199756622314453} +07/25/2024 11:45:20 - INFO - __main__ - Step 4253: {'lr': 0.000499302034635437, 'samples': 204144, 'steps': 4252, 'loss/train': 3.164468288421631} +07/25/2024 11:45:21 - INFO - __main__ - Step 4254: {'lr': 0.0004993016417646974, 'samples': 204192, 'steps': 4253, 'loss/train': 1.9298948049545288} +07/25/2024 11:45:21 - INFO - __main__ - Step 4255: {'lr': 0.000499301248783574, 'samples': 204240, 'steps': 4254, 'loss/train': 2.467778444290161} +07/25/2024 11:45:21 - INFO - __main__ - Step 4256: {'lr': 0.000499300855692067, 'samples': 204288, 'steps': 4255, 'loss/train': 2.046841859817505} +07/25/2024 11:45:21 - INFO - __main__ - Step 4257: {'lr': 0.0004993004624901766, 'samples': 204336, 'steps': 4256, 'loss/train': 2.408585548400879} +07/25/2024 11:45:22 - INFO - __main__ - Step 4258: {'lr': 0.0004993000691779028, 'samples': 204384, 'steps': 4257, 'loss/train': 2.706338405609131} +07/25/2024 11:45:22 - INFO - __main__ - Step 4259: {'lr': 0.0004992996757552461, 'samples': 204432, 'steps': 4258, 'loss/train': 2.1436691284179688} +07/25/2024 11:45:22 - INFO - __main__ - Step 4260: {'lr': 0.0004992992822222065, 'samples': 204480, 'steps': 4259, 'loss/train': 2.4607884883880615} +07/25/2024 11:45:22 - INFO - __main__ - Step 4261: {'lr': 0.0004992988885787839, 'samples': 204528, 'steps': 4260, 'loss/train': 2.437016010284424} +07/25/2024 11:45:23 - INFO - __main__ - Step 4262: {'lr': 0.000499298494824979, 'samples': 204576, 'steps': 4261, 'loss/train': 2.065227746963501} +07/25/2024 11:45:23 - INFO - __main__ - Step 4263: {'lr': 0.0004992981009607916, 'samples': 204624, 'steps': 4262, 'loss/train': 1.9615916013717651} +07/25/2024 11:45:23 - INFO - __main__ - Step 4264: {'lr': 0.0004992977069862221, 'samples': 204672, 'steps': 4263, 'loss/train': 2.074395179748535} +07/25/2024 11:45:24 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487562 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 11:45:24 - INFO - __main__ - Step 4265: {'lr': 0.0004992973129012704, 'samples': 204720, 'steps': 4264, 'loss/train': 2.003657817840576} +07/25/2024 11:45:24 - INFO - __main__ - Step 4266: {'lr': 0.0004992969187059369, 'samples': 204768, 'steps': 4265, 'loss/train': 2.0723843574523926} +07/25/2024 11:45:24 - INFO - __main__ - Step 4267: {'lr': 0.0004992965244002217, 'samples': 204816, 'steps': 4266, 'loss/train': 1.9201899766921997} +07/25/2024 11:45:24 - INFO - __main__ - Step 4268: {'lr': 0.000499296129984125, 'samples': 204864, 'steps': 4267, 'loss/train': 2.316678762435913} +07/25/2024 11:45:25 - INFO - __main__ - Step 4269: {'lr': 0.000499295735457647, 'samples': 204912, 'steps': 4268, 'loss/train': 2.811699628829956} +07/25/2024 11:45:25 - INFO - __main__ - Step 4270: {'lr': 0.0004992953408207878, 'samples': 204960, 'steps': 4269, 'loss/train': 2.9348838329315186} +07/25/2024 11:45:25 - INFO - __main__ - Step 4271: {'lr': 0.0004992949460735476, 'samples': 205008, 'steps': 4270, 'loss/train': 2.086275577545166} +07/25/2024 11:45:26 - INFO - __main__ - Step 4272: {'lr': 0.0004992945512159266, 'samples': 205056, 'steps': 4271, 'loss/train': 2.0373668670654297} +07/25/2024 11:45:26 - INFO - __main__ - Step 4273: {'lr': 0.0004992941562479249, 'samples': 205104, 'steps': 4272, 'loss/train': 2.0170235633850098} +07/25/2024 11:45:26 - INFO - __main__ - Step 4274: {'lr': 0.0004992937611695428, 'samples': 205152, 'steps': 4273, 'loss/train': 1.995599627494812} +07/25/2024 11:45:26 - INFO - __main__ - Step 4275: {'lr': 0.0004992933659807803, 'samples': 205200, 'steps': 4274, 'loss/train': 2.797962188720703} +07/25/2024 11:45:27 - INFO - __main__ - Step 4276: {'lr': 0.0004992929706816379, 'samples': 205248, 'steps': 4275, 'loss/train': 2.557635545730591} +07/25/2024 11:45:27 - INFO - __main__ - Step 4277: {'lr': 0.0004992925752721153, 'samples': 205296, 'steps': 4276, 'loss/train': 5.027467727661133} +07/25/2024 11:45:27 - INFO - __main__ - Step 4278: {'lr': 0.000499292179752213, 'samples': 205344, 'steps': 4277, 'loss/train': 2.467042922973633} +07/25/2024 11:45:28 - INFO - __main__ - Step 4279: {'lr': 0.0004992917841219314, 'samples': 205392, 'steps': 4278, 'loss/train': 2.319767951965332} +07/25/2024 11:45:28 - INFO - __main__ - Step 4280: {'lr': 0.00049929138838127, 'samples': 205440, 'steps': 4279, 'loss/train': 2.4276676177978516} +07/25/2024 11:45:28 - INFO - __main__ - Step 4281: {'lr': 0.0004992909925302296, 'samples': 205488, 'steps': 4280, 'loss/train': 2.2961373329162598} +07/25/2024 11:45:28 - INFO - __main__ - Step 4282: {'lr': 0.00049929059656881, 'samples': 205536, 'steps': 4281, 'loss/train': 2.729497194290161} +07/25/2024 11:45:29 - INFO - __main__ - Step 4283: {'lr': 0.0004992902004970115, 'samples': 205584, 'steps': 4282, 'loss/train': 2.530452251434326} +07/25/2024 11:45:29 - INFO - __main__ - Step 4284: {'lr': 0.0004992898043148344, 'samples': 205632, 'steps': 4283, 'loss/train': 2.355902671813965} +07/25/2024 11:45:29 - INFO - __main__ - Step 4285: {'lr': 0.0004992894080222786, 'samples': 205680, 'steps': 4284, 'loss/train': 2.0834872722625732} +07/25/2024 11:45:30 - INFO - __main__ - Step 4286: {'lr': 0.0004992890116193446, 'samples': 205728, 'steps': 4285, 'loss/train': 2.1463935375213623} +07/25/2024 11:45:30 - INFO - __main__ - Step 4287: {'lr': 0.0004992886151060323, 'samples': 205776, 'steps': 4286, 'loss/train': 1.660485863685608} +07/25/2024 11:45:30 - INFO - __main__ - Step 4288: {'lr': 0.0004992882184823421, 'samples': 205824, 'steps': 4287, 'loss/train': 2.459176778793335} +07/25/2024 11:45:30 - INFO - __main__ - Step 4289: {'lr': 0.0004992878217482739, 'samples': 205872, 'steps': 4288, 'loss/train': 2.1972413063049316} +07/25/2024 11:45:31 - INFO - __main__ - Step 4290: {'lr': 0.0004992874249038282, 'samples': 205920, 'steps': 4289, 'loss/train': 2.5100624561309814} +07/25/2024 11:45:31 - INFO - __main__ - Step 4291: {'lr': 0.0004992870279490049, 'samples': 205968, 'steps': 4290, 'loss/train': 3.182960271835327} +07/25/2024 11:45:31 - INFO - __main__ - Step 4292: {'lr': 0.0004992866308838043, 'samples': 206016, 'steps': 4291, 'loss/train': 2.0092389583587646} +07/25/2024 11:45:32 - INFO - __main__ - Step 4293: {'lr': 0.0004992862337082265, 'samples': 206064, 'steps': 4292, 'loss/train': 2.806396007537842} +07/25/2024 11:45:32 - INFO - __main__ - Step 4294: {'lr': 0.000499285836422272, 'samples': 206112, 'steps': 4293, 'loss/train': 2.958878755569458} +07/25/2024 11:45:32 - INFO - __main__ - Step 4295: {'lr': 0.0004992854390259405, 'samples': 206160, 'steps': 4294, 'loss/train': 2.281064510345459} +07/25/2024 11:45:32 - INFO - __main__ - Step 4296: {'lr': 0.0004992850415192324, 'samples': 206208, 'steps': 4295, 'loss/train': 2.047947645187378} +07/25/2024 11:45:33 - INFO - __main__ - Step 4297: {'lr': 0.0004992846439021479, 'samples': 206256, 'steps': 4296, 'loss/train': 2.0703799724578857} +07/25/2024 11:45:33 - INFO - __main__ - Step 4298: {'lr': 0.0004992842461746872, 'samples': 206304, 'steps': 4297, 'loss/train': 2.315223455429077} +07/25/2024 11:45:33 - INFO - __main__ - Step 4299: {'lr': 0.0004992838483368504, 'samples': 206352, 'steps': 4298, 'loss/train': 1.4964083433151245} +07/25/2024 11:45:34 - INFO - __main__ - Step 4300: {'lr': 0.0004992834503886376, 'samples': 206400, 'steps': 4299, 'loss/train': 2.1888039112091064} +07/25/2024 11:45:34 - INFO - __main__ - Step 4301: {'lr': 0.0004992830523300491, 'samples': 206448, 'steps': 4300, 'loss/train': 4.5857720375061035} +07/25/2024 11:45:34 - INFO - __main__ - Step 4302: {'lr': 0.0004992826541610852, 'samples': 206496, 'steps': 4301, 'loss/train': 2.7875730991363525} +07/25/2024 11:45:34 - INFO - __main__ - Step 4303: {'lr': 0.0004992822558817458, 'samples': 206544, 'steps': 4302, 'loss/train': 1.9458978176116943} +07/25/2024 11:45:35 - INFO - __main__ - Step 4304: {'lr': 0.0004992818574920312, 'samples': 206592, 'steps': 4303, 'loss/train': 2.036431312561035} +07/25/2024 11:45:35 - INFO - __main__ - Step 4305: {'lr': 0.0004992814589919416, 'samples': 206640, 'steps': 4304, 'loss/train': 2.2750253677368164} +07/25/2024 11:45:35 - INFO - __main__ - Step 4306: {'lr': 0.0004992810603814771, 'samples': 206688, 'steps': 4305, 'loss/train': 2.457859516143799} +07/25/2024 11:45:36 - INFO - __main__ - Step 4307: {'lr': 0.000499280661660638, 'samples': 206736, 'steps': 4306, 'loss/train': 3.134495496749878} +07/25/2024 11:45:36 - INFO - __main__ - Step 4308: {'lr': 0.0004992802628294244, 'samples': 206784, 'steps': 4307, 'loss/train': 2.710634231567383} +07/25/2024 11:45:36 - INFO - __main__ - Step 4309: {'lr': 0.0004992798638878365, 'samples': 206832, 'steps': 4308, 'loss/train': 1.634002685546875} +07/25/2024 11:45:36 - INFO - __main__ - Step 4310: {'lr': 0.0004992794648358745, 'samples': 206880, 'steps': 4309, 'loss/train': 0.6422200798988342} +07/25/2024 11:45:37 - INFO - __main__ - Step 4311: {'lr': 0.0004992790656735384, 'samples': 206928, 'steps': 4310, 'loss/train': 2.413313388824463} +07/25/2024 11:45:37 - INFO - __main__ - Step 4312: {'lr': 0.0004992786664008286, 'samples': 206976, 'steps': 4311, 'loss/train': 2.105401039123535} +07/25/2024 11:45:37 - INFO - __main__ - Step 4313: {'lr': 0.0004992782670177452, 'samples': 207024, 'steps': 4312, 'loss/train': 2.1528680324554443} +07/25/2024 11:45:38 - INFO - __main__ - Step 4314: {'lr': 0.0004992778675242883, 'samples': 207072, 'steps': 4313, 'loss/train': 2.8153486251831055} +07/25/2024 11:45:38 - INFO - __main__ - Step 4315: {'lr': 0.0004992774679204583, 'samples': 207120, 'steps': 4314, 'loss/train': 3.889583110809326} +07/25/2024 11:45:38 - INFO - __main__ - Step 4316: {'lr': 0.0004992770682062551, 'samples': 207168, 'steps': 4315, 'loss/train': 1.4201974868774414} +07/25/2024 11:45:38 - INFO - __main__ - Step 4317: {'lr': 0.000499276668381679, 'samples': 207216, 'steps': 4316, 'loss/train': 2.3590025901794434} +07/25/2024 11:45:39 - INFO - __main__ - Step 4318: {'lr': 0.0004992762684467301, 'samples': 207264, 'steps': 4317, 'loss/train': 2.8138389587402344} +07/25/2024 11:45:39 - INFO - __main__ - Step 4319: {'lr': 0.0004992758684014088, 'samples': 207312, 'steps': 4318, 'loss/train': 2.5930449962615967} +07/25/2024 11:45:39 - INFO - __main__ - Step 4320: {'lr': 0.0004992754682457151, 'samples': 207360, 'steps': 4319, 'loss/train': 2.3219504356384277} +07/25/2024 11:45:40 - INFO - __main__ - Step 4321: {'lr': 0.0004992750679796491, 'samples': 207408, 'steps': 4320, 'loss/train': 2.7325363159179688} +07/25/2024 11:45:40 - INFO - __main__ - Step 4322: {'lr': 0.0004992746676032112, 'samples': 207456, 'steps': 4321, 'loss/train': 2.593810558319092} +07/25/2024 11:45:40 - INFO - __main__ - Step 4323: {'lr': 0.0004992742671164014, 'samples': 207504, 'steps': 4322, 'loss/train': 1.8395766019821167} +07/25/2024 11:45:40 - INFO - __main__ - Step 4324: {'lr': 0.00049927386651922, 'samples': 207552, 'steps': 4323, 'loss/train': 1.361940622329712} +07/25/2024 11:45:41 - INFO - __main__ - Step 4325: {'lr': 0.0004992734658116671, 'samples': 207600, 'steps': 4324, 'loss/train': 4.719241142272949} +07/25/2024 11:45:41 - INFO - __main__ - Step 4326: {'lr': 0.0004992730649937429, 'samples': 207648, 'steps': 4325, 'loss/train': 2.630531072616577} +07/25/2024 11:45:41 - INFO - __main__ - Step 4327: {'lr': 0.0004992726640654474, 'samples': 207696, 'steps': 4326, 'loss/train': 2.2725937366485596} +07/25/2024 11:45:42 - INFO - __main__ - Step 4328: {'lr': 0.0004992722630267812, 'samples': 207744, 'steps': 4327, 'loss/train': 2.3824431896209717} +07/25/2024 11:45:42 - INFO - __main__ - Step 4329: {'lr': 0.0004992718618777441, 'samples': 207792, 'steps': 4328, 'loss/train': 2.6938703060150146} +07/25/2024 11:45:42 - INFO - __main__ - Step 4330: {'lr': 0.0004992714606183365, 'samples': 207840, 'steps': 4329, 'loss/train': 2.444098949432373} +07/25/2024 11:45:42 - INFO - __main__ - Step 4331: {'lr': 0.0004992710592485583, 'samples': 207888, 'steps': 4330, 'loss/train': 3.0523176193237305} +07/25/2024 11:45:43 - INFO - __main__ - Step 4332: {'lr': 0.0004992706577684101, 'samples': 207936, 'steps': 4331, 'loss/train': 2.5037755966186523} +07/25/2024 11:45:43 - INFO - __main__ - Step 4333: {'lr': 0.0004992702561778916, 'samples': 207984, 'steps': 4332, 'loss/train': 1.5920299291610718} +07/25/2024 11:45:43 - INFO - __main__ - Step 4334: {'lr': 0.0004992698544770034, 'samples': 208032, 'steps': 4333, 'loss/train': 0.414475679397583} +07/25/2024 11:45:43 - INFO - __main__ - Step 4335: {'lr': 0.0004992694526657454, 'samples': 208080, 'steps': 4334, 'loss/train': 2.6431477069854736} +07/25/2024 11:45:44 - INFO - __main__ - Step 4336: {'lr': 0.0004992690507441179, 'samples': 208128, 'steps': 4335, 'loss/train': 2.6033644676208496} +07/25/2024 11:45:44 - INFO - __main__ - Step 4337: {'lr': 0.000499268648712121, 'samples': 208176, 'steps': 4336, 'loss/train': 2.064070701599121} +07/25/2024 11:45:44 - INFO - __main__ - Step 4338: {'lr': 0.0004992682465697549, 'samples': 208224, 'steps': 4337, 'loss/train': 2.6844522953033447} +07/25/2024 11:45:45 - INFO - __main__ - Step 4339: {'lr': 0.0004992678443170199, 'samples': 208272, 'steps': 4338, 'loss/train': 4.591844081878662} +07/25/2024 11:45:45 - INFO - __main__ - Step 4340: {'lr': 0.000499267441953916, 'samples': 208320, 'steps': 4339, 'loss/train': 1.4844462871551514} +07/25/2024 11:45:45 - INFO - __main__ - Step 4341: {'lr': 0.0004992670394804435, 'samples': 208368, 'steps': 4340, 'loss/train': 2.140204429626465} +07/25/2024 11:45:45 - INFO - __main__ - Step 4342: {'lr': 0.0004992666368966026, 'samples': 208416, 'steps': 4341, 'loss/train': 2.3666257858276367} +07/25/2024 11:45:46 - INFO - __main__ - Step 4343: {'lr': 0.0004992662342023933, 'samples': 208464, 'steps': 4342, 'loss/train': 3.3194243907928467} +07/25/2024 11:45:46 - INFO - __main__ - Step 4344: {'lr': 0.000499265831397816, 'samples': 208512, 'steps': 4343, 'loss/train': 2.3515799045562744} +07/25/2024 11:45:46 - INFO - __main__ - Step 4345: {'lr': 0.0004992654284828708, 'samples': 208560, 'steps': 4344, 'loss/train': 2.4010748863220215} +07/25/2024 11:45:47 - INFO - __main__ - Step 4346: {'lr': 0.0004992650254575578, 'samples': 208608, 'steps': 4345, 'loss/train': 2.4308528900146484} +07/25/2024 11:45:47 - INFO - __main__ - Step 4347: {'lr': 0.0004992646223218772, 'samples': 208656, 'steps': 4346, 'loss/train': 2.0148568153381348} +07/25/2024 11:45:47 - INFO - __main__ - Step 4348: {'lr': 0.0004992642190758292, 'samples': 208704, 'steps': 4347, 'loss/train': 2.3548405170440674} +07/25/2024 11:45:47 - INFO - __main__ - Step 4349: {'lr': 0.000499263815719414, 'samples': 208752, 'steps': 4348, 'loss/train': 4.940942764282227} +07/25/2024 11:45:48 - INFO - __main__ - Step 4350: {'lr': 0.0004992634122526318, 'samples': 208800, 'steps': 4349, 'loss/train': 2.448582410812378} +07/25/2024 11:45:48 - INFO - __main__ - Step 4351: {'lr': 0.0004992630086754828, 'samples': 208848, 'steps': 4350, 'loss/train': 2.5525741577148438} +07/25/2024 11:45:48 - INFO - __main__ - Step 4352: {'lr': 0.0004992626049879669, 'samples': 208896, 'steps': 4351, 'loss/train': 2.3469362258911133} +07/25/2024 11:45:49 - INFO - __main__ - Step 4353: {'lr': 0.0004992622011900848, 'samples': 208944, 'steps': 4352, 'loss/train': 3.0171875953674316} +07/25/2024 11:45:49 - INFO - __main__ - Step 4354: {'lr': 0.0004992617972818362, 'samples': 208992, 'steps': 4353, 'loss/train': 2.4163172245025635} +07/25/2024 11:45:49 - INFO - __main__ - Step 4355: {'lr': 0.0004992613932632216, 'samples': 209040, 'steps': 4354, 'loss/train': 2.754922866821289} +07/25/2024 11:45:49 - INFO - __main__ - Step 4356: {'lr': 0.0004992609891342408, 'samples': 209088, 'steps': 4355, 'loss/train': 0.8338007926940918} +07/25/2024 11:45:50 - INFO - __main__ - Step 4357: {'lr': 0.0004992605848948943, 'samples': 209136, 'steps': 4356, 'loss/train': 2.3344857692718506} +07/25/2024 11:45:50 - INFO - __main__ - Step 4358: {'lr': 0.0004992601805451823, 'samples': 209184, 'steps': 4357, 'loss/train': 0.34429699182510376} +07/25/2024 11:45:50 - INFO - __main__ - Step 4359: {'lr': 0.0004992597760851049, 'samples': 209232, 'steps': 4358, 'loss/train': 2.503654956817627} +07/25/2024 11:45:51 - INFO - __main__ - Step 4360: {'lr': 0.0004992593715146622, 'samples': 209280, 'steps': 4359, 'loss/train': 2.993178129196167} +07/25/2024 11:45:51 - INFO - __main__ - Step 4361: {'lr': 0.0004992589668338543, 'samples': 209328, 'steps': 4360, 'loss/train': 2.4621548652648926} +07/25/2024 11:45:51 - INFO - __main__ - Step 4362: {'lr': 0.0004992585620426817, 'samples': 209376, 'steps': 4361, 'loss/train': 2.113550901412964} +07/25/2024 11:45:51 - INFO - __main__ - Step 4363: {'lr': 0.0004992581571411443, 'samples': 209424, 'steps': 4362, 'loss/train': 4.196419715881348} +07/25/2024 11:45:52 - INFO - __main__ - Step 4364: {'lr': 0.0004992577521292424, 'samples': 209472, 'steps': 4363, 'loss/train': 2.11497163772583} +07/25/2024 11:45:52 - INFO - __main__ - Step 4365: {'lr': 0.0004992573470069761, 'samples': 209520, 'steps': 4364, 'loss/train': 1.8029972314834595} +07/25/2024 11:45:52 - INFO - __main__ - Step 4366: {'lr': 0.0004992569417743457, 'samples': 209568, 'steps': 4365, 'loss/train': 2.1351125240325928} +07/25/2024 11:45:53 - INFO - __main__ - Step 4367: {'lr': 0.0004992565364313513, 'samples': 209616, 'steps': 4366, 'loss/train': 2.6785709857940674} +07/25/2024 11:45:53 - INFO - __main__ - Step 4368: {'lr': 0.0004992561309779931, 'samples': 209664, 'steps': 4367, 'loss/train': 3.0343241691589355} +07/25/2024 11:45:53 - INFO - __main__ - Step 4369: {'lr': 0.0004992557254142712, 'samples': 209712, 'steps': 4368, 'loss/train': 2.4996087551116943} +07/25/2024 11:45:53 - INFO - __main__ - Step 4370: {'lr': 0.0004992553197401859, 'samples': 209760, 'steps': 4369, 'loss/train': 2.6241302490234375} +07/25/2024 11:45:54 - INFO - __main__ - Step 4371: {'lr': 0.0004992549139557373, 'samples': 209808, 'steps': 4370, 'loss/train': 2.250387191772461} +07/25/2024 11:45:54 - INFO - __main__ - Step 4372: {'lr': 0.0004992545080609257, 'samples': 209856, 'steps': 4371, 'loss/train': 1.7641083002090454} +07/25/2024 11:45:54 - INFO - __main__ - Step 4373: {'lr': 0.0004992541020557511, 'samples': 209904, 'steps': 4372, 'loss/train': 1.3997673988342285} +07/25/2024 11:45:55 - INFO - __main__ - Step 4374: {'lr': 0.0004992536959402138, 'samples': 209952, 'steps': 4373, 'loss/train': 2.96522855758667} +07/25/2024 11:45:55 - INFO - __main__ - Step 4375: {'lr': 0.0004992532897143139, 'samples': 210000, 'steps': 4374, 'loss/train': 2.339970350265503} +07/25/2024 11:45:55 - INFO - __main__ - Step 4376: {'lr': 0.0004992528833780517, 'samples': 210048, 'steps': 4375, 'loss/train': 2.629485845565796} +07/25/2024 11:45:55 - INFO - __main__ - Step 4377: {'lr': 0.0004992524769314272, 'samples': 210096, 'steps': 4376, 'loss/train': 2.660001039505005} +07/25/2024 11:45:56 - INFO - __main__ - Step 4378: {'lr': 0.0004992520703744409, 'samples': 210144, 'steps': 4377, 'loss/train': 2.313253402709961} +07/25/2024 11:45:56 - INFO - __main__ - Step 4379: {'lr': 0.0004992516637070926, 'samples': 210192, 'steps': 4378, 'loss/train': 2.902031183242798} +07/25/2024 11:45:56 - INFO - __main__ - Step 4380: {'lr': 0.0004992512569293826, 'samples': 210240, 'steps': 4379, 'loss/train': 2.4150843620300293} +07/25/2024 11:45:57 - INFO - __main__ - Step 4381: {'lr': 0.0004992508500413112, 'samples': 210288, 'steps': 4380, 'loss/train': 2.5543673038482666} +07/25/2024 11:45:57 - INFO - __main__ - Step 4382: {'lr': 0.0004992504430428786, 'samples': 210336, 'steps': 4381, 'loss/train': 0.39632782340049744} +07/25/2024 11:45:57 - INFO - __main__ - Step 4383: {'lr': 0.0004992500359340847, 'samples': 210384, 'steps': 4382, 'loss/train': 2.457228899002075} +07/25/2024 11:45:57 - INFO - __main__ - Step 4384: {'lr': 0.0004992496287149301, 'samples': 210432, 'steps': 4383, 'loss/train': 4.905389785766602} +07/25/2024 11:45:58 - INFO - __main__ - Step 4385: {'lr': 0.0004992492213854146, 'samples': 210480, 'steps': 4384, 'loss/train': 1.4178279638290405} +07/25/2024 11:45:58 - INFO - __main__ - Step 4386: {'lr': 0.0004992488139455385, 'samples': 210528, 'steps': 4385, 'loss/train': 0.686678946018219} +07/25/2024 11:45:58 - INFO - __main__ - Step 4387: {'lr': 0.0004992484063953021, 'samples': 210576, 'steps': 4386, 'loss/train': 3.989506244659424} +07/25/2024 11:45:59 - INFO - __main__ - Step 4388: {'lr': 0.0004992479987347054, 'samples': 210624, 'steps': 4387, 'loss/train': 2.3716301918029785} +07/25/2024 11:45:59 - INFO - __main__ - Step 4389: {'lr': 0.0004992475909637486, 'samples': 210672, 'steps': 4388, 'loss/train': 1.5895341634750366} +07/25/2024 11:45:59 - INFO - __main__ - Step 4390: {'lr': 0.0004992471830824321, 'samples': 210720, 'steps': 4389, 'loss/train': 2.2962608337402344} +07/25/2024 11:45:59 - INFO - __main__ - Step 4391: {'lr': 0.0004992467750907559, 'samples': 210768, 'steps': 4390, 'loss/train': 2.2714340686798096} +07/25/2024 11:46:00 - INFO - __main__ - Step 4392: {'lr': 0.0004992463669887202, 'samples': 210816, 'steps': 4391, 'loss/train': 2.831747055053711} +07/25/2024 11:46:00 - INFO - __main__ - Step 4393: {'lr': 0.0004992459587763252, 'samples': 210864, 'steps': 4392, 'loss/train': 2.274494171142578} +07/25/2024 11:46:00 - INFO - __main__ - Step 4394: {'lr': 0.000499245550453571, 'samples': 210912, 'steps': 4393, 'loss/train': 2.43046236038208} +07/25/2024 11:46:01 - INFO - __main__ - Step 4395: {'lr': 0.000499245142020458, 'samples': 210960, 'steps': 4394, 'loss/train': 2.499318838119507} +07/25/2024 11:46:01 - INFO - __main__ - Step 4396: {'lr': 0.0004992447334769862, 'samples': 211008, 'steps': 4395, 'loss/train': 1.788936972618103} +07/25/2024 11:46:01 - INFO - __main__ - Step 4397: {'lr': 0.0004992443248231557, 'samples': 211056, 'steps': 4396, 'loss/train': 1.8276575803756714} +07/25/2024 11:46:01 - INFO - __main__ - Step 4398: {'lr': 0.0004992439160589669, 'samples': 211104, 'steps': 4397, 'loss/train': 2.0498623847961426} +07/25/2024 11:46:02 - INFO - __main__ - Step 4399: {'lr': 0.0004992435071844198, 'samples': 211152, 'steps': 4398, 'loss/train': 2.1392359733581543} +07/25/2024 11:46:02 - INFO - __main__ - Step 4400: {'lr': 0.0004992430981995147, 'samples': 211200, 'steps': 4399, 'loss/train': 3.134134292602539} +07/25/2024 11:46:02 - INFO - __main__ - Step 4401: {'lr': 0.0004992426891042517, 'samples': 211248, 'steps': 4400, 'loss/train': 2.672778606414795} +07/25/2024 11:46:03 - INFO - __main__ - Step 4402: {'lr': 0.0004992422798986312, 'samples': 211296, 'steps': 4401, 'loss/train': 2.2195324897766113} +07/25/2024 11:46:03 - INFO - __main__ - Step 4403: {'lr': 0.000499241870582653, 'samples': 211344, 'steps': 4402, 'loss/train': 2.463639974594116} +07/25/2024 11:46:03 - INFO - __main__ - Step 4404: {'lr': 0.0004992414611563175, 'samples': 211392, 'steps': 4403, 'loss/train': 1.9991735219955444} +07/25/2024 11:46:03 - INFO - __main__ - Step 4405: {'lr': 0.000499241051619625, 'samples': 211440, 'steps': 4404, 'loss/train': 2.4528098106384277} +07/25/2024 11:46:04 - INFO - __main__ - Step 4406: {'lr': 0.0004992406419725754, 'samples': 211488, 'steps': 4405, 'loss/train': 0.35940998792648315} +07/25/2024 11:46:04 - INFO - __main__ - Step 4407: {'lr': 0.0004992402322151692, 'samples': 211536, 'steps': 4406, 'loss/train': 2.7754855155944824} +07/25/2024 11:46:04 - INFO - __main__ - Step 4408: {'lr': 0.0004992398223474063, 'samples': 211584, 'steps': 4407, 'loss/train': 2.743269681930542} +07/25/2024 11:46:04 - INFO - __main__ - Step 4409: {'lr': 0.0004992394123692869, 'samples': 211632, 'steps': 4408, 'loss/train': 1.9506940841674805} +07/25/2024 11:46:05 - INFO - __main__ - Step 4410: {'lr': 0.0004992390022808115, 'samples': 211680, 'steps': 4409, 'loss/train': 1.7444158792495728} +07/25/2024 11:46:05 - INFO - __main__ - Step 4411: {'lr': 0.0004992385920819798, 'samples': 211728, 'steps': 4410, 'loss/train': 3.9626498222351074} +07/25/2024 11:46:05 - INFO - __main__ - Step 4412: {'lr': 0.0004992381817727924, 'samples': 211776, 'steps': 4411, 'loss/train': 2.8791096210479736} +07/25/2024 11:46:06 - INFO - __main__ - Step 4413: {'lr': 0.0004992377713532492, 'samples': 211824, 'steps': 4412, 'loss/train': 2.132880687713623} +07/25/2024 11:46:06 - INFO - __main__ - Step 4414: {'lr': 0.0004992373608233506, 'samples': 211872, 'steps': 4413, 'loss/train': 2.4769270420074463} +07/25/2024 11:46:06 - INFO - __main__ - Step 4415: {'lr': 0.0004992369501830966, 'samples': 211920, 'steps': 4414, 'loss/train': 2.134791612625122} +07/25/2024 11:46:06 - INFO - __main__ - Step 4416: {'lr': 0.0004992365394324875, 'samples': 211968, 'steps': 4415, 'loss/train': 1.7853995561599731} +07/25/2024 11:46:07 - INFO - __main__ - Step 4417: {'lr': 0.0004992361285715235, 'samples': 212016, 'steps': 4416, 'loss/train': 1.7427023649215698} +07/25/2024 11:46:07 - INFO - __main__ - Step 4418: {'lr': 0.0004992357176002047, 'samples': 212064, 'steps': 4417, 'loss/train': 2.096099376678467} +07/25/2024 11:46:07 - INFO - __main__ - Step 4419: {'lr': 0.0004992353065185313, 'samples': 212112, 'steps': 4418, 'loss/train': 2.3010261058807373} +07/25/2024 11:46:08 - INFO - __main__ - Step 4420: {'lr': 0.0004992348953265035, 'samples': 212160, 'steps': 4419, 'loss/train': 1.897314190864563} +07/25/2024 11:46:08 - INFO - __main__ - Step 4421: {'lr': 0.0004992344840241213, 'samples': 212208, 'steps': 4420, 'loss/train': 1.8526846170425415} +07/25/2024 11:46:08 - INFO - __main__ - Step 4422: {'lr': 0.0004992340726113853, 'samples': 212256, 'steps': 4421, 'loss/train': 2.143414258956909} +07/25/2024 11:46:08 - INFO - __main__ - Step 4423: {'lr': 0.0004992336610882953, 'samples': 212304, 'steps': 4422, 'loss/train': 1.8459445238113403} +07/25/2024 11:46:09 - INFO - __main__ - Step 4424: {'lr': 0.0004992332494548517, 'samples': 212352, 'steps': 4423, 'loss/train': 2.4393692016601562} +07/25/2024 11:46:09 - INFO - __main__ - Step 4425: {'lr': 0.0004992328377110546, 'samples': 212400, 'steps': 4424, 'loss/train': 2.1267459392547607} +07/25/2024 11:46:09 - INFO - __main__ - Step 4426: {'lr': 0.0004992324258569041, 'samples': 212448, 'steps': 4425, 'loss/train': 1.9341087341308594} +07/25/2024 11:46:10 - INFO - __main__ - Step 4427: {'lr': 0.0004992320138924004, 'samples': 212496, 'steps': 4426, 'loss/train': 2.1993935108184814} +07/25/2024 11:46:10 - INFO - __main__ - Step 4428: {'lr': 0.0004992316018175439, 'samples': 212544, 'steps': 4427, 'loss/train': 1.9243098497390747} +07/25/2024 11:46:10 - INFO - __main__ - Step 4429: {'lr': 0.0004992311896323346, 'samples': 212592, 'steps': 4428, 'loss/train': 2.528188467025757} +07/25/2024 11:46:10 - INFO - __main__ - Step 4430: {'lr': 0.0004992307773367727, 'samples': 212640, 'steps': 4429, 'loss/train': 0.2947622239589691} +07/25/2024 11:46:11 - INFO - __main__ - Step 4431: {'lr': 0.0004992303649308583, 'samples': 212688, 'steps': 4430, 'loss/train': 2.3717267513275146} +07/25/2024 11:46:11 - INFO - __main__ - Step 4432: {'lr': 0.0004992299524145918, 'samples': 212736, 'steps': 4431, 'loss/train': 2.5352883338928223} +07/25/2024 11:46:11 - INFO - __main__ - Step 4433: {'lr': 0.0004992295397879732, 'samples': 212784, 'steps': 4432, 'loss/train': 2.254422903060913} +07/25/2024 11:46:12 - INFO - __main__ - Step 4434: {'lr': 0.0004992291270510029, 'samples': 212832, 'steps': 4433, 'loss/train': 2.5402140617370605} +07/25/2024 11:46:12 - INFO - __main__ - Step 4435: {'lr': 0.0004992287142036807, 'samples': 212880, 'steps': 4434, 'loss/train': 3.663245677947998} +07/25/2024 11:46:12 - INFO - __main__ - Step 4436: {'lr': 0.0004992283012460072, 'samples': 212928, 'steps': 4435, 'loss/train': 2.1582701206207275} +07/25/2024 11:46:12 - INFO - __main__ - Step 4437: {'lr': 0.0004992278881779822, 'samples': 212976, 'steps': 4436, 'loss/train': 1.9817758798599243} +07/25/2024 11:46:13 - INFO - __main__ - Step 4438: {'lr': 0.0004992274749996061, 'samples': 213024, 'steps': 4437, 'loss/train': 1.822417140007019} +07/25/2024 11:46:13 - INFO - __main__ - Step 4439: {'lr': 0.0004992270617108791, 'samples': 213072, 'steps': 4438, 'loss/train': 2.632852792739868} +07/25/2024 11:46:13 - INFO - __main__ - Step 4440: {'lr': 0.0004992266483118015, 'samples': 213120, 'steps': 4439, 'loss/train': 1.9273731708526611} +07/25/2024 11:46:14 - INFO - __main__ - Step 4441: {'lr': 0.0004992262348023731, 'samples': 213168, 'steps': 4440, 'loss/train': 1.9885362386703491} +07/25/2024 11:46:14 - INFO - __main__ - Step 4442: {'lr': 0.0004992258211825944, 'samples': 213216, 'steps': 4441, 'loss/train': 2.201331377029419} +07/25/2024 11:46:14 - INFO - __main__ - Step 4443: {'lr': 0.0004992254074524656, 'samples': 213264, 'steps': 4442, 'loss/train': 2.682494640350342} +07/25/2024 11:46:14 - INFO - __main__ - Step 4444: {'lr': 0.0004992249936119866, 'samples': 213312, 'steps': 4443, 'loss/train': 2.465909004211426} +07/25/2024 11:46:15 - INFO - __main__ - Step 4445: {'lr': 0.0004992245796611579, 'samples': 213360, 'steps': 4444, 'loss/train': 1.0014334917068481} +07/25/2024 11:46:15 - INFO - __main__ - Step 4446: {'lr': 0.0004992241655999794, 'samples': 213408, 'steps': 4445, 'loss/train': 2.766120195388794} +07/25/2024 11:46:15 - INFO - __main__ - Step 4447: {'lr': 0.0004992237514284514, 'samples': 213456, 'steps': 4446, 'loss/train': 1.97720468044281} +07/25/2024 11:46:16 - INFO - __main__ - Step 4448: {'lr': 0.0004992233371465742, 'samples': 213504, 'steps': 4447, 'loss/train': 1.941521406173706} +07/25/2024 11:46:16 - INFO - __main__ - Step 4449: {'lr': 0.0004992229227543479, 'samples': 213552, 'steps': 4448, 'loss/train': 2.1931352615356445} +07/25/2024 11:46:16 - INFO - __main__ - Step 4450: {'lr': 0.0004992225082517726, 'samples': 213600, 'steps': 4449, 'loss/train': 2.8199424743652344} +07/25/2024 11:46:16 - INFO - __main__ - Step 4451: {'lr': 0.0004992220936388486, 'samples': 213648, 'steps': 4450, 'loss/train': 1.7708934545516968} +07/25/2024 11:46:17 - INFO - __main__ - Step 4452: {'lr': 0.0004992216789155761, 'samples': 213696, 'steps': 4451, 'loss/train': 2.749555826187134} +07/25/2024 11:46:17 - INFO - __main__ - Step 4453: {'lr': 0.0004992212640819552, 'samples': 213744, 'steps': 4452, 'loss/train': 3.1525683403015137} +07/25/2024 11:46:17 - INFO - __main__ - Step 4454: {'lr': 0.000499220849137986, 'samples': 213792, 'steps': 4453, 'loss/train': 0.28329598903656006} +07/25/2024 11:46:18 - INFO - __main__ - Step 4455: {'lr': 0.000499220434083669, 'samples': 213840, 'steps': 4454, 'loss/train': 0.7181069850921631} +07/25/2024 11:46:18 - INFO - __main__ - Step 4456: {'lr': 0.0004992200189190039, 'samples': 213888, 'steps': 4455, 'loss/train': 2.8014423847198486} +07/25/2024 11:46:18 - INFO - __main__ - Step 4457: {'lr': 0.0004992196036439913, 'samples': 213936, 'steps': 4456, 'loss/train': 2.6276252269744873} +07/25/2024 11:46:18 - INFO - __main__ - Step 4458: {'lr': 0.0004992191882586313, 'samples': 213984, 'steps': 4457, 'loss/train': 2.0096983909606934} +07/25/2024 11:46:19 - INFO - __main__ - Step 4459: {'lr': 0.0004992187727629239, 'samples': 214032, 'steps': 4458, 'loss/train': 3.496185302734375} +07/25/2024 11:46:19 - INFO - __main__ - Step 4460: {'lr': 0.0004992183571568696, 'samples': 214080, 'steps': 4459, 'loss/train': 2.3193342685699463} +07/25/2024 11:46:19 - INFO - __main__ - Step 4461: {'lr': 0.0004992179414404682, 'samples': 214128, 'steps': 4460, 'loss/train': 2.141636848449707} +07/25/2024 11:46:20 - INFO - __main__ - Step 4462: {'lr': 0.0004992175256137201, 'samples': 214176, 'steps': 4461, 'loss/train': 2.468804121017456} +07/25/2024 11:46:20 - INFO - __main__ - Step 4463: {'lr': 0.0004992171096766256, 'samples': 214224, 'steps': 4462, 'loss/train': 2.4953536987304688} +07/25/2024 11:46:20 - INFO - __main__ - Step 4464: {'lr': 0.0004992166936291846, 'samples': 214272, 'steps': 4463, 'loss/train': 2.083169460296631} +07/25/2024 11:46:20 - INFO - __main__ - Step 4465: {'lr': 0.0004992162774713976, 'samples': 214320, 'steps': 4464, 'loss/train': 2.290921211242676} +07/25/2024 11:46:21 - INFO - __main__ - Step 4466: {'lr': 0.0004992158612032644, 'samples': 214368, 'steps': 4465, 'loss/train': 2.738373279571533} +07/25/2024 11:46:21 - INFO - __main__ - Step 4467: {'lr': 0.0004992154448247855, 'samples': 214416, 'steps': 4466, 'loss/train': 2.201272487640381} +07/25/2024 11:46:21 - INFO - __main__ - Step 4468: {'lr': 0.000499215028335961, 'samples': 214464, 'steps': 4467, 'loss/train': 2.640759229660034} +07/25/2024 11:46:22 - INFO - __main__ - Step 4469: {'lr': 0.0004992146117367911, 'samples': 214512, 'steps': 4468, 'loss/train': 2.0885119438171387} +07/25/2024 11:46:22 - INFO - __main__ - Step 4470: {'lr': 0.0004992141950272759, 'samples': 214560, 'steps': 4469, 'loss/train': 2.148953437805176} +07/25/2024 11:46:22 - INFO - __main__ - Step 4471: {'lr': 0.0004992137782074157, 'samples': 214608, 'steps': 4470, 'loss/train': 2.027545690536499} +07/25/2024 11:46:22 - INFO - __main__ - Step 4472: {'lr': 0.0004992133612772105, 'samples': 214656, 'steps': 4471, 'loss/train': 2.6038644313812256} +07/25/2024 11:46:23 - INFO - __main__ - Step 4473: {'lr': 0.0004992129442366606, 'samples': 214704, 'steps': 4472, 'loss/train': 2.2543492317199707} +07/25/2024 11:46:23 - INFO - __main__ - Step 4474: {'lr': 0.0004992125270857664, 'samples': 214752, 'steps': 4473, 'loss/train': 2.5979435443878174} +07/25/2024 11:46:23 - INFO - __main__ - Step 4475: {'lr': 0.0004992121098245277, 'samples': 214800, 'steps': 4474, 'loss/train': 2.2819948196411133} +07/25/2024 11:46:24 - INFO - __main__ - Step 4476: {'lr': 0.0004992116924529449, 'samples': 214848, 'steps': 4475, 'loss/train': 2.782907724380493} +07/25/2024 11:46:24 - INFO - __main__ - Step 4477: {'lr': 0.0004992112749710181, 'samples': 214896, 'steps': 4476, 'loss/train': 5.962057590484619} +07/25/2024 11:46:24 - INFO - __main__ - Step 4478: {'lr': 0.0004992108573787476, 'samples': 214944, 'steps': 4477, 'loss/train': 0.400335431098938} +07/25/2024 11:46:24 - INFO - __main__ - Step 4479: {'lr': 0.0004992104396761335, 'samples': 214992, 'steps': 4478, 'loss/train': 0.4530230462551117} +07/25/2024 11:46:25 - INFO - __main__ - Step 4480: {'lr': 0.000499210021863176, 'samples': 215040, 'steps': 4479, 'loss/train': 2.4006175994873047} +07/25/2024 11:46:25 - INFO - __main__ - Step 4481: {'lr': 0.0004992096039398753, 'samples': 215088, 'steps': 4480, 'loss/train': 2.4574546813964844} +07/25/2024 11:46:25 - INFO - __main__ - Step 4482: {'lr': 0.0004992091859062315, 'samples': 215136, 'steps': 4481, 'loss/train': 2.5407791137695312} +07/25/2024 11:46:25 - INFO - __main__ - Step 4483: {'lr': 0.000499208767762245, 'samples': 215184, 'steps': 4482, 'loss/train': 3.40519380569458} +07/25/2024 11:46:26 - INFO - __main__ - Step 4484: {'lr': 0.0004992083495079157, 'samples': 215232, 'steps': 4483, 'loss/train': 3.206439971923828} +07/25/2024 11:46:26 - INFO - __main__ - Step 4485: {'lr': 0.000499207931143244, 'samples': 215280, 'steps': 4484, 'loss/train': 0.7475208044052124} +07/25/2024 11:46:26 - INFO - __main__ - Step 4486: {'lr': 0.0004992075126682299, 'samples': 215328, 'steps': 4485, 'loss/train': 2.033686399459839} +07/25/2024 11:46:27 - INFO - __main__ - Step 4487: {'lr': 0.0004992070940828738, 'samples': 215376, 'steps': 4486, 'loss/train': 2.5375356674194336} +07/25/2024 11:46:27 - INFO - __main__ - Step 4488: {'lr': 0.0004992066753871757, 'samples': 215424, 'steps': 4487, 'loss/train': 2.430860757827759} +07/25/2024 11:46:27 - INFO - __main__ - Step 4489: {'lr': 0.000499206256581136, 'samples': 215472, 'steps': 4488, 'loss/train': 1.0844337940216064} +07/25/2024 11:46:27 - INFO - __main__ - Step 4490: {'lr': 0.0004992058376647547, 'samples': 215520, 'steps': 4489, 'loss/train': 2.1186625957489014} +07/25/2024 11:46:28 - INFO - __main__ - Step 4491: {'lr': 0.0004992054186380321, 'samples': 215568, 'steps': 4490, 'loss/train': 2.6663882732391357} +07/25/2024 11:46:28 - INFO - __main__ - Step 4492: {'lr': 0.0004992049995009682, 'samples': 215616, 'steps': 4491, 'loss/train': 2.211048126220703} +07/25/2024 11:46:28 - INFO - __main__ - Step 4493: {'lr': 0.0004992045802535634, 'samples': 215664, 'steps': 4492, 'loss/train': 2.288989782333374} +07/25/2024 11:46:29 - INFO - __main__ - Step 4494: {'lr': 0.0004992041608958177, 'samples': 215712, 'steps': 4493, 'loss/train': 2.457303762435913} +07/25/2024 11:46:29 - INFO - __main__ - Step 4495: {'lr': 0.0004992037414277314, 'samples': 215760, 'steps': 4494, 'loss/train': 2.5774810314178467} +07/25/2024 11:46:29 - INFO - __main__ - Step 4496: {'lr': 0.0004992033218493046, 'samples': 215808, 'steps': 4495, 'loss/train': 2.677100896835327} +07/25/2024 11:46:29 - INFO - __main__ - Step 4497: {'lr': 0.0004992029021605377, 'samples': 215856, 'steps': 4496, 'loss/train': 2.3117263317108154} +07/25/2024 11:46:30 - INFO - __main__ - Step 4498: {'lr': 0.0004992024823614307, 'samples': 215904, 'steps': 4497, 'loss/train': 2.600198745727539} +07/25/2024 11:46:30 - INFO - __main__ - Step 4499: {'lr': 0.0004992020624519837, 'samples': 215952, 'steps': 4498, 'loss/train': 2.0136663913726807} +07/25/2024 11:46:30 - INFO - __main__ - Step 4500: {'lr': 0.0004992016424321972, 'samples': 216000, 'steps': 4499, 'loss/train': 2.1925485134124756} +07/25/2024 11:46:31 - INFO - __main__ - Step 4501: {'lr': 0.0004992012223020711, 'samples': 216048, 'steps': 4500, 'loss/train': 5.966180324554443} +07/25/2024 11:46:31 - INFO - __main__ - Step 4502: {'lr': 0.0004992008020616057, 'samples': 216096, 'steps': 4501, 'loss/train': 1.708321452140808} +07/25/2024 11:46:31 - INFO - __main__ - Step 4503: {'lr': 0.0004992003817108011, 'samples': 216144, 'steps': 4502, 'loss/train': 0.3667314350605011} +07/25/2024 11:46:31 - INFO - __main__ - Step 4504: {'lr': 0.0004991999612496576, 'samples': 216192, 'steps': 4503, 'loss/train': 2.421506404876709} +07/25/2024 11:46:32 - INFO - __main__ - Step 4505: {'lr': 0.0004991995406781754, 'samples': 216240, 'steps': 4504, 'loss/train': 2.166931390762329} +07/25/2024 11:46:32 - INFO - __main__ - Step 4506: {'lr': 0.0004991991199963546, 'samples': 216288, 'steps': 4505, 'loss/train': 3.315972328186035} +07/25/2024 11:46:32 - INFO - __main__ - Step 4507: {'lr': 0.0004991986992041954, 'samples': 216336, 'steps': 4506, 'loss/train': 2.839082717895508} +07/25/2024 11:46:33 - INFO - __main__ - Step 4508: {'lr': 0.0004991982783016979, 'samples': 216384, 'steps': 4507, 'loss/train': 2.6922566890716553} +07/25/2024 11:46:33 - INFO - __main__ - Step 4509: {'lr': 0.0004991978572888625, 'samples': 216432, 'steps': 4508, 'loss/train': 0.46611565351486206} +07/25/2024 11:46:33 - INFO - __main__ - Step 4510: {'lr': 0.0004991974361656892, 'samples': 216480, 'steps': 4509, 'loss/train': 2.6650643348693848} +07/25/2024 11:46:33 - INFO - __main__ - Step 4511: {'lr': 0.0004991970149321784, 'samples': 216528, 'steps': 4510, 'loss/train': 2.0197527408599854} +07/25/2024 11:46:34 - INFO - __main__ - Step 4512: {'lr': 0.00049919659358833, 'samples': 216576, 'steps': 4511, 'loss/train': 2.455219030380249} +07/25/2024 11:46:34 - INFO - __main__ - Step 4513: {'lr': 0.0004991961721341443, 'samples': 216624, 'steps': 4512, 'loss/train': 1.0009955167770386} +07/25/2024 11:46:34 - INFO - __main__ - Step 4514: {'lr': 0.0004991957505696217, 'samples': 216672, 'steps': 4513, 'loss/train': 2.1803505420684814} +07/25/2024 11:46:35 - INFO - __main__ - Step 4515: {'lr': 0.000499195328894762, 'samples': 216720, 'steps': 4514, 'loss/train': 2.513720989227295} +07/25/2024 11:46:35 - INFO - __main__ - Step 4516: {'lr': 0.0004991949071095657, 'samples': 216768, 'steps': 4515, 'loss/train': 2.225796699523926} +07/25/2024 11:46:35 - INFO - __main__ - Step 4517: {'lr': 0.0004991944852140329, 'samples': 216816, 'steps': 4516, 'loss/train': 2.1237704753875732} +07/25/2024 11:46:35 - INFO - __main__ - Step 4518: {'lr': 0.0004991940632081638, 'samples': 216864, 'steps': 4517, 'loss/train': 2.6272854804992676} +07/25/2024 11:46:36 - INFO - __main__ - Step 4519: {'lr': 0.0004991936410919584, 'samples': 216912, 'steps': 4518, 'loss/train': 2.415038824081421} +07/25/2024 11:46:36 - INFO - __main__ - Step 4520: {'lr': 0.0004991932188654171, 'samples': 216960, 'steps': 4519, 'loss/train': 2.4363644123077393} +07/25/2024 11:46:36 - INFO - __main__ - Step 4521: {'lr': 0.00049919279652854, 'samples': 217008, 'steps': 4520, 'loss/train': 2.6275386810302734} +07/25/2024 11:46:37 - INFO - __main__ - Step 4522: {'lr': 0.0004991923740813274, 'samples': 217056, 'steps': 4521, 'loss/train': 2.3606717586517334} +07/25/2024 11:46:37 - INFO - __main__ - Step 4523: {'lr': 0.0004991919515237794, 'samples': 217104, 'steps': 4522, 'loss/train': 2.1422691345214844} +07/25/2024 11:46:37 - INFO - __main__ - Step 4524: {'lr': 0.0004991915288558961, 'samples': 217152, 'steps': 4523, 'loss/train': 2.1199069023132324} +07/25/2024 11:46:37 - INFO - __main__ - Step 4525: {'lr': 0.0004991911060776778, 'samples': 217200, 'steps': 4524, 'loss/train': 6.032202243804932} +07/25/2024 11:46:38 - INFO - __main__ - Step 4526: {'lr': 0.0004991906831891247, 'samples': 217248, 'steps': 4525, 'loss/train': 2.9806196689605713} +07/25/2024 11:46:38 - INFO - __main__ - Step 4527: {'lr': 0.0004991902601902369, 'samples': 217296, 'steps': 4526, 'loss/train': 0.4590044319629669} +07/25/2024 11:46:38 - INFO - __main__ - Step 4528: {'lr': 0.0004991898370810147, 'samples': 217344, 'steps': 4527, 'loss/train': 2.4536478519439697} +07/25/2024 11:46:39 - INFO - __main__ - Step 4529: {'lr': 0.0004991894138614582, 'samples': 217392, 'steps': 4528, 'loss/train': 2.384617328643799} +07/25/2024 11:46:39 - INFO - __main__ - Step 4530: {'lr': 0.0004991889905315675, 'samples': 217440, 'steps': 4529, 'loss/train': 2.46450138092041} +07/25/2024 11:46:39 - INFO - __main__ - Step 4531: {'lr': 0.000499188567091343, 'samples': 217488, 'steps': 4530, 'loss/train': 2.960568428039551} +07/25/2024 11:46:39 - INFO - __main__ - Step 4532: {'lr': 0.0004991881435407847, 'samples': 217536, 'steps': 4531, 'loss/train': 2.73934268951416} +07/25/2024 11:46:40 - INFO - __main__ - Step 4533: {'lr': 0.000499187719879893, 'samples': 217584, 'steps': 4532, 'loss/train': 0.3919295072555542} +07/25/2024 11:46:40 - INFO - __main__ - Step 4534: {'lr': 0.000499187296108668, 'samples': 217632, 'steps': 4533, 'loss/train': 2.659205675125122} +07/25/2024 11:46:40 - INFO - __main__ - Step 4535: {'lr': 0.0004991868722271096, 'samples': 217680, 'steps': 4534, 'loss/train': 2.6175296306610107} +07/25/2024 11:46:41 - INFO - __main__ - Step 4536: {'lr': 0.0004991864482352185, 'samples': 217728, 'steps': 4535, 'loss/train': 2.3847122192382812} +07/25/2024 11:46:41 - INFO - __main__ - Step 4537: {'lr': 0.0004991860241329944, 'samples': 217776, 'steps': 4536, 'loss/train': 1.8731437921524048} +07/25/2024 11:46:41 - INFO - __main__ - Step 4538: {'lr': 0.0004991855999204378, 'samples': 217824, 'steps': 4537, 'loss/train': 2.556373119354248} +07/25/2024 11:46:41 - INFO - __main__ - Step 4539: {'lr': 0.0004991851755975488, 'samples': 217872, 'steps': 4538, 'loss/train': 2.674379825592041} +07/25/2024 11:46:42 - INFO - __main__ - Step 4540: {'lr': 0.0004991847511643277, 'samples': 217920, 'steps': 4539, 'loss/train': 2.5195250511169434} +07/25/2024 11:46:42 - INFO - __main__ - Step 4541: {'lr': 0.0004991843266207744, 'samples': 217968, 'steps': 4540, 'loss/train': 2.4827072620391846} +07/25/2024 11:46:42 - INFO - __main__ - Step 4542: {'lr': 0.0004991839019668894, 'samples': 218016, 'steps': 4541, 'loss/train': 2.304121494293213} +07/25/2024 11:46:43 - INFO - __main__ - Step 4543: {'lr': 0.0004991834772026726, 'samples': 218064, 'steps': 4542, 'loss/train': 2.4403960704803467} +07/25/2024 11:46:43 - INFO - __main__ - Step 4544: {'lr': 0.0004991830523281245, 'samples': 218112, 'steps': 4543, 'loss/train': 2.311654567718506} +07/25/2024 11:46:43 - INFO - __main__ - Step 4545: {'lr': 0.0004991826273432451, 'samples': 218160, 'steps': 4544, 'loss/train': 3.042865037918091} +07/25/2024 11:46:43 - INFO - __main__ - Step 4546: {'lr': 0.0004991822022480345, 'samples': 218208, 'steps': 4545, 'loss/train': 1.9436851739883423} +07/25/2024 11:46:44 - INFO - __main__ - Step 4547: {'lr': 0.0004991817770424931, 'samples': 218256, 'steps': 4546, 'loss/train': 2.167893648147583} +07/25/2024 11:46:44 - INFO - __main__ - Step 4548: {'lr': 0.0004991813517266209, 'samples': 218304, 'steps': 4547, 'loss/train': 2.3980183601379395} +07/25/2024 11:46:44 - INFO - __main__ - Step 4549: {'lr': 0.0004991809263004183, 'samples': 218352, 'steps': 4548, 'loss/train': 6.123282432556152} +07/25/2024 11:46:45 - INFO - __main__ - Step 4550: {'lr': 0.0004991805007638853, 'samples': 218400, 'steps': 4549, 'loss/train': 1.9611694812774658} +07/25/2024 11:46:45 - INFO - __main__ - Step 4551: {'lr': 0.0004991800751170222, 'samples': 218448, 'steps': 4550, 'loss/train': 0.451272577047348} +07/25/2024 11:46:45 - INFO - __main__ - Step 4552: {'lr': 0.000499179649359829, 'samples': 218496, 'steps': 4551, 'loss/train': 2.520423412322998} +07/25/2024 11:46:45 - INFO - __main__ - Step 4553: {'lr': 0.0004991792234923062, 'samples': 218544, 'steps': 4552, 'loss/train': 2.3575899600982666} +07/25/2024 11:46:46 - INFO - __main__ - Step 4554: {'lr': 0.0004991787975144539, 'samples': 218592, 'steps': 4553, 'loss/train': 2.5893137454986572} +07/25/2024 11:46:46 - INFO - __main__ - Step 4555: {'lr': 0.000499178371426272, 'samples': 218640, 'steps': 4554, 'loss/train': 3.069305419921875} +07/25/2024 11:46:46 - INFO - __main__ - Step 4556: {'lr': 0.0004991779452277609, 'samples': 218688, 'steps': 4555, 'loss/train': 2.8104498386383057} +07/25/2024 11:46:47 - INFO - __main__ - Step 4557: {'lr': 0.0004991775189189209, 'samples': 218736, 'steps': 4556, 'loss/train': 0.3518194854259491} +07/25/2024 11:46:47 - INFO - __main__ - Step 4558: {'lr': 0.0004991770924997521, 'samples': 218784, 'steps': 4557, 'loss/train': 2.08672833442688} +07/25/2024 11:46:47 - INFO - __main__ - Step 4559: {'lr': 0.0004991766659702546, 'samples': 218832, 'steps': 4558, 'loss/train': 2.4619550704956055} +07/25/2024 11:46:47 - INFO - __main__ - Step 4560: {'lr': 0.0004991762393304286, 'samples': 218880, 'steps': 4559, 'loss/train': 2.4873099327087402} +07/25/2024 11:46:48 - INFO - __main__ - Step 4561: {'lr': 0.0004991758125802745, 'samples': 218928, 'steps': 4560, 'loss/train': 2.3555846214294434} +07/25/2024 11:46:48 - INFO - __main__ - Step 4562: {'lr': 0.0004991753857197922, 'samples': 218976, 'steps': 4561, 'loss/train': 2.3923356533050537} +07/25/2024 11:46:48 - INFO - __main__ - Step 4563: {'lr': 0.000499174958748982, 'samples': 219024, 'steps': 4562, 'loss/train': 2.051098108291626} +07/25/2024 11:46:48 - INFO - __main__ - Step 4564: {'lr': 0.0004991745316678442, 'samples': 219072, 'steps': 4563, 'loss/train': 2.272995948791504} +07/25/2024 11:46:49 - INFO - __main__ - Step 4565: {'lr': 0.0004991741044763787, 'samples': 219120, 'steps': 4564, 'loss/train': 2.200082302093506} +07/25/2024 11:46:49 - INFO - __main__ - Step 4566: {'lr': 0.0004991736771745862, 'samples': 219168, 'steps': 4565, 'loss/train': 2.509655475616455} +07/25/2024 11:46:49 - INFO - __main__ - Step 4567: {'lr': 0.0004991732497624664, 'samples': 219216, 'steps': 4566, 'loss/train': 2.131190538406372} +07/25/2024 11:46:50 - INFO - __main__ - Step 4568: {'lr': 0.0004991728222400197, 'samples': 219264, 'steps': 4567, 'loss/train': 2.504392385482788} +07/25/2024 11:46:50 - INFO - __main__ - Step 4569: {'lr': 0.0004991723946072463, 'samples': 219312, 'steps': 4568, 'loss/train': 2.384366035461426} +07/25/2024 11:46:50 - INFO - __main__ - Step 4570: {'lr': 0.0004991719668641462, 'samples': 219360, 'steps': 4569, 'loss/train': 2.738532543182373} +07/25/2024 11:46:50 - INFO - __main__ - Step 4571: {'lr': 0.0004991715390107199, 'samples': 219408, 'steps': 4570, 'loss/train': 2.1412692070007324} +07/25/2024 11:46:51 - INFO - __main__ - Step 4572: {'lr': 0.0004991711110469674, 'samples': 219456, 'steps': 4571, 'loss/train': 2.6782822608947754} +07/25/2024 11:46:51 - INFO - __main__ - Step 4573: {'lr': 0.0004991706829728888, 'samples': 219504, 'steps': 4572, 'loss/train': 6.05974817276001} +07/25/2024 11:46:51 - INFO - __main__ - Step 4574: {'lr': 0.0004991702547884844, 'samples': 219552, 'steps': 4573, 'loss/train': 2.23760986328125} +07/25/2024 11:46:52 - INFO - __main__ - Step 4575: {'lr': 0.0004991698264937546, 'samples': 219600, 'steps': 4574, 'loss/train': 0.47148463129997253} +07/25/2024 11:46:52 - INFO - __main__ - Step 4576: {'lr': 0.0004991693980886992, 'samples': 219648, 'steps': 4575, 'loss/train': 2.4356133937835693} +07/25/2024 11:46:52 - INFO - __main__ - Step 4577: {'lr': 0.0004991689695733186, 'samples': 219696, 'steps': 4576, 'loss/train': 1.9279307126998901} +07/25/2024 11:46:52 - INFO - __main__ - Step 4578: {'lr': 0.000499168540947613, 'samples': 219744, 'steps': 4577, 'loss/train': 2.318352222442627} +07/25/2024 11:46:53 - INFO - __main__ - Step 4579: {'lr': 0.0004991681122115825, 'samples': 219792, 'steps': 4578, 'loss/train': 3.173447608947754} +07/25/2024 11:46:53 - INFO - __main__ - Step 4580: {'lr': 0.0004991676833652274, 'samples': 219840, 'steps': 4579, 'loss/train': 2.6576051712036133} +07/25/2024 11:46:53 - INFO - __main__ - Step 4581: {'lr': 0.0004991672544085478, 'samples': 219888, 'steps': 4580, 'loss/train': 0.5844359397888184} +07/25/2024 11:46:54 - INFO - __main__ - Step 4582: {'lr': 0.0004991668253415439, 'samples': 219936, 'steps': 4581, 'loss/train': 2.0855612754821777} +07/25/2024 11:46:54 - INFO - __main__ - Step 4583: {'lr': 0.000499166396164216, 'samples': 219984, 'steps': 4582, 'loss/train': 2.6686432361602783} +07/25/2024 11:46:54 - INFO - __main__ - Step 4584: {'lr': 0.0004991659668765641, 'samples': 220032, 'steps': 4583, 'loss/train': 1.8168822526931763} +07/25/2024 11:46:54 - INFO - __main__ - Step 4585: {'lr': 0.0004991655374785885, 'samples': 220080, 'steps': 4584, 'loss/train': 2.493912696838379} +07/25/2024 11:46:55 - INFO - __main__ - Step 4586: {'lr': 0.0004991651079702895, 'samples': 220128, 'steps': 4585, 'loss/train': 2.390552520751953} +07/25/2024 11:46:55 - INFO - __main__ - Step 4587: {'lr': 0.000499164678351667, 'samples': 220176, 'steps': 4586, 'loss/train': 2.0426909923553467} +07/25/2024 11:46:55 - INFO - __main__ - Step 4588: {'lr': 0.0004991642486227214, 'samples': 220224, 'steps': 4587, 'loss/train': 2.978501558303833} +07/25/2024 11:46:56 - INFO - __main__ - Step 4589: {'lr': 0.000499163818783453, 'samples': 220272, 'steps': 4588, 'loss/train': 2.4907000064849854} +07/25/2024 11:46:56 - INFO - __main__ - Step 4590: {'lr': 0.0004991633888338617, 'samples': 220320, 'steps': 4589, 'loss/train': 2.7376139163970947} +07/25/2024 11:46:56 - INFO - __main__ - Step 4591: {'lr': 0.0004991629587739479, 'samples': 220368, 'steps': 4590, 'loss/train': 2.22939395904541} +07/25/2024 11:46:56 - INFO - __main__ - Step 4592: {'lr': 0.0004991625286037117, 'samples': 220416, 'steps': 4591, 'loss/train': 2.501666307449341} +07/25/2024 11:46:57 - INFO - __main__ - Step 4593: {'lr': 0.0004991620983231533, 'samples': 220464, 'steps': 4592, 'loss/train': 1.9664111137390137} +07/25/2024 11:46:57 - INFO - __main__ - Step 4594: {'lr': 0.0004991616679322729, 'samples': 220512, 'steps': 4593, 'loss/train': 2.9606990814208984} +07/25/2024 11:46:57 - INFO - __main__ - Step 4595: {'lr': 0.0004991612374310708, 'samples': 220560, 'steps': 4594, 'loss/train': 2.8470921516418457} +07/25/2024 11:46:58 - INFO - __main__ - Step 4596: {'lr': 0.000499160806819547, 'samples': 220608, 'steps': 4595, 'loss/train': 2.947896957397461} +07/25/2024 11:46:58 - INFO - __main__ - Step 4597: {'lr': 0.0004991603760977017, 'samples': 220656, 'steps': 4596, 'loss/train': 6.087181568145752} +07/25/2024 11:46:58 - INFO - __main__ - Step 4598: {'lr': 0.0004991599452655352, 'samples': 220704, 'steps': 4597, 'loss/train': 2.269960641860962} +07/25/2024 11:46:58 - INFO - __main__ - Step 4599: {'lr': 0.0004991595143230477, 'samples': 220752, 'steps': 4598, 'loss/train': 2.194007158279419} +07/25/2024 11:46:59 - INFO - __main__ - Step 4600: {'lr': 0.0004991590832702394, 'samples': 220800, 'steps': 4599, 'loss/train': 2.8923120498657227} +07/25/2024 11:46:59 - INFO - __main__ - Step 4601: {'lr': 0.0004991586521071104, 'samples': 220848, 'steps': 4600, 'loss/train': 2.7755749225616455} +07/25/2024 11:46:59 - INFO - __main__ - Step 4602: {'lr': 0.0004991582208336609, 'samples': 220896, 'steps': 4601, 'loss/train': 2.9365718364715576} +07/25/2024 11:47:00 - INFO - __main__ - Step 4603: {'lr': 0.0004991577894498911, 'samples': 220944, 'steps': 4602, 'loss/train': 3.7345175743103027} +07/25/2024 11:47:00 - INFO - __main__ - Step 4604: {'lr': 0.0004991573579558014, 'samples': 220992, 'steps': 4603, 'loss/train': 2.97540545463562} +07/25/2024 11:47:00 - INFO - __main__ - Step 4605: {'lr': 0.0004991569263513915, 'samples': 221040, 'steps': 4604, 'loss/train': 0.5495402216911316} +07/25/2024 11:47:00 - INFO - __main__ - Step 4606: {'lr': 0.0004991564946366621, 'samples': 221088, 'steps': 4605, 'loss/train': 2.652998685836792} +07/25/2024 11:47:01 - INFO - __main__ - Step 4607: {'lr': 0.0004991560628116131, 'samples': 221136, 'steps': 4606, 'loss/train': 2.7092864513397217} +07/25/2024 11:47:01 - INFO - __main__ - Step 4608: {'lr': 0.0004991556308762448, 'samples': 221184, 'steps': 4607, 'loss/train': 2.8905527591705322} +07/25/2024 11:47:01 - INFO - __main__ - Step 4609: {'lr': 0.0004991551988305574, 'samples': 221232, 'steps': 4608, 'loss/train': 2.417262315750122} +07/25/2024 11:47:02 - INFO - __main__ - Step 4610: {'lr': 0.000499154766674551, 'samples': 221280, 'steps': 4609, 'loss/train': 2.4551453590393066} +07/25/2024 11:47:02 - INFO - __main__ - Step 4611: {'lr': 0.0004991543344082259, 'samples': 221328, 'steps': 4610, 'loss/train': 2.8602821826934814} +07/25/2024 11:47:02 - INFO - __main__ - Step 4612: {'lr': 0.0004991539020315823, 'samples': 221376, 'steps': 4611, 'loss/train': 2.6042003631591797} +07/25/2024 11:47:02 - INFO - __main__ - Step 4613: {'lr': 0.0004991534695446202, 'samples': 221424, 'steps': 4612, 'loss/train': 2.6886112689971924} +07/25/2024 11:47:03 - INFO - __main__ - Step 4614: {'lr': 0.0004991530369473399, 'samples': 221472, 'steps': 4613, 'loss/train': 2.084747791290283} +07/25/2024 11:47:03 - INFO - __main__ - Step 4615: {'lr': 0.0004991526042397418, 'samples': 221520, 'steps': 4614, 'loss/train': 2.378106117248535} +07/25/2024 11:47:03 - INFO - __main__ - Step 4616: {'lr': 0.0004991521714218258, 'samples': 221568, 'steps': 4615, 'loss/train': 3.641937255859375} +07/25/2024 11:47:04 - INFO - __main__ - Step 4617: {'lr': 0.0004991517384935922, 'samples': 221616, 'steps': 4616, 'loss/train': 2.47279953956604} +07/25/2024 11:47:04 - INFO - __main__ - Step 4618: {'lr': 0.0004991513054550411, 'samples': 221664, 'steps': 4617, 'loss/train': 2.8782827854156494} +07/25/2024 11:47:04 - INFO - __main__ - Step 4619: {'lr': 0.000499150872306173, 'samples': 221712, 'steps': 4618, 'loss/train': 2.935497283935547} +07/25/2024 11:47:04 - INFO - __main__ - Step 4620: {'lr': 0.0004991504390469877, 'samples': 221760, 'steps': 4619, 'loss/train': 2.0452120304107666} +07/25/2024 11:47:05 - INFO - __main__ - Step 4621: {'lr': 0.0004991500056774857, 'samples': 221808, 'steps': 4620, 'loss/train': 6.339592456817627} +07/25/2024 11:47:05 - INFO - __main__ - Step 4622: {'lr': 0.000499149572197667, 'samples': 221856, 'steps': 4621, 'loss/train': 2.6388626098632812} +07/25/2024 11:47:05 - INFO - __main__ - Step 4623: {'lr': 0.0004991491386075318, 'samples': 221904, 'steps': 4622, 'loss/train': 2.9731857776641846} +07/25/2024 11:47:06 - INFO - __main__ - Step 4624: {'lr': 0.0004991487049070803, 'samples': 221952, 'steps': 4623, 'loss/train': 2.8618385791778564} +07/25/2024 11:47:06 - INFO - __main__ - Step 4625: {'lr': 0.0004991482710963129, 'samples': 222000, 'steps': 4624, 'loss/train': 2.857243299484253} +07/25/2024 11:47:06 - INFO - __main__ - Step 4626: {'lr': 0.0004991478371752294, 'samples': 222048, 'steps': 4625, 'loss/train': 2.6529600620269775} +07/25/2024 11:47:06 - INFO - __main__ - Step 4627: {'lr': 0.0004991474031438304, 'samples': 222096, 'steps': 4626, 'loss/train': 2.6131751537323} +07/25/2024 11:47:07 - INFO - __main__ - Step 4628: {'lr': 0.000499146969002116, 'samples': 222144, 'steps': 4627, 'loss/train': 2.213878870010376} +07/25/2024 11:47:07 - INFO - __main__ - Step 4629: {'lr': 0.0004991465347500861, 'samples': 222192, 'steps': 4628, 'loss/train': 0.6325451135635376} +07/25/2024 11:47:07 - INFO - __main__ - Step 4630: {'lr': 0.0004991461003877412, 'samples': 222240, 'steps': 4629, 'loss/train': 2.7209856510162354} +07/25/2024 11:47:07 - INFO - __main__ - Step 4631: {'lr': 0.0004991456659150814, 'samples': 222288, 'steps': 4630, 'loss/train': 2.992562770843506} +07/25/2024 11:47:08 - INFO - __main__ - Step 4632: {'lr': 0.0004991452313321068, 'samples': 222336, 'steps': 4631, 'loss/train': 2.2232940196990967} +07/25/2024 11:47:08 - INFO - __main__ - Step 4633: {'lr': 0.0004991447966388177, 'samples': 222384, 'steps': 4632, 'loss/train': 2.133465528488159} +07/25/2024 11:47:08 - INFO - __main__ - Step 4634: {'lr': 0.0004991443618352143, 'samples': 222432, 'steps': 4633, 'loss/train': 2.634204387664795} +07/25/2024 11:47:09 - INFO - __main__ - Step 4635: {'lr': 0.0004991439269212966, 'samples': 222480, 'steps': 4634, 'loss/train': 2.7132227420806885} +07/25/2024 11:47:09 - INFO - __main__ - Step 4636: {'lr': 0.0004991434918970653, 'samples': 222528, 'steps': 4635, 'loss/train': 5.983707904815674} +07/25/2024 11:47:09 - INFO - __main__ - Step 4637: {'lr': 0.0004991430567625199, 'samples': 222576, 'steps': 4636, 'loss/train': 2.71185040473938} +07/25/2024 11:47:09 - INFO - __main__ - Step 4638: {'lr': 0.0004991426215176611, 'samples': 222624, 'steps': 4637, 'loss/train': 2.512009620666504} +07/25/2024 11:47:10 - INFO - __main__ - Step 4639: {'lr': 0.0004991421861624889, 'samples': 222672, 'steps': 4638, 'loss/train': 2.4396374225616455} +07/25/2024 11:47:10 - INFO - __main__ - Step 4640: {'lr': 0.0004991417506970034, 'samples': 222720, 'steps': 4639, 'loss/train': 3.755009412765503} +07/25/2024 11:47:10 - INFO - __main__ - Step 4641: {'lr': 0.0004991413151212051, 'samples': 222768, 'steps': 4640, 'loss/train': 2.4583699703216553} +07/25/2024 11:47:11 - INFO - __main__ - Step 4642: {'lr': 0.0004991408794350939, 'samples': 222816, 'steps': 4641, 'loss/train': 2.7435195446014404} +07/25/2024 11:47:11 - INFO - __main__ - Step 4643: {'lr': 0.0004991404436386701, 'samples': 222864, 'steps': 4642, 'loss/train': 1.2493703365325928} +07/25/2024 11:47:11 - INFO - __main__ - Step 4644: {'lr': 0.0004991400077319339, 'samples': 222912, 'steps': 4643, 'loss/train': 2.5657341480255127} +07/25/2024 11:47:11 - INFO - __main__ - Step 4645: {'lr': 0.0004991395717148855, 'samples': 222960, 'steps': 4644, 'loss/train': 6.104331970214844} +07/25/2024 11:47:12 - INFO - __main__ - Step 4646: {'lr': 0.0004991391355875252, 'samples': 223008, 'steps': 4645, 'loss/train': 2.170628070831299} +07/25/2024 11:47:12 - INFO - __main__ - Step 4647: {'lr': 0.0004991386993498529, 'samples': 223056, 'steps': 4646, 'loss/train': 3.1422643661499023} +07/25/2024 11:47:12 - INFO - __main__ - Step 4648: {'lr': 0.000499138263001869, 'samples': 223104, 'steps': 4647, 'loss/train': 2.5211033821105957} +07/25/2024 11:47:13 - INFO - __main__ - Step 4649: {'lr': 0.0004991378265435737, 'samples': 223152, 'steps': 4648, 'loss/train': 2.564300775527954} +07/25/2024 11:47:13 - INFO - __main__ - Step 4650: {'lr': 0.0004991373899749671, 'samples': 223200, 'steps': 4649, 'loss/train': 2.640078067779541} +07/25/2024 11:47:13 - INFO - __main__ - Step 4651: {'lr': 0.0004991369532960495, 'samples': 223248, 'steps': 4650, 'loss/train': 2.8217976093292236} +07/25/2024 11:47:13 - INFO - __main__ - Step 4652: {'lr': 0.000499136516506821, 'samples': 223296, 'steps': 4651, 'loss/train': 1.8165088891983032} +07/25/2024 11:47:14 - INFO - __main__ - Step 4653: {'lr': 0.0004991360796072818, 'samples': 223344, 'steps': 4652, 'loss/train': 0.36734867095947266} +07/25/2024 11:47:14 - INFO - __main__ - Step 4654: {'lr': 0.0004991356425974321, 'samples': 223392, 'steps': 4653, 'loss/train': 2.7244536876678467} +07/25/2024 11:47:14 - INFO - __main__ - Step 4655: {'lr': 0.0004991352054772723, 'samples': 223440, 'steps': 4654, 'loss/train': 2.8108389377593994} +07/25/2024 11:47:15 - INFO - __main__ - Step 4656: {'lr': 0.0004991347682468023, 'samples': 223488, 'steps': 4655, 'loss/train': 2.8023860454559326} +07/25/2024 11:47:15 - INFO - __main__ - Step 4657: {'lr': 0.0004991343309060223, 'samples': 223536, 'steps': 4656, 'loss/train': 2.2007062435150146} +07/25/2024 11:47:15 - INFO - __main__ - Step 4658: {'lr': 0.0004991338934549328, 'samples': 223584, 'steps': 4657, 'loss/train': 2.592968702316284} +07/25/2024 11:47:15 - INFO - __main__ - Step 4659: {'lr': 0.0004991334558935337, 'samples': 223632, 'steps': 4658, 'loss/train': 2.161177635192871} +07/25/2024 11:47:16 - INFO - __main__ - Step 4660: {'lr': 0.0004991330182218252, 'samples': 223680, 'steps': 4659, 'loss/train': 6.063213348388672} +07/25/2024 11:47:16 - INFO - __main__ - Step 4661: {'lr': 0.0004991325804398077, 'samples': 223728, 'steps': 4660, 'loss/train': 2.237442970275879} +07/25/2024 11:47:16 - INFO - __main__ - Step 4662: {'lr': 0.0004991321425474812, 'samples': 223776, 'steps': 4661, 'loss/train': 1.2679671049118042} +07/25/2024 11:47:17 - INFO - __main__ - Step 4663: {'lr': 0.0004991317045448461, 'samples': 223824, 'steps': 4662, 'loss/train': 2.6560044288635254} +07/25/2024 11:47:17 - INFO - __main__ - Step 4664: {'lr': 0.0004991312664319024, 'samples': 223872, 'steps': 4663, 'loss/train': 3.1684439182281494} +07/25/2024 11:47:17 - INFO - __main__ - Step 4665: {'lr': 0.0004991308282086502, 'samples': 223920, 'steps': 4664, 'loss/train': 2.3435604572296143} +07/25/2024 11:47:17 - INFO - __main__ - Step 4666: {'lr': 0.00049913038987509, 'samples': 223968, 'steps': 4665, 'loss/train': 2.3787729740142822} +07/25/2024 11:47:18 - INFO - __main__ - Step 4667: {'lr': 0.0004991299514312217, 'samples': 224016, 'steps': 4666, 'loss/train': 3.1401219367980957} +07/25/2024 11:47:18 - INFO - __main__ - Step 4668: {'lr': 0.0004991295128770458, 'samples': 224064, 'steps': 4667, 'loss/train': 2.9219977855682373} +07/25/2024 11:47:18 - INFO - __main__ - Step 4669: {'lr': 0.0004991290742125623, 'samples': 224112, 'steps': 4668, 'loss/train': 6.055570602416992} +07/25/2024 11:47:19 - INFO - __main__ - Step 4670: {'lr': 0.0004991286354377715, 'samples': 224160, 'steps': 4669, 'loss/train': 2.180115222930908} +07/25/2024 11:47:19 - INFO - __main__ - Step 4671: {'lr': 0.0004991281965526734, 'samples': 224208, 'steps': 4670, 'loss/train': 2.4594929218292236} +07/25/2024 11:47:19 - INFO - __main__ - Step 4672: {'lr': 0.0004991277575572683, 'samples': 224256, 'steps': 4671, 'loss/train': 2.4358937740325928} +07/25/2024 11:47:19 - INFO - __main__ - Step 4673: {'lr': 0.0004991273184515565, 'samples': 224304, 'steps': 4672, 'loss/train': 2.5050208568573} +07/25/2024 11:47:20 - INFO - __main__ - Step 4674: {'lr': 0.000499126879235538, 'samples': 224352, 'steps': 4673, 'loss/train': 2.392235040664673} +07/25/2024 11:47:20 - INFO - __main__ - Step 4675: {'lr': 0.0004991264399092131, 'samples': 224400, 'steps': 4674, 'loss/train': 1.8538081645965576} +07/25/2024 11:47:20 - INFO - __main__ - Step 4676: {'lr': 0.0004991260004725821, 'samples': 224448, 'steps': 4675, 'loss/train': 1.5668935775756836} +07/25/2024 11:47:21 - INFO - __main__ - Step 4677: {'lr': 0.0004991255609256451, 'samples': 224496, 'steps': 4676, 'loss/train': 0.36547550559043884} +07/25/2024 11:47:21 - INFO - __main__ - Step 4678: {'lr': 0.0004991251212684021, 'samples': 224544, 'steps': 4677, 'loss/train': 2.079988479614258} +07/25/2024 11:47:21 - INFO - __main__ - Step 4679: {'lr': 0.0004991246815008537, 'samples': 224592, 'steps': 4678, 'loss/train': 1.9466177225112915} +07/25/2024 11:47:21 - INFO - __main__ - Step 4680: {'lr': 0.0004991242416229997, 'samples': 224640, 'steps': 4679, 'loss/train': 2.8788082599639893} +07/25/2024 11:47:22 - INFO - __main__ - Step 4681: {'lr': 0.0004991238016348405, 'samples': 224688, 'steps': 4680, 'loss/train': 2.448525905609131} +07/25/2024 11:47:22 - INFO - __main__ - Step 4682: {'lr': 0.0004991233615363764, 'samples': 224736, 'steps': 4681, 'loss/train': 2.348194122314453} +07/25/2024 11:47:22 - INFO - __main__ - Step 4683: {'lr': 0.0004991229213276073, 'samples': 224784, 'steps': 4682, 'loss/train': 1.8787463903427124} +07/25/2024 11:47:23 - INFO - __main__ - Step 4684: {'lr': 0.0004991224810085336, 'samples': 224832, 'steps': 4683, 'loss/train': 3.0221548080444336} +07/25/2024 11:47:23 - INFO - __main__ - Step 4685: {'lr': 0.0004991220405791554, 'samples': 224880, 'steps': 4684, 'loss/train': 2.128304958343506} +07/25/2024 11:47:23 - INFO - __main__ - Step 4686: {'lr': 0.000499121600039473, 'samples': 224928, 'steps': 4685, 'loss/train': 2.0483903884887695} +07/25/2024 11:47:23 - INFO - __main__ - Step 4687: {'lr': 0.0004991211593894866, 'samples': 224976, 'steps': 4686, 'loss/train': 2.2666258811950684} +07/25/2024 11:47:24 - INFO - __main__ - Step 4688: {'lr': 0.0004991207186291962, 'samples': 225024, 'steps': 4687, 'loss/train': 3.5704753398895264} +07/25/2024 11:47:24 - INFO - __main__ - Step 4689: {'lr': 0.0004991202777586022, 'samples': 225072, 'steps': 4688, 'loss/train': 1.6010955572128296} +07/25/2024 11:47:24 - INFO - __main__ - Step 4690: {'lr': 0.0004991198367777047, 'samples': 225120, 'steps': 4689, 'loss/train': 2.352433443069458} +07/25/2024 11:47:25 - INFO - __main__ - Step 4691: {'lr': 0.0004991193956865039, 'samples': 225168, 'steps': 4690, 'loss/train': 2.908397912979126} +07/25/2024 11:47:25 - INFO - __main__ - Step 4692: {'lr': 0.000499118954485, 'samples': 225216, 'steps': 4691, 'loss/train': 2.703108072280884} +07/25/2024 11:47:25 - INFO - __main__ - Step 4693: {'lr': 0.0004991185131731933, 'samples': 225264, 'steps': 4692, 'loss/train': 5.952783107757568} +07/25/2024 11:47:25 - INFO - __main__ - Step 4694: {'lr': 0.0004991180717510838, 'samples': 225312, 'steps': 4693, 'loss/train': 1.559508204460144} +07/25/2024 11:47:26 - INFO - __main__ - Step 4695: {'lr': 0.0004991176302186718, 'samples': 225360, 'steps': 4694, 'loss/train': 2.742868423461914} +07/25/2024 11:47:26 - INFO - __main__ - Step 4696: {'lr': 0.0004991171885759576, 'samples': 225408, 'steps': 4695, 'loss/train': 2.2429723739624023} +07/25/2024 11:47:26 - INFO - __main__ - Step 4697: {'lr': 0.0004991167468229411, 'samples': 225456, 'steps': 4696, 'loss/train': 2.2231876850128174} +07/25/2024 11:47:27 - INFO - __main__ - Step 4698: {'lr': 0.000499116304959623, 'samples': 225504, 'steps': 4697, 'loss/train': 1.7988475561141968} +07/25/2024 11:47:27 - INFO - __main__ - Step 4699: {'lr': 0.0004991158629860028, 'samples': 225552, 'steps': 4698, 'loss/train': 2.1744184494018555} +07/25/2024 11:47:27 - INFO - __main__ - Step 4700: {'lr': 0.0004991154209020814, 'samples': 225600, 'steps': 4699, 'loss/train': 2.177673816680908} +07/25/2024 11:47:27 - INFO - __main__ - Step 4701: {'lr': 0.0004991149787078585, 'samples': 225648, 'steps': 4700, 'loss/train': 0.32711440324783325} +07/25/2024 11:47:28 - INFO - __main__ - Step 4702: {'lr': 0.0004991145364033344, 'samples': 225696, 'steps': 4701, 'loss/train': 1.9090094566345215} +07/25/2024 11:47:28 - INFO - __main__ - Step 4703: {'lr': 0.0004991140939885095, 'samples': 225744, 'steps': 4702, 'loss/train': 2.1840579509735107} +07/25/2024 11:47:28 - INFO - __main__ - Step 4704: {'lr': 0.0004991136514633839, 'samples': 225792, 'steps': 4703, 'loss/train': 2.476607084274292} +07/25/2024 11:47:28 - INFO - __main__ - Step 4705: {'lr': 0.0004991132088279577, 'samples': 225840, 'steps': 4704, 'loss/train': 2.2713348865509033} +07/25/2024 11:47:29 - INFO - __main__ - Step 4706: {'lr': 0.0004991127660822311, 'samples': 225888, 'steps': 4705, 'loss/train': 2.3413755893707275} +07/25/2024 11:47:29 - INFO - __main__ - Step 4707: {'lr': 0.0004991123232262044, 'samples': 225936, 'steps': 4706, 'loss/train': 2.6933279037475586} +07/25/2024 11:47:29 - INFO - __main__ - Step 4708: {'lr': 0.0004991118802598777, 'samples': 225984, 'steps': 4707, 'loss/train': 1.97784423828125} +07/25/2024 11:47:30 - INFO - __main__ - Step 4709: {'lr': 0.0004991114371832513, 'samples': 226032, 'steps': 4708, 'loss/train': 2.682990074157715} +07/25/2024 11:47:30 - INFO - __main__ - Step 4710: {'lr': 0.0004991109939963252, 'samples': 226080, 'steps': 4709, 'loss/train': 2.221890926361084} +07/25/2024 11:47:30 - INFO - __main__ - Step 4711: {'lr': 0.0004991105506990998, 'samples': 226128, 'steps': 4710, 'loss/train': 2.5543529987335205} +07/25/2024 11:47:30 - INFO - __main__ - Step 4712: {'lr': 0.0004991101072915753, 'samples': 226176, 'steps': 4711, 'loss/train': 2.2692923545837402} +07/25/2024 11:47:31 - INFO - __main__ - Step 4713: {'lr': 0.0004991096637737518, 'samples': 226224, 'steps': 4712, 'loss/train': 0.9459828734397888} +07/25/2024 11:47:31 - INFO - __main__ - Step 4714: {'lr': 0.0004991092201456294, 'samples': 226272, 'steps': 4713, 'loss/train': 1.6398829221725464} +07/25/2024 11:47:31 - INFO - __main__ - Step 4715: {'lr': 0.0004991087764072087, 'samples': 226320, 'steps': 4714, 'loss/train': 2.3241264820098877} +07/25/2024 11:47:32 - INFO - __main__ - Step 4716: {'lr': 0.0004991083325584893, 'samples': 226368, 'steps': 4715, 'loss/train': 2.6082921028137207} +07/25/2024 11:47:32 - INFO - __main__ - Step 4717: {'lr': 0.0004991078885994718, 'samples': 226416, 'steps': 4716, 'loss/train': 5.9751691818237305} +07/25/2024 11:47:32 - INFO - __main__ - Step 4718: {'lr': 0.0004991074445301564, 'samples': 226464, 'steps': 4717, 'loss/train': 1.9250472784042358} +07/25/2024 11:47:32 - INFO - __main__ - Step 4719: {'lr': 0.0004991070003505432, 'samples': 226512, 'steps': 4718, 'loss/train': 2.3865089416503906} +07/25/2024 11:47:33 - INFO - __main__ - Step 4720: {'lr': 0.0004991065560606324, 'samples': 226560, 'steps': 4719, 'loss/train': 2.4917798042297363} +07/25/2024 11:47:33 - INFO - __main__ - Step 4721: {'lr': 0.0004991061116604241, 'samples': 226608, 'steps': 4720, 'loss/train': 1.8057780265808105} +07/25/2024 11:47:33 - INFO - __main__ - Step 4722: {'lr': 0.0004991056671499187, 'samples': 226656, 'steps': 4721, 'loss/train': 2.7318246364593506} +07/25/2024 11:47:34 - INFO - __main__ - Step 4723: {'lr': 0.0004991052225291161, 'samples': 226704, 'steps': 4722, 'loss/train': 2.441462516784668} +07/25/2024 11:47:34 - INFO - __main__ - Step 4724: {'lr': 0.0004991047777980169, 'samples': 226752, 'steps': 4723, 'loss/train': 2.5937511920928955} +07/25/2024 11:47:34 - INFO - __main__ - Step 4725: {'lr': 0.0004991043329566209, 'samples': 226800, 'steps': 4724, 'loss/train': 0.4862014055252075} +07/25/2024 11:47:34 - INFO - __main__ - Step 4726: {'lr': 0.0004991038880049287, 'samples': 226848, 'steps': 4725, 'loss/train': 1.837110996246338} +07/25/2024 11:47:35 - INFO - __main__ - Step 4727: {'lr': 0.0004991034429429401, 'samples': 226896, 'steps': 4726, 'loss/train': 2.039694309234619} +07/25/2024 11:47:35 - INFO - __main__ - Step 4728: {'lr': 0.0004991029977706556, 'samples': 226944, 'steps': 4727, 'loss/train': 2.6570796966552734} +07/25/2024 11:47:35 - INFO - __main__ - Step 4729: {'lr': 0.0004991025524880752, 'samples': 226992, 'steps': 4728, 'loss/train': 2.3738648891448975} +07/25/2024 11:47:36 - INFO - __main__ - Step 4730: {'lr': 0.0004991021070951991, 'samples': 227040, 'steps': 4729, 'loss/train': 2.328726291656494} +07/25/2024 11:47:36 - INFO - __main__ - Step 4731: {'lr': 0.0004991016615920276, 'samples': 227088, 'steps': 4730, 'loss/train': 3.185991048812866} +07/25/2024 11:47:36 - INFO - __main__ - Step 4732: {'lr': 0.000499101215978561, 'samples': 227136, 'steps': 4731, 'loss/train': 2.6475002765655518} +07/25/2024 11:47:36 - INFO - __main__ - Step 4733: {'lr': 0.0004991007702547992, 'samples': 227184, 'steps': 4732, 'loss/train': 2.7396466732025146} +07/25/2024 11:47:37 - INFO - __main__ - Step 4734: {'lr': 0.0004991003244207427, 'samples': 227232, 'steps': 4733, 'loss/train': 1.6794664859771729} +07/25/2024 11:47:37 - INFO - __main__ - Step 4735: {'lr': 0.0004990998784763915, 'samples': 227280, 'steps': 4734, 'loss/train': 2.9583017826080322} +07/25/2024 11:47:37 - INFO - __main__ - Step 4736: {'lr': 0.0004990994324217458, 'samples': 227328, 'steps': 4735, 'loss/train': 2.1798255443573} +07/25/2024 11:47:38 - INFO - __main__ - Step 4737: {'lr': 0.0004990989862568059, 'samples': 227376, 'steps': 4736, 'loss/train': 2.633652687072754} +07/25/2024 11:47:38 - INFO - __main__ - Step 4738: {'lr': 0.000499098539981572, 'samples': 227424, 'steps': 4737, 'loss/train': 1.1228920221328735} +07/25/2024 11:47:38 - INFO - __main__ - Step 4739: {'lr': 0.0004990980935960442, 'samples': 227472, 'steps': 4738, 'loss/train': 2.8798258304595947} +07/25/2024 11:47:38 - INFO - __main__ - Step 4740: {'lr': 0.0004990976471002227, 'samples': 227520, 'steps': 4739, 'loss/train': 2.532768726348877} +07/25/2024 11:47:39 - INFO - __main__ - Step 4741: {'lr': 0.0004990972004941077, 'samples': 227568, 'steps': 4740, 'loss/train': 6.036951065063477} +07/25/2024 11:47:39 - INFO - __main__ - Step 4742: {'lr': 0.0004990967537776996, 'samples': 227616, 'steps': 4741, 'loss/train': 2.256333112716675} +07/25/2024 11:47:39 - INFO - __main__ - Step 4743: {'lr': 0.0004990963069509982, 'samples': 227664, 'steps': 4742, 'loss/train': 2.5403401851654053} +07/25/2024 11:47:40 - INFO - __main__ - Step 4744: {'lr': 0.0004990958600140042, 'samples': 227712, 'steps': 4743, 'loss/train': 2.510362148284912} +07/25/2024 11:47:40 - INFO - __main__ - Step 4745: {'lr': 0.0004990954129667174, 'samples': 227760, 'steps': 4744, 'loss/train': 2.1437668800354004} +07/25/2024 11:47:40 - INFO - __main__ - Step 4746: {'lr': 0.0004990949658091381, 'samples': 227808, 'steps': 4745, 'loss/train': 2.5985186100006104} +07/25/2024 11:47:40 - INFO - __main__ - Step 4747: {'lr': 0.0004990945185412666, 'samples': 227856, 'steps': 4746, 'loss/train': 2.528024435043335} +07/25/2024 11:47:41 - INFO - __main__ - Step 4748: {'lr': 0.000499094071163103, 'samples': 227904, 'steps': 4747, 'loss/train': 2.2988884449005127} +07/25/2024 11:47:41 - INFO - __main__ - Step 4749: {'lr': 0.0004990936236746476, 'samples': 227952, 'steps': 4748, 'loss/train': 0.4553810954093933} +07/25/2024 11:47:41 - INFO - __main__ - Step 4750: {'lr': 0.0004990931760759004, 'samples': 228000, 'steps': 4749, 'loss/train': 2.1637682914733887} +07/25/2024 11:47:42 - INFO - __main__ - Step 4751: {'lr': 0.0004990927283668619, 'samples': 228048, 'steps': 4750, 'loss/train': 2.4468836784362793} +07/25/2024 11:47:42 - INFO - __main__ - Step 4752: {'lr': 0.0004990922805475318, 'samples': 228096, 'steps': 4751, 'loss/train': 2.5386712551116943} +07/25/2024 11:47:42 - INFO - __main__ - Step 4753: {'lr': 0.0004990918326179109, 'samples': 228144, 'steps': 4752, 'loss/train': 2.586390733718872} +07/25/2024 11:47:42 - INFO - __main__ - Step 4754: {'lr': 0.000499091384577999, 'samples': 228192, 'steps': 4753, 'loss/train': 2.0584850311279297} +07/25/2024 11:47:43 - INFO - __main__ - Step 4755: {'lr': 0.0004990909364277964, 'samples': 228240, 'steps': 4754, 'loss/train': 1.6980335712432861} +07/25/2024 11:47:43 - INFO - __main__ - Step 4756: {'lr': 0.0004990904881673034, 'samples': 228288, 'steps': 4755, 'loss/train': 2.884164333343506} +07/25/2024 11:47:43 - INFO - __main__ - Step 4757: {'lr': 0.00049909003979652, 'samples': 228336, 'steps': 4756, 'loss/train': 2.712632894515991} +07/25/2024 11:47:44 - INFO - __main__ - Step 4758: {'lr': 0.0004990895913154467, 'samples': 228384, 'steps': 4757, 'loss/train': 2.0155282020568848} +07/25/2024 11:47:44 - INFO - __main__ - Step 4759: {'lr': 0.0004990891427240834, 'samples': 228432, 'steps': 4758, 'loss/train': 2.569904088973999} +07/25/2024 11:47:44 - INFO - __main__ - Step 4760: {'lr': 0.0004990886940224303, 'samples': 228480, 'steps': 4759, 'loss/train': 2.100088357925415} +07/25/2024 11:47:44 - INFO - __main__ - Step 4761: {'lr': 0.0004990882452104878, 'samples': 228528, 'steps': 4760, 'loss/train': 1.4596260786056519} +07/25/2024 11:47:45 - INFO - __main__ - Step 4762: {'lr': 0.000499087796288256, 'samples': 228576, 'steps': 4761, 'loss/train': 1.4738401174545288} +07/25/2024 11:47:45 - INFO - __main__ - Step 4763: {'lr': 0.0004990873472557351, 'samples': 228624, 'steps': 4762, 'loss/train': 3.7662858963012695} +07/25/2024 11:47:45 - INFO - __main__ - Step 4764: {'lr': 0.0004990868981129253, 'samples': 228672, 'steps': 4763, 'loss/train': 2.4672369956970215} +07/25/2024 11:47:46 - INFO - __main__ - Step 4765: {'lr': 0.0004990864488598268, 'samples': 228720, 'steps': 4764, 'loss/train': 6.1408185958862305} +07/25/2024 11:47:46 - INFO - __main__ - Step 4766: {'lr': 0.0004990859994964397, 'samples': 228768, 'steps': 4765, 'loss/train': 2.308095693588257} +07/25/2024 11:47:46 - INFO - __main__ - Step 4767: {'lr': 0.0004990855500227644, 'samples': 228816, 'steps': 4766, 'loss/train': 2.2066667079925537} +07/25/2024 11:47:46 - INFO - __main__ - Step 4768: {'lr': 0.0004990851004388011, 'samples': 228864, 'steps': 4767, 'loss/train': 2.504829168319702} +07/25/2024 11:47:47 - INFO - __main__ - Step 4769: {'lr': 0.0004990846507445498, 'samples': 228912, 'steps': 4768, 'loss/train': 2.665959119796753} +07/25/2024 11:47:47 - INFO - __main__ - Step 4770: {'lr': 0.0004990842009400107, 'samples': 228960, 'steps': 4769, 'loss/train': 1.7528719902038574} +07/25/2024 11:47:47 - INFO - __main__ - Step 4771: {'lr': 0.0004990837510251843, 'samples': 229008, 'steps': 4770, 'loss/train': 2.403032064437866} +07/25/2024 11:47:48 - INFO - __main__ - Step 4772: {'lr': 0.0004990833010000705, 'samples': 229056, 'steps': 4771, 'loss/train': 1.4387954473495483} +07/25/2024 11:47:48 - INFO - __main__ - Step 4773: {'lr': 0.0004990828508646695, 'samples': 229104, 'steps': 4772, 'loss/train': 0.997597873210907} +07/25/2024 11:47:48 - INFO - __main__ - Step 4774: {'lr': 0.0004990824006189816, 'samples': 229152, 'steps': 4773, 'loss/train': 2.2011170387268066} +07/25/2024 11:47:48 - INFO - __main__ - Step 4775: {'lr': 0.0004990819502630071, 'samples': 229200, 'steps': 4774, 'loss/train': 1.1538870334625244} +07/25/2024 11:47:49 - INFO - __main__ - Step 4776: {'lr': 0.0004990814997967462, 'samples': 229248, 'steps': 4775, 'loss/train': 2.6639187335968018} +07/25/2024 11:47:49 - INFO - __main__ - Step 4777: {'lr': 0.0004990810492201987, 'samples': 229296, 'steps': 4776, 'loss/train': 2.901275873184204} +07/25/2024 11:47:49 - INFO - __main__ - Step 4778: {'lr': 0.0004990805985333653, 'samples': 229344, 'steps': 4777, 'loss/train': 2.4090349674224854} +07/25/2024 11:47:50 - INFO - __main__ - Step 4779: {'lr': 0.0004990801477362459, 'samples': 229392, 'steps': 4778, 'loss/train': 2.0063564777374268} +07/25/2024 11:47:50 - INFO - __main__ - Step 4780: {'lr': 0.0004990796968288408, 'samples': 229440, 'steps': 4779, 'loss/train': 2.3784399032592773} +07/25/2024 11:47:50 - INFO - __main__ - Step 4781: {'lr': 0.0004990792458111502, 'samples': 229488, 'steps': 4780, 'loss/train': 1.9998865127563477} +07/25/2024 11:47:50 - INFO - __main__ - Step 4782: {'lr': 0.0004990787946831743, 'samples': 229536, 'steps': 4781, 'loss/train': 1.4015945196151733} +07/25/2024 11:47:51 - INFO - __main__ - Step 4783: {'lr': 0.0004990783434449134, 'samples': 229584, 'steps': 4782, 'loss/train': 2.0769314765930176} +07/25/2024 11:47:51 - INFO - __main__ - Step 4784: {'lr': 0.0004990778920963674, 'samples': 229632, 'steps': 4783, 'loss/train': 3.8083252906799316} +07/25/2024 11:47:51 - INFO - __main__ - Step 4785: {'lr': 0.0004990774406375367, 'samples': 229680, 'steps': 4784, 'loss/train': 1.7711138725280762} +07/25/2024 11:47:51 - INFO - __main__ - Step 4786: {'lr': 0.0004990769890684216, 'samples': 229728, 'steps': 4785, 'loss/train': 0.9961721897125244} +07/25/2024 11:47:52 - INFO - __main__ - Step 4787: {'lr': 0.000499076537389022, 'samples': 229776, 'steps': 4786, 'loss/train': 2.3383443355560303} +07/25/2024 11:47:52 - INFO - __main__ - Step 4788: {'lr': 0.0004990760855993385, 'samples': 229824, 'steps': 4787, 'loss/train': 2.193126678466797} +07/25/2024 11:47:52 - INFO - __main__ - Step 4789: {'lr': 0.0004990756336993711, 'samples': 229872, 'steps': 4788, 'loss/train': 4.887250900268555} +07/25/2024 11:47:53 - INFO - __main__ - Step 4790: {'lr': 0.0004990751816891198, 'samples': 229920, 'steps': 4789, 'loss/train': 2.52506947517395} +07/25/2024 11:47:53 - INFO - __main__ - Step 4791: {'lr': 0.0004990747295685851, 'samples': 229968, 'steps': 4790, 'loss/train': 1.7816475629806519} +07/25/2024 11:47:53 - INFO - __main__ - Step 4792: {'lr': 0.0004990742773377671, 'samples': 230016, 'steps': 4791, 'loss/train': 1.825103998184204} +07/25/2024 11:47:53 - INFO - __main__ - Step 4793: {'lr': 0.000499073824996666, 'samples': 230064, 'steps': 4792, 'loss/train': 2.145195722579956} +07/25/2024 11:47:54 - INFO - __main__ - Step 4794: {'lr': 0.0004990733725452819, 'samples': 230112, 'steps': 4793, 'loss/train': 1.144053339958191} +07/25/2024 11:47:54 - INFO - __main__ - Step 4795: {'lr': 0.0004990729199836151, 'samples': 230160, 'steps': 4794, 'loss/train': 2.632172107696533} +07/25/2024 11:47:54 - INFO - __main__ - Step 4796: {'lr': 0.0004990724673116659, 'samples': 230208, 'steps': 4795, 'loss/train': 1.5498437881469727} +07/25/2024 11:47:55 - INFO - __main__ - Step 4797: {'lr': 0.0004990720145294343, 'samples': 230256, 'steps': 4796, 'loss/train': 2.5163819789886475} +07/25/2024 11:47:55 - INFO - __main__ - Step 4798: {'lr': 0.0004990715616369206, 'samples': 230304, 'steps': 4797, 'loss/train': 2.3939836025238037} +07/25/2024 11:47:55 - INFO - __main__ - Step 4799: {'lr': 0.000499071108634125, 'samples': 230352, 'steps': 4798, 'loss/train': 2.3635847568511963} +07/25/2024 11:47:55 - INFO - __main__ - Step 4800: {'lr': 0.0004990706555210477, 'samples': 230400, 'steps': 4799, 'loss/train': 1.951266884803772} +07/25/2024 11:47:56 - INFO - __main__ - Step 4801: {'lr': 0.0004990702022976889, 'samples': 230448, 'steps': 4800, 'loss/train': 2.596346139907837} +07/25/2024 11:47:56 - INFO - __main__ - Step 4802: {'lr': 0.0004990697489640488, 'samples': 230496, 'steps': 4801, 'loss/train': 2.371201753616333} +07/25/2024 11:47:56 - INFO - __main__ - Step 4803: {'lr': 0.0004990692955201276, 'samples': 230544, 'steps': 4802, 'loss/train': 2.6161117553710938} +07/25/2024 11:47:57 - INFO - __main__ - Step 4804: {'lr': 0.0004990688419659254, 'samples': 230592, 'steps': 4803, 'loss/train': 2.7534613609313965} +07/25/2024 11:47:57 - INFO - __main__ - Step 4805: {'lr': 0.0004990683883014426, 'samples': 230640, 'steps': 4804, 'loss/train': 2.2356138229370117} +07/25/2024 11:47:57 - INFO - __main__ - Step 4806: {'lr': 0.0004990679345266793, 'samples': 230688, 'steps': 4805, 'loss/train': 1.8996334075927734} +07/25/2024 11:47:57 - INFO - __main__ - Step 4807: {'lr': 0.0004990674806416356, 'samples': 230736, 'steps': 4806, 'loss/train': 1.6716653108596802} +07/25/2024 11:47:58 - INFO - __main__ - Step 4808: {'lr': 0.0004990670266463118, 'samples': 230784, 'steps': 4807, 'loss/train': 2.0296173095703125} +07/25/2024 11:47:58 - INFO - __main__ - Step 4809: {'lr': 0.0004990665725407084, 'samples': 230832, 'steps': 4808, 'loss/train': 1.283721685409546} +07/25/2024 11:47:58 - INFO - __main__ - Step 4810: {'lr': 0.0004990661183248249, 'samples': 230880, 'steps': 4809, 'loss/train': 2.021515130996704} +07/25/2024 11:47:59 - INFO - __main__ - Step 4811: {'lr': 0.0004990656639986621, 'samples': 230928, 'steps': 4810, 'loss/train': 2.7082502841949463} +07/25/2024 11:47:59 - INFO - __main__ - Step 4812: {'lr': 0.0004990652095622199, 'samples': 230976, 'steps': 4811, 'loss/train': 1.8762781620025635} +07/25/2024 11:47:59 - INFO - __main__ - Step 4813: {'lr': 0.0004990647550154987, 'samples': 231024, 'steps': 4812, 'loss/train': 2.118734359741211} +07/25/2024 11:47:59 - INFO - __main__ - Step 4814: {'lr': 0.0004990643003584986, 'samples': 231072, 'steps': 4813, 'loss/train': 2.175614356994629} +07/25/2024 11:48:00 - INFO - __main__ - Step 4815: {'lr': 0.0004990638455912198, 'samples': 231120, 'steps': 4814, 'loss/train': 2.3384814262390137} +07/25/2024 11:48:00 - INFO - __main__ - Step 4816: {'lr': 0.0004990633907136625, 'samples': 231168, 'steps': 4815, 'loss/train': 1.6844173669815063} +07/25/2024 11:48:00 - INFO - __main__ - Step 4817: {'lr': 0.0004990629357258269, 'samples': 231216, 'steps': 4816, 'loss/train': 1.9284883737564087} +07/25/2024 11:48:01 - INFO - __main__ - Step 4818: {'lr': 0.0004990624806277132, 'samples': 231264, 'steps': 4817, 'loss/train': 2.1102030277252197} +07/25/2024 11:48:01 - INFO - __main__ - Step 4819: {'lr': 0.0004990620254193216, 'samples': 231312, 'steps': 4818, 'loss/train': 2.3962812423706055} +07/25/2024 11:48:01 - INFO - __main__ - Step 4820: {'lr': 0.0004990615701006523, 'samples': 231360, 'steps': 4819, 'loss/train': 2.4142086505889893} +07/25/2024 11:48:01 - INFO - __main__ - Step 4821: {'lr': 0.0004990611146717056, 'samples': 231408, 'steps': 4820, 'loss/train': 2.752311944961548} +07/25/2024 11:48:02 - INFO - __main__ - Step 4822: {'lr': 0.0004990606591324815, 'samples': 231456, 'steps': 4821, 'loss/train': 2.175144910812378} +07/25/2024 11:48:02 - INFO - __main__ - Step 4823: {'lr': 0.0004990602034829804, 'samples': 231504, 'steps': 4822, 'loss/train': 1.770632028579712} +07/25/2024 11:48:02 - INFO - __main__ - Step 4824: {'lr': 0.0004990597477232024, 'samples': 231552, 'steps': 4823, 'loss/train': 2.358905076980591} +07/25/2024 11:48:03 - INFO - __main__ - Step 4825: {'lr': 0.0004990592918531478, 'samples': 231600, 'steps': 4824, 'loss/train': 1.8145036697387695} +07/25/2024 11:48:03 - INFO - __main__ - Step 4826: {'lr': 0.0004990588358728166, 'samples': 231648, 'steps': 4825, 'loss/train': 2.189002275466919} +07/25/2024 11:48:03 - INFO - __main__ - Step 4827: {'lr': 0.0004990583797822092, 'samples': 231696, 'steps': 4826, 'loss/train': 2.9024453163146973} +07/25/2024 11:48:03 - INFO - __main__ - Step 4828: {'lr': 0.0004990579235813257, 'samples': 231744, 'steps': 4827, 'loss/train': 2.447606325149536} +07/25/2024 11:48:04 - INFO - __main__ - Step 4829: {'lr': 0.0004990574672701664, 'samples': 231792, 'steps': 4828, 'loss/train': 2.501610040664673} +07/25/2024 11:48:04 - INFO - __main__ - Step 4830: {'lr': 0.0004990570108487313, 'samples': 231840, 'steps': 4829, 'loss/train': 1.744626522064209} +07/25/2024 11:48:04 - INFO - __main__ - Step 4831: {'lr': 0.0004990565543170208, 'samples': 231888, 'steps': 4830, 'loss/train': 2.438584089279175} +07/25/2024 11:48:05 - INFO - __main__ - Step 4832: {'lr': 0.000499056097675035, 'samples': 231936, 'steps': 4831, 'loss/train': 2.100817918777466} +07/25/2024 11:48:05 - INFO - __main__ - Step 4833: {'lr': 0.0004990556409227743, 'samples': 231984, 'steps': 4832, 'loss/train': 1.3986241817474365} +07/25/2024 11:48:05 - INFO - __main__ - Step 4834: {'lr': 0.0004990551840602385, 'samples': 232032, 'steps': 4833, 'loss/train': 2.586958408355713} +07/25/2024 11:48:05 - INFO - __main__ - Step 4835: {'lr': 0.0004990547270874282, 'samples': 232080, 'steps': 4834, 'loss/train': 3.019242286682129} +07/25/2024 11:48:06 - INFO - __main__ - Step 4836: {'lr': 0.0004990542700043434, 'samples': 232128, 'steps': 4835, 'loss/train': 1.9374796152114868} +07/25/2024 11:48:06 - INFO - __main__ - Step 4837: {'lr': 0.0004990538128109844, 'samples': 232176, 'steps': 4836, 'loss/train': 2.633469820022583} +07/25/2024 11:48:06 - INFO - __main__ - Step 4838: {'lr': 0.0004990533555073513, 'samples': 232224, 'steps': 4837, 'loss/train': 2.0167651176452637} +07/25/2024 11:48:07 - INFO - __main__ - Step 4839: {'lr': 0.0004990528980934444, 'samples': 232272, 'steps': 4838, 'loss/train': 2.6061782836914062} +07/25/2024 11:48:07 - INFO - __main__ - Step 4840: {'lr': 0.0004990524405692637, 'samples': 232320, 'steps': 4839, 'loss/train': 1.6116878986358643} +07/25/2024 11:48:07 - INFO - __main__ - Step 4841: {'lr': 0.0004990519829348096, 'samples': 232368, 'steps': 4840, 'loss/train': 2.5324201583862305} +07/25/2024 11:48:07 - INFO - __main__ - Step 4842: {'lr': 0.0004990515251900824, 'samples': 232416, 'steps': 4841, 'loss/train': 2.1538748741149902} +07/25/2024 11:48:08 - INFO - __main__ - Step 4843: {'lr': 0.0004990510673350821, 'samples': 232464, 'steps': 4842, 'loss/train': 2.802924871444702} +07/25/2024 11:48:08 - INFO - __main__ - Step 4844: {'lr': 0.000499050609369809, 'samples': 232512, 'steps': 4843, 'loss/train': 2.110649824142456} +07/25/2024 11:48:08 - INFO - __main__ - Step 4845: {'lr': 0.0004990501512942633, 'samples': 232560, 'steps': 4844, 'loss/train': 3.1050989627838135} +07/25/2024 11:48:09 - INFO - __main__ - Step 4846: {'lr': 0.000499049693108445, 'samples': 232608, 'steps': 4845, 'loss/train': 2.1126210689544678} +07/25/2024 11:48:09 - INFO - __main__ - Step 4847: {'lr': 0.0004990492348123546, 'samples': 232656, 'steps': 4846, 'loss/train': 2.1054787635803223} +07/25/2024 11:48:09 - INFO - __main__ - Step 4848: {'lr': 0.0004990487764059921, 'samples': 232704, 'steps': 4847, 'loss/train': 2.0236339569091797} +07/25/2024 11:48:09 - INFO - __main__ - Step 4849: {'lr': 0.0004990483178893579, 'samples': 232752, 'steps': 4848, 'loss/train': 2.342905282974243} +07/25/2024 11:48:10 - INFO - __main__ - Step 4850: {'lr': 0.000499047859262452, 'samples': 232800, 'steps': 4849, 'loss/train': 2.184983968734741} +07/25/2024 11:48:10 - INFO - __main__ - Step 4851: {'lr': 0.0004990474005252746, 'samples': 232848, 'steps': 4850, 'loss/train': 2.8223330974578857} +07/25/2024 11:48:10 - INFO - __main__ - Step 4852: {'lr': 0.0004990469416778262, 'samples': 232896, 'steps': 4851, 'loss/train': 2.4526383876800537} +07/25/2024 11:48:11 - INFO - __main__ - Step 4853: {'lr': 0.0004990464827201067, 'samples': 232944, 'steps': 4852, 'loss/train': 2.884377956390381} +07/25/2024 11:48:11 - INFO - __main__ - Step 4854: {'lr': 0.0004990460236521163, 'samples': 232992, 'steps': 4853, 'loss/train': 5.630037784576416} +07/25/2024 11:48:11 - INFO - __main__ - Step 4855: {'lr': 0.0004990455644738554, 'samples': 233040, 'steps': 4854, 'loss/train': 2.163583278656006} +07/25/2024 11:48:11 - INFO - __main__ - Step 4856: {'lr': 0.000499045105185324, 'samples': 233088, 'steps': 4855, 'loss/train': 2.3345303535461426} +07/25/2024 11:48:12 - INFO - __main__ - Step 4857: {'lr': 0.0004990446457865225, 'samples': 233136, 'steps': 4856, 'loss/train': 1.301830768585205} +07/25/2024 11:48:12 - INFO - __main__ - Step 4858: {'lr': 0.0004990441862774511, 'samples': 233184, 'steps': 4857, 'loss/train': 2.3534843921661377} +07/25/2024 11:48:12 - INFO - __main__ - Step 4859: {'lr': 0.0004990437266581097, 'samples': 233232, 'steps': 4858, 'loss/train': 2.108100652694702} +07/25/2024 11:48:12 - INFO - __main__ - Step 4860: {'lr': 0.0004990432669284988, 'samples': 233280, 'steps': 4859, 'loss/train': 1.9627467393875122} +07/25/2024 11:48:13 - INFO - __main__ - Step 4861: {'lr': 0.0004990428070886186, 'samples': 233328, 'steps': 4860, 'loss/train': 2.3011107444763184} +07/25/2024 11:48:13 - INFO - __main__ - Step 4862: {'lr': 0.0004990423471384692, 'samples': 233376, 'steps': 4861, 'loss/train': 2.3456318378448486} +07/25/2024 11:48:13 - INFO - __main__ - Step 4863: {'lr': 0.0004990418870780508, 'samples': 233424, 'steps': 4862, 'loss/train': 2.5629842281341553} +07/25/2024 11:48:14 - INFO - __main__ - Step 4864: {'lr': 0.0004990414269073635, 'samples': 233472, 'steps': 4863, 'loss/train': 2.2264561653137207} +07/25/2024 11:48:14 - INFO - __main__ - Step 4865: {'lr': 0.0004990409666264079, 'samples': 233520, 'steps': 4864, 'loss/train': 2.341332197189331} +07/25/2024 11:48:14 - INFO - __main__ - Step 4866: {'lr': 0.0004990405062351837, 'samples': 233568, 'steps': 4865, 'loss/train': 1.8534971475601196} +07/25/2024 11:48:14 - INFO - __main__ - Step 4867: {'lr': 0.0004990400457336914, 'samples': 233616, 'steps': 4866, 'loss/train': 2.0882389545440674} +07/25/2024 11:48:15 - INFO - __main__ - Step 4868: {'lr': 0.0004990395851219312, 'samples': 233664, 'steps': 4867, 'loss/train': 1.964776873588562} +07/25/2024 11:48:15 - INFO - __main__ - Step 4869: {'lr': 0.0004990391243999033, 'samples': 233712, 'steps': 4868, 'loss/train': 2.436593532562256} +07/25/2024 11:48:15 - INFO - __main__ - Step 4870: {'lr': 0.0004990386635676077, 'samples': 233760, 'steps': 4869, 'loss/train': 1.773805022239685} +07/25/2024 11:48:16 - INFO - __main__ - Step 4871: {'lr': 0.0004990382026250448, 'samples': 233808, 'steps': 4870, 'loss/train': 2.6540820598602295} +07/25/2024 11:48:16 - INFO - __main__ - Step 4872: {'lr': 0.0004990377415722149, 'samples': 233856, 'steps': 4871, 'loss/train': 2.9543538093566895} +07/25/2024 11:48:16 - INFO - __main__ - Step 4873: {'lr': 0.0004990372804091179, 'samples': 233904, 'steps': 4872, 'loss/train': 1.8977177143096924} +07/25/2024 11:48:16 - INFO - __main__ - Step 4874: {'lr': 0.0004990368191357542, 'samples': 233952, 'steps': 4873, 'loss/train': 2.3047640323638916} +07/25/2024 11:48:17 - INFO - __main__ - Step 4875: {'lr': 0.000499036357752124, 'samples': 234000, 'steps': 4874, 'loss/train': 1.3891572952270508} +07/25/2024 11:48:17 - INFO - __main__ - Step 4876: {'lr': 0.0004990358962582276, 'samples': 234048, 'steps': 4875, 'loss/train': 2.463857889175415} +07/25/2024 11:48:17 - INFO - __main__ - Step 4877: {'lr': 0.000499035434654065, 'samples': 234096, 'steps': 4876, 'loss/train': 2.447978973388672} +07/25/2024 11:48:18 - INFO - __main__ - Step 4878: {'lr': 0.0004990349729396364, 'samples': 234144, 'steps': 4877, 'loss/train': 3.5688717365264893} +07/25/2024 11:48:18 - INFO - __main__ - Step 4879: {'lr': 0.0004990345111149422, 'samples': 234192, 'steps': 4878, 'loss/train': 1.9035056829452515} +07/25/2024 11:48:18 - INFO - __main__ - Step 4880: {'lr': 0.0004990340491799824, 'samples': 234240, 'steps': 4879, 'loss/train': 2.4827587604522705} +07/25/2024 11:48:18 - INFO - __main__ - Step 4881: {'lr': 0.0004990335871347573, 'samples': 234288, 'steps': 4880, 'loss/train': 1.025644302368164} +07/25/2024 11:48:19 - INFO - __main__ - Step 4882: {'lr': 0.0004990331249792672, 'samples': 234336, 'steps': 4881, 'loss/train': 2.463878870010376} +07/25/2024 11:48:19 - INFO - __main__ - Step 4883: {'lr': 0.0004990326627135123, 'samples': 234384, 'steps': 4882, 'loss/train': 1.9751965999603271} +07/25/2024 11:48:19 - INFO - __main__ - Step 4884: {'lr': 0.0004990322003374925, 'samples': 234432, 'steps': 4883, 'loss/train': 3.3487956523895264} +07/25/2024 11:48:20 - INFO - __main__ - Step 4885: {'lr': 0.0004990317378512082, 'samples': 234480, 'steps': 4884, 'loss/train': 2.845632791519165} +07/25/2024 11:48:20 - INFO - __main__ - Step 4886: {'lr': 0.0004990312752546598, 'samples': 234528, 'steps': 4885, 'loss/train': 2.2152466773986816} +07/25/2024 11:48:20 - INFO - __main__ - Step 4887: {'lr': 0.0004990308125478473, 'samples': 234576, 'steps': 4886, 'loss/train': 2.4589908123016357} +07/25/2024 11:48:20 - INFO - __main__ - Step 4888: {'lr': 0.0004990303497307708, 'samples': 234624, 'steps': 4887, 'loss/train': 2.024174690246582} +07/25/2024 11:48:21 - INFO - __main__ - Step 4889: {'lr': 0.0004990298868034307, 'samples': 234672, 'steps': 4888, 'loss/train': 2.391920804977417} +07/25/2024 11:48:21 - INFO - __main__ - Step 4890: {'lr': 0.0004990294237658272, 'samples': 234720, 'steps': 4889, 'loss/train': 1.8084849119186401} +07/25/2024 11:48:21 - INFO - __main__ - Step 4891: {'lr': 0.0004990289606179604, 'samples': 234768, 'steps': 4890, 'loss/train': 1.8361009359359741} +07/25/2024 11:48:22 - INFO - __main__ - Step 4892: {'lr': 0.0004990284973598306, 'samples': 234816, 'steps': 4891, 'loss/train': 2.5216548442840576} +07/25/2024 11:48:22 - INFO - __main__ - Step 4893: {'lr': 0.000499028033991438, 'samples': 234864, 'steps': 4892, 'loss/train': 2.5423030853271484} +07/25/2024 11:48:22 - INFO - __main__ - Step 4894: {'lr': 0.0004990275705127827, 'samples': 234912, 'steps': 4893, 'loss/train': 2.4786300659179688} +07/25/2024 11:48:22 - INFO - __main__ - Step 4895: {'lr': 0.0004990271069238649, 'samples': 234960, 'steps': 4894, 'loss/train': 2.3886241912841797} +07/25/2024 11:48:23 - INFO - __main__ - Step 4896: {'lr': 0.000499026643224685, 'samples': 235008, 'steps': 4895, 'loss/train': 2.882108211517334} +07/25/2024 11:48:23 - INFO - __main__ - Step 4897: {'lr': 0.0004990261794152429, 'samples': 235056, 'steps': 4896, 'loss/train': 1.397897720336914} +07/25/2024 11:48:23 - INFO - __main__ - Step 4898: {'lr': 0.0004990257154955392, 'samples': 235104, 'steps': 4897, 'loss/train': 1.9911210536956787} +07/25/2024 11:48:24 - INFO - __main__ - Step 4899: {'lr': 0.0004990252514655738, 'samples': 235152, 'steps': 4898, 'loss/train': 0.48979219794273376} +07/25/2024 11:48:24 - INFO - __main__ - Step 4900: {'lr': 0.000499024787325347, 'samples': 235200, 'steps': 4899, 'loss/train': 1.9585392475128174} +07/25/2024 11:48:24 - INFO - __main__ - Step 4901: {'lr': 0.000499024323074859, 'samples': 235248, 'steps': 4900, 'loss/train': 2.7274489402770996} +07/25/2024 11:48:24 - INFO - __main__ - Step 4902: {'lr': 0.00049902385871411, 'samples': 235296, 'steps': 4901, 'loss/train': 2.556117296218872} +07/25/2024 11:48:25 - INFO - __main__ - Step 4903: {'lr': 0.0004990233942431002, 'samples': 235344, 'steps': 4902, 'loss/train': 2.5100252628326416} +07/25/2024 11:48:25 - INFO - __main__ - Step 4904: {'lr': 0.0004990229296618298, 'samples': 235392, 'steps': 4903, 'loss/train': 2.2274510860443115} +07/25/2024 11:48:25 - INFO - __main__ - Step 4905: {'lr': 0.0004990224649702991, 'samples': 235440, 'steps': 4904, 'loss/train': 1.4421437978744507} +07/25/2024 11:48:26 - INFO - __main__ - Step 4906: {'lr': 0.0004990220001685082, 'samples': 235488, 'steps': 4905, 'loss/train': 2.594426155090332} +07/25/2024 11:48:26 - INFO - __main__ - Step 4907: {'lr': 0.0004990215352564573, 'samples': 235536, 'steps': 4906, 'loss/train': 2.3642756938934326} +07/25/2024 11:48:26 - INFO - __main__ - Step 4908: {'lr': 0.0004990210702341466, 'samples': 235584, 'steps': 4907, 'loss/train': 2.6753482818603516} +07/25/2024 11:48:26 - INFO - __main__ - Step 4909: {'lr': 0.0004990206051015765, 'samples': 235632, 'steps': 4908, 'loss/train': 2.402113437652588} +07/25/2024 11:48:27 - INFO - __main__ - Step 4910: {'lr': 0.000499020139858747, 'samples': 235680, 'steps': 4909, 'loss/train': 2.5409886837005615} +07/25/2024 11:48:27 - INFO - __main__ - Step 4911: {'lr': 0.0004990196745056583, 'samples': 235728, 'steps': 4910, 'loss/train': 2.224931478500366} +07/25/2024 11:48:27 - INFO - __main__ - Step 4912: {'lr': 0.0004990192090423107, 'samples': 235776, 'steps': 4911, 'loss/train': 2.172131061553955} +07/25/2024 11:48:28 - INFO - __main__ - Step 4913: {'lr': 0.0004990187434687043, 'samples': 235824, 'steps': 4912, 'loss/train': 2.2488064765930176} +07/25/2024 11:48:28 - INFO - __main__ - Step 4914: {'lr': 0.0004990182777848395, 'samples': 235872, 'steps': 4913, 'loss/train': 1.584761381149292} +07/25/2024 11:48:28 - INFO - __main__ - Step 4915: {'lr': 0.0004990178119907162, 'samples': 235920, 'steps': 4914, 'loss/train': 2.7336714267730713} +07/25/2024 11:48:28 - INFO - __main__ - Step 4916: {'lr': 0.0004990173460863351, 'samples': 235968, 'steps': 4915, 'loss/train': 1.8080403804779053} +07/25/2024 11:48:29 - INFO - __main__ - Step 4917: {'lr': 0.0004990168800716959, 'samples': 236016, 'steps': 4916, 'loss/train': 2.686276435852051} +07/25/2024 11:48:29 - INFO - __main__ - Step 4918: {'lr': 0.000499016413946799, 'samples': 236064, 'steps': 4917, 'loss/train': 1.6162887811660767} +07/25/2024 11:48:29 - INFO - __main__ - Step 4919: {'lr': 0.0004990159477116446, 'samples': 236112, 'steps': 4918, 'loss/train': 2.2323265075683594} +07/25/2024 11:48:30 - INFO - __main__ - Step 4920: {'lr': 0.0004990154813662329, 'samples': 236160, 'steps': 4919, 'loss/train': 2.7604942321777344} +07/25/2024 11:48:30 - INFO - __main__ - Step 4921: {'lr': 0.0004990150149105642, 'samples': 236208, 'steps': 4920, 'loss/train': 1.9993125200271606} +07/25/2024 11:48:30 - INFO - __main__ - Step 4922: {'lr': 0.0004990145483446386, 'samples': 236256, 'steps': 4921, 'loss/train': 2.242658853530884} +07/25/2024 11:48:30 - INFO - __main__ - Step 4923: {'lr': 0.0004990140816684563, 'samples': 236304, 'steps': 4922, 'loss/train': 0.3025057017803192} +07/25/2024 11:48:31 - INFO - __main__ - Step 4924: {'lr': 0.0004990136148820176, 'samples': 236352, 'steps': 4923, 'loss/train': 1.9385688304901123} +07/25/2024 11:48:31 - INFO - __main__ - Step 4925: {'lr': 0.0004990131479853226, 'samples': 236400, 'steps': 4924, 'loss/train': 2.3274288177490234} +07/25/2024 11:48:31 - INFO - __main__ - Step 4926: {'lr': 0.0004990126809783715, 'samples': 236448, 'steps': 4925, 'loss/train': 2.4665122032165527} +07/25/2024 11:48:32 - INFO - __main__ - Step 4927: {'lr': 0.0004990122138611647, 'samples': 236496, 'steps': 4926, 'loss/train': 2.6713266372680664} +07/25/2024 11:48:32 - INFO - __main__ - Step 4928: {'lr': 0.0004990117466337022, 'samples': 236544, 'steps': 4927, 'loss/train': 2.375392436981201} +07/25/2024 11:48:32 - INFO - __main__ - Step 4929: {'lr': 0.0004990112792959843, 'samples': 236592, 'steps': 4928, 'loss/train': 1.0123090744018555} +07/25/2024 11:48:32 - INFO - __main__ - Step 4930: {'lr': 0.0004990108118480112, 'samples': 236640, 'steps': 4929, 'loss/train': 2.6813013553619385} +07/25/2024 11:48:33 - INFO - __main__ - Step 4931: {'lr': 0.000499010344289783, 'samples': 236688, 'steps': 4930, 'loss/train': 1.590052604675293} +07/25/2024 11:48:33 - INFO - __main__ - Step 4932: {'lr': 0.0004990098766213001, 'samples': 236736, 'steps': 4931, 'loss/train': 2.315702438354492} +07/25/2024 11:48:33 - INFO - __main__ - Step 4933: {'lr': 0.0004990094088425625, 'samples': 236784, 'steps': 4932, 'loss/train': 1.9596667289733887} +07/25/2024 11:48:33 - INFO - __main__ - Step 4934: {'lr': 0.0004990089409535705, 'samples': 236832, 'steps': 4933, 'loss/train': 3.0427184104919434} +07/25/2024 11:48:34 - INFO - __main__ - Step 4935: {'lr': 0.0004990084729543244, 'samples': 236880, 'steps': 4934, 'loss/train': 2.4292984008789062} +07/25/2024 11:48:34 - INFO - __main__ - Step 4936: {'lr': 0.0004990080048448243, 'samples': 236928, 'steps': 4935, 'loss/train': 2.714674472808838} +07/25/2024 11:48:34 - INFO - __main__ - Step 4937: {'lr': 0.0004990075366250704, 'samples': 236976, 'steps': 4936, 'loss/train': 2.4683403968811035} +07/25/2024 11:48:35 - INFO - __main__ - Step 4938: {'lr': 0.0004990070682950631, 'samples': 237024, 'steps': 4937, 'loss/train': 2.41536545753479} +07/25/2024 11:48:35 - INFO - __main__ - Step 4939: {'lr': 0.0004990065998548023, 'samples': 237072, 'steps': 4938, 'loss/train': 2.7239797115325928} +07/25/2024 11:48:35 - INFO - __main__ - Step 4940: {'lr': 0.0004990061313042884, 'samples': 237120, 'steps': 4939, 'loss/train': 1.8738142251968384} +07/25/2024 11:48:36 - INFO - __main__ - Step 4941: {'lr': 0.0004990056626435215, 'samples': 237168, 'steps': 4940, 'loss/train': 2.1210010051727295} +07/25/2024 11:48:36 - INFO - __main__ - Step 4942: {'lr': 0.0004990051938725019, 'samples': 237216, 'steps': 4941, 'loss/train': 1.9886324405670166} +07/25/2024 11:48:36 - INFO - __main__ - Step 4943: {'lr': 0.0004990047249912299, 'samples': 237264, 'steps': 4942, 'loss/train': 2.704939603805542} +07/25/2024 11:48:36 - INFO - __main__ - Step 4944: {'lr': 0.0004990042559997053, 'samples': 237312, 'steps': 4943, 'loss/train': 3.387220621109009} +07/25/2024 11:48:37 - INFO - __main__ - Step 4945: {'lr': 0.0004990037868979289, 'samples': 237360, 'steps': 4944, 'loss/train': 2.4952340126037598} +07/25/2024 11:48:37 - INFO - __main__ - Step 4946: {'lr': 0.0004990033176859004, 'samples': 237408, 'steps': 4945, 'loss/train': 2.4477546215057373} +07/25/2024 11:48:37 - INFO - __main__ - Step 4947: {'lr': 0.0004990028483636203, 'samples': 237456, 'steps': 4946, 'loss/train': 0.30847540497779846} +07/25/2024 11:48:37 - INFO - __main__ - Step 4948: {'lr': 0.0004990023789310887, 'samples': 237504, 'steps': 4947, 'loss/train': 1.969844937324524} +07/25/2024 11:48:38 - INFO - __main__ - Step 4949: {'lr': 0.0004990019093883058, 'samples': 237552, 'steps': 4948, 'loss/train': 1.9585574865341187} +07/25/2024 11:48:38 - INFO - __main__ - Step 4950: {'lr': 0.0004990014397352719, 'samples': 237600, 'steps': 4949, 'loss/train': 2.6375319957733154} +07/25/2024 11:48:38 - INFO - __main__ - Step 4951: {'lr': 0.000499000969971987, 'samples': 237648, 'steps': 4950, 'loss/train': 1.6325933933258057} +07/25/2024 11:48:39 - INFO - __main__ - Step 4952: {'lr': 0.0004990005000984516, 'samples': 237696, 'steps': 4951, 'loss/train': 2.718658447265625} +07/25/2024 11:48:39 - INFO - __main__ - Step 4953: {'lr': 0.0004990000301146656, 'samples': 237744, 'steps': 4952, 'loss/train': 1.489514946937561} +07/25/2024 11:48:39 - INFO - __main__ - Step 4954: {'lr': 0.0004989995600206296, 'samples': 237792, 'steps': 4953, 'loss/train': 2.1646831035614014} +07/25/2024 11:48:39 - INFO - __main__ - Step 4955: {'lr': 0.0004989990898163434, 'samples': 237840, 'steps': 4954, 'loss/train': 2.502549648284912} +07/25/2024 11:48:40 - INFO - __main__ - Step 4956: {'lr': 0.0004989986195018073, 'samples': 237888, 'steps': 4955, 'loss/train': 2.386430501937866} +07/25/2024 11:48:40 - INFO - __main__ - Step 4957: {'lr': 0.0004989981490770217, 'samples': 237936, 'steps': 4956, 'loss/train': 1.9931451082229614} +07/25/2024 11:48:40 - INFO - __main__ - Step 4958: {'lr': 0.0004989976785419866, 'samples': 237984, 'steps': 4957, 'loss/train': 2.403510332107544} +07/25/2024 11:48:41 - INFO - __main__ - Step 4959: {'lr': 0.0004989972078967025, 'samples': 238032, 'steps': 4958, 'loss/train': 2.487332582473755} +07/25/2024 11:48:41 - INFO - __main__ - Step 4960: {'lr': 0.0004989967371411693, 'samples': 238080, 'steps': 4959, 'loss/train': 2.419773817062378} +07/25/2024 11:48:41 - INFO - __main__ - Step 4961: {'lr': 0.0004989962662753873, 'samples': 238128, 'steps': 4960, 'loss/train': 2.1982436180114746} +07/25/2024 11:48:41 - INFO - __main__ - Step 4962: {'lr': 0.0004989957952993568, 'samples': 238176, 'steps': 4961, 'loss/train': 2.6893012523651123} +07/25/2024 11:48:42 - INFO - __main__ - Step 4963: {'lr': 0.0004989953242130778, 'samples': 238224, 'steps': 4962, 'loss/train': 2.3627607822418213} +07/25/2024 11:48:42 - INFO - __main__ - Step 4964: {'lr': 0.0004989948530165508, 'samples': 238272, 'steps': 4963, 'loss/train': 1.7193280458450317} +07/25/2024 11:48:42 - INFO - __main__ - Step 4965: {'lr': 0.0004989943817097759, 'samples': 238320, 'steps': 4964, 'loss/train': 2.4434385299682617} +07/25/2024 11:48:43 - INFO - __main__ - Step 4966: {'lr': 0.000498993910292753, 'samples': 238368, 'steps': 4965, 'loss/train': 2.971900463104248} +07/25/2024 11:48:43 - INFO - __main__ - Step 4967: {'lr': 0.0004989934387654828, 'samples': 238416, 'steps': 4966, 'loss/train': 2.3622541427612305} +07/25/2024 11:48:43 - INFO - __main__ - Step 4968: {'lr': 0.0004989929671279652, 'samples': 238464, 'steps': 4967, 'loss/train': 2.364210367202759} +07/25/2024 11:48:43 - INFO - __main__ - Step 4969: {'lr': 0.0004989924953802005, 'samples': 238512, 'steps': 4968, 'loss/train': 3.3080832958221436} +07/25/2024 11:48:44 - INFO - __main__ - Step 4970: {'lr': 0.0004989920235221889, 'samples': 238560, 'steps': 4969, 'loss/train': 2.949087619781494} +07/25/2024 11:48:44 - INFO - __main__ - Step 4971: {'lr': 0.0004989915515539306, 'samples': 238608, 'steps': 4970, 'loss/train': 0.29710453748703003} +07/25/2024 11:48:44 - INFO - __main__ - Step 4972: {'lr': 0.0004989910794754259, 'samples': 238656, 'steps': 4971, 'loss/train': 1.8258572816848755} +07/25/2024 11:48:45 - INFO - __main__ - Step 4973: {'lr': 0.0004989906072866749, 'samples': 238704, 'steps': 4972, 'loss/train': 1.2950116395950317} +07/25/2024 11:48:45 - INFO - __main__ - Step 4974: {'lr': 0.0004989901349876778, 'samples': 238752, 'steps': 4973, 'loss/train': 2.3661625385284424} +07/25/2024 11:48:45 - INFO - __main__ - Step 4975: {'lr': 0.0004989896625784349, 'samples': 238800, 'steps': 4974, 'loss/train': 2.3183279037475586} +07/25/2024 11:48:45 - INFO - __main__ - Step 4976: {'lr': 0.0004989891900589462, 'samples': 238848, 'steps': 4975, 'loss/train': 2.700300693511963} +07/25/2024 11:48:46 - INFO - __main__ - Step 4977: {'lr': 0.0004989887174292122, 'samples': 238896, 'steps': 4976, 'loss/train': 1.8624839782714844} +07/25/2024 11:48:46 - INFO - __main__ - Step 4978: {'lr': 0.0004989882446892329, 'samples': 238944, 'steps': 4977, 'loss/train': 2.2088093757629395} +07/25/2024 11:48:46 - INFO - __main__ - Step 4979: {'lr': 0.0004989877718390086, 'samples': 238992, 'steps': 4978, 'loss/train': 2.5244925022125244} +07/25/2024 11:48:47 - INFO - __main__ - Step 4980: {'lr': 0.0004989872988785395, 'samples': 239040, 'steps': 4979, 'loss/train': 2.1328399181365967} +07/25/2024 11:48:47 - INFO - __main__ - Step 4981: {'lr': 0.0004989868258078259, 'samples': 239088, 'steps': 4980, 'loss/train': 2.024489641189575} +07/25/2024 11:48:47 - INFO - __main__ - Step 4982: {'lr': 0.0004989863526268678, 'samples': 239136, 'steps': 4981, 'loss/train': 2.4386839866638184} +07/25/2024 11:48:47 - INFO - __main__ - Step 4983: {'lr': 0.0004989858793356654, 'samples': 239184, 'steps': 4982, 'loss/train': 1.9628868103027344} +07/25/2024 11:48:48 - INFO - __main__ - Step 4984: {'lr': 0.0004989854059342193, 'samples': 239232, 'steps': 4983, 'loss/train': 1.986007809638977} +07/25/2024 11:48:48 - INFO - __main__ - Step 4985: {'lr': 0.0004989849324225292, 'samples': 239280, 'steps': 4984, 'loss/train': 2.816462278366089} +07/25/2024 11:48:48 - INFO - __main__ - Step 4986: {'lr': 0.0004989844588005958, 'samples': 239328, 'steps': 4985, 'loss/train': 2.4287381172180176} +07/25/2024 11:48:49 - INFO - __main__ - Step 4987: {'lr': 0.0004989839850684188, 'samples': 239376, 'steps': 4986, 'loss/train': 2.31201171875} +07/25/2024 11:48:49 - INFO - __main__ - Step 4988: {'lr': 0.0004989835112259988, 'samples': 239424, 'steps': 4987, 'loss/train': 2.0159249305725098} +07/25/2024 11:48:49 - INFO - __main__ - Step 4989: {'lr': 0.0004989830372733358, 'samples': 239472, 'steps': 4988, 'loss/train': 2.246677875518799} +07/25/2024 11:48:49 - INFO - __main__ - Step 4990: {'lr': 0.0004989825632104301, 'samples': 239520, 'steps': 4989, 'loss/train': 1.908808946609497} +07/25/2024 11:48:50 - INFO - __main__ - Step 4991: {'lr': 0.000498982089037282, 'samples': 239568, 'steps': 4990, 'loss/train': 2.1664810180664062} +07/25/2024 11:48:50 - INFO - __main__ - Step 4992: {'lr': 0.0004989816147538916, 'samples': 239616, 'steps': 4991, 'loss/train': 1.1974366903305054} +07/25/2024 11:48:50 - INFO - __main__ - Step 4993: {'lr': 0.000498981140360259, 'samples': 239664, 'steps': 4992, 'loss/train': 1.7014418840408325} +07/25/2024 11:48:51 - INFO - __main__ - Step 4994: {'lr': 0.0004989806658563847, 'samples': 239712, 'steps': 4993, 'loss/train': 2.0412111282348633} +07/25/2024 11:48:51 - INFO - __main__ - Step 4995: {'lr': 0.0004989801912422685, 'samples': 239760, 'steps': 4994, 'loss/train': 0.2689169645309448} +07/25/2024 11:48:51 - INFO - __main__ - Step 4996: {'lr': 0.0004989797165179111, 'samples': 239808, 'steps': 4995, 'loss/train': 2.6634654998779297} +07/25/2024 11:48:51 - INFO - __main__ - Step 4997: {'lr': 0.0004989792416833122, 'samples': 239856, 'steps': 4996, 'loss/train': 1.6820489168167114} +07/25/2024 11:48:52 - INFO - __main__ - Step 4998: {'lr': 0.0004989787667384725, 'samples': 239904, 'steps': 4997, 'loss/train': 2.4913418292999268} +07/25/2024 11:48:52 - INFO - __main__ - Step 4999: {'lr': 0.0004989782916833918, 'samples': 239952, 'steps': 4998, 'loss/train': 2.361237049102783} +07/25/2024 11:48:52 - INFO - __main__ - Step 5000: {'lr': 0.0004989778165180707, 'samples': 240000, 'steps': 4999, 'loss/train': 2.5403690338134766} +07/25/2024 11:48:53 - INFO - __main__ - Step 5001: {'lr': 0.000498977341242509, 'samples': 240048, 'steps': 5000, 'loss/train': 2.840543746948242} +07/25/2024 11:48:53 - INFO - __main__ - Step 5002: {'lr': 0.0004989768658567072, 'samples': 240096, 'steps': 5001, 'loss/train': 2.7575573921203613} +07/25/2024 11:48:53 - INFO - __main__ - Step 5003: {'lr': 0.0004989763903606654, 'samples': 240144, 'steps': 5002, 'loss/train': 1.8742660284042358} +07/25/2024 11:48:53 - INFO - __main__ - Step 5004: {'lr': 0.0004989759147543838, 'samples': 240192, 'steps': 5003, 'loss/train': 0.9823793768882751} +07/25/2024 11:48:54 - INFO - __main__ - Step 5005: {'lr': 0.0004989754390378627, 'samples': 240240, 'steps': 5004, 'loss/train': 2.4278242588043213} +07/25/2024 11:48:54 - INFO - __main__ - Step 5006: {'lr': 0.0004989749632111021, 'samples': 240288, 'steps': 5005, 'loss/train': 2.428830146789551} +07/25/2024 11:48:54 - INFO - __main__ - Step 5007: {'lr': 0.0004989744872741026, 'samples': 240336, 'steps': 5006, 'loss/train': 2.370645761489868} +07/25/2024 11:48:54 - INFO - __main__ - Step 5008: {'lr': 0.0004989740112268641, 'samples': 240384, 'steps': 5007, 'loss/train': 1.9268656969070435} +07/25/2024 11:48:55 - INFO - __main__ - Step 5009: {'lr': 0.0004989735350693868, 'samples': 240432, 'steps': 5008, 'loss/train': 2.355038642883301} +07/25/2024 11:48:55 - INFO - __main__ - Step 5010: {'lr': 0.000498973058801671, 'samples': 240480, 'steps': 5009, 'loss/train': 2.7661242485046387} +07/25/2024 11:48:55 - INFO - __main__ - Step 5011: {'lr': 0.000498972582423717, 'samples': 240528, 'steps': 5010, 'loss/train': 2.44722580909729} +07/25/2024 11:48:56 - INFO - __main__ - Step 5012: {'lr': 0.0004989721059355248, 'samples': 240576, 'steps': 5011, 'loss/train': 2.302764892578125} +07/25/2024 11:48:56 - INFO - __main__ - Step 5013: {'lr': 0.0004989716293370949, 'samples': 240624, 'steps': 5012, 'loss/train': 2.0867245197296143} +07/25/2024 11:48:56 - INFO - __main__ - Step 5014: {'lr': 0.0004989711526284271, 'samples': 240672, 'steps': 5013, 'loss/train': 1.3965389728546143} +07/25/2024 11:48:56 - INFO - __main__ - Step 5015: {'lr': 0.000498970675809522, 'samples': 240720, 'steps': 5014, 'loss/train': 2.4958505630493164} +07/25/2024 11:48:57 - INFO - __main__ - Step 5016: {'lr': 0.0004989701988803797, 'samples': 240768, 'steps': 5015, 'loss/train': 2.291652202606201} +07/25/2024 11:48:57 - INFO - __main__ - Step 5017: {'lr': 0.0004989697218410002, 'samples': 240816, 'steps': 5016, 'loss/train': 2.169736623764038} +07/25/2024 11:48:57 - INFO - __main__ - Step 5018: {'lr': 0.0004989692446913841, 'samples': 240864, 'steps': 5017, 'loss/train': 2.1018226146698} +07/25/2024 11:48:58 - INFO - __main__ - Step 5019: {'lr': 0.0004989687674315312, 'samples': 240912, 'steps': 5018, 'loss/train': 1.2173761129379272} +07/25/2024 11:48:58 - INFO - __main__ - Step 5020: {'lr': 0.000498968290061442, 'samples': 240960, 'steps': 5019, 'loss/train': 2.3103034496307373} +07/25/2024 11:48:58 - INFO - __main__ - Step 5021: {'lr': 0.0004989678125811167, 'samples': 241008, 'steps': 5020, 'loss/train': 1.916985034942627} +07/25/2024 11:48:58 - INFO - __main__ - Step 5022: {'lr': 0.0004989673349905553, 'samples': 241056, 'steps': 5021, 'loss/train': 2.1031088829040527} +07/25/2024 11:48:59 - INFO - __main__ - Step 5023: {'lr': 0.0004989668572897581, 'samples': 241104, 'steps': 5022, 'loss/train': 2.0643622875213623} +07/25/2024 11:48:59 - INFO - __main__ - Step 5024: {'lr': 0.0004989663794787255, 'samples': 241152, 'steps': 5023, 'loss/train': 2.0347561836242676} +07/25/2024 11:48:59 - INFO - __main__ - Step 5025: {'lr': 0.0004989659015574575, 'samples': 241200, 'steps': 5024, 'loss/train': 2.1667110919952393} +07/25/2024 11:49:00 - INFO - __main__ - Step 5026: {'lr': 0.0004989654235259543, 'samples': 241248, 'steps': 5025, 'loss/train': 2.504117965698242} +07/25/2024 11:49:00 - INFO - __main__ - Step 5027: {'lr': 0.0004989649453842163, 'samples': 241296, 'steps': 5026, 'loss/train': 2.7798547744750977} +07/25/2024 11:49:00 - INFO - __main__ - Step 5028: {'lr': 0.0004989644671322435, 'samples': 241344, 'steps': 5027, 'loss/train': 0.2708657383918762} +07/25/2024 11:49:00 - INFO - __main__ - Step 5029: {'lr': 0.0004989639887700363, 'samples': 241392, 'steps': 5028, 'loss/train': 2.034572124481201} +07/25/2024 11:49:01 - INFO - __main__ - Step 5030: {'lr': 0.0004989635102975948, 'samples': 241440, 'steps': 5029, 'loss/train': 2.4135847091674805} +07/25/2024 11:49:01 - INFO - __main__ - Step 5031: {'lr': 0.0004989630317149192, 'samples': 241488, 'steps': 5030, 'loss/train': 2.598674774169922} +07/25/2024 11:49:01 - INFO - __main__ - Step 5032: {'lr': 0.0004989625530220097, 'samples': 241536, 'steps': 5031, 'loss/train': 1.669040322303772} +07/25/2024 11:49:02 - INFO - __main__ - Step 5033: {'lr': 0.0004989620742188667, 'samples': 241584, 'steps': 5032, 'loss/train': 2.205273389816284} +07/25/2024 11:49:02 - INFO - __main__ - Step 5034: {'lr': 0.0004989615953054901, 'samples': 241632, 'steps': 5033, 'loss/train': 2.5328116416931152} +07/25/2024 11:49:02 - INFO - __main__ - Step 5035: {'lr': 0.0004989611162818804, 'samples': 241680, 'steps': 5034, 'loss/train': 2.675851583480835} +07/25/2024 11:49:02 - INFO - __main__ - Step 5036: {'lr': 0.0004989606371480377, 'samples': 241728, 'steps': 5035, 'loss/train': 2.189134359359741} +07/25/2024 11:49:03 - INFO - __main__ - Step 5037: {'lr': 0.0004989601579039622, 'samples': 241776, 'steps': 5036, 'loss/train': 2.4665966033935547} +07/25/2024 11:49:03 - INFO - __main__ - Step 5038: {'lr': 0.0004989596785496541, 'samples': 241824, 'steps': 5037, 'loss/train': 1.758175253868103} +07/25/2024 11:49:03 - INFO - __main__ - Step 5039: {'lr': 0.0004989591990851135, 'samples': 241872, 'steps': 5038, 'loss/train': 2.5426204204559326} +07/25/2024 11:49:04 - INFO - __main__ - Step 5040: {'lr': 0.0004989587195103409, 'samples': 241920, 'steps': 5039, 'loss/train': 1.907364845275879} +07/25/2024 11:49:04 - INFO - __main__ - Step 5041: {'lr': 0.0004989582398253363, 'samples': 241968, 'steps': 5040, 'loss/train': 2.2652392387390137} +07/25/2024 11:49:04 - INFO - __main__ - Step 5042: {'lr': 0.0004989577600301, 'samples': 242016, 'steps': 5041, 'loss/train': 2.349416494369507} +07/25/2024 11:49:04 - INFO - __main__ - Step 5043: {'lr': 0.0004989572801246321, 'samples': 242064, 'steps': 5042, 'loss/train': 2.414151430130005} +07/25/2024 11:49:05 - INFO - __main__ - Step 5044: {'lr': 0.000498956800108933, 'samples': 242112, 'steps': 5043, 'loss/train': 2.5960676670074463} +07/25/2024 11:49:05 - INFO - __main__ - Step 5045: {'lr': 0.0004989563199830028, 'samples': 242160, 'steps': 5044, 'loss/train': 1.910558819770813} +07/25/2024 11:49:05 - INFO - __main__ - Step 5046: {'lr': 0.0004989558397468417, 'samples': 242208, 'steps': 5045, 'loss/train': 1.8968133926391602} +07/25/2024 11:49:06 - INFO - __main__ - Step 5047: {'lr': 0.0004989553594004499, 'samples': 242256, 'steps': 5046, 'loss/train': 2.8205223083496094} +07/25/2024 11:49:06 - INFO - __main__ - Step 5048: {'lr': 0.0004989548789438277, 'samples': 242304, 'steps': 5047, 'loss/train': 2.2846245765686035} +07/25/2024 11:49:06 - INFO - __main__ - Step 5049: {'lr': 0.0004989543983769751, 'samples': 242352, 'steps': 5048, 'loss/train': 2.27638840675354} +07/25/2024 11:49:06 - INFO - __main__ - Step 5050: {'lr': 0.0004989539176998926, 'samples': 242400, 'steps': 5049, 'loss/train': 2.1291146278381348} +07/25/2024 11:49:07 - INFO - __main__ - Step 5051: {'lr': 0.0004989534369125802, 'samples': 242448, 'steps': 5050, 'loss/train': 2.3016905784606934} +07/25/2024 11:49:07 - INFO - __main__ - Step 5052: {'lr': 0.0004989529560150383, 'samples': 242496, 'steps': 5051, 'loss/train': 0.34193333983421326} +07/25/2024 11:49:07 - INFO - __main__ - Step 5053: {'lr': 0.0004989524750072669, 'samples': 242544, 'steps': 5052, 'loss/train': 1.9067734479904175} +07/25/2024 11:49:08 - INFO - __main__ - Step 5054: {'lr': 0.0004989519938892663, 'samples': 242592, 'steps': 5053, 'loss/train': 2.265615224838257} +07/25/2024 11:49:08 - INFO - __main__ - Step 5055: {'lr': 0.0004989515126610369, 'samples': 242640, 'steps': 5054, 'loss/train': 2.144408702850342} +07/25/2024 11:49:08 - INFO - __main__ - Step 5056: {'lr': 0.0004989510313225787, 'samples': 242688, 'steps': 5055, 'loss/train': 1.6376217603683472} +07/25/2024 11:49:08 - INFO - __main__ - Step 5057: {'lr': 0.0004989505498738919, 'samples': 242736, 'steps': 5056, 'loss/train': 2.4709572792053223} +07/25/2024 11:49:09 - INFO - __main__ - Step 5058: {'lr': 0.0004989500683149768, 'samples': 242784, 'steps': 5057, 'loss/train': 2.043492555618286} +07/25/2024 11:49:09 - INFO - __main__ - Step 5059: {'lr': 0.0004989495866458335, 'samples': 242832, 'steps': 5058, 'loss/train': 2.3227407932281494} +07/25/2024 11:49:09 - INFO - __main__ - Step 5060: {'lr': 0.0004989491048664624, 'samples': 242880, 'steps': 5059, 'loss/train': 2.1980693340301514} +07/25/2024 11:49:10 - INFO - __main__ - Step 5061: {'lr': 0.0004989486229768636, 'samples': 242928, 'steps': 5060, 'loss/train': 2.3788905143737793} +07/25/2024 11:49:10 - INFO - __main__ - Step 5062: {'lr': 0.0004989481409770373, 'samples': 242976, 'steps': 5061, 'loss/train': 2.0039639472961426} +07/25/2024 11:49:10 - INFO - __main__ - Step 5063: {'lr': 0.0004989476588669837, 'samples': 243024, 'steps': 5062, 'loss/train': 2.3959217071533203} +07/25/2024 11:49:10 - INFO - __main__ - Step 5064: {'lr': 0.000498947176646703, 'samples': 243072, 'steps': 5063, 'loss/train': 2.429384469985962} +07/25/2024 11:49:11 - INFO - __main__ - Step 5065: {'lr': 0.0004989466943161957, 'samples': 243120, 'steps': 5064, 'loss/train': 1.9427108764648438} +07/25/2024 11:49:11 - INFO - __main__ - Step 5066: {'lr': 0.0004989462118754616, 'samples': 243168, 'steps': 5065, 'loss/train': 2.641836166381836} +07/25/2024 11:49:11 - INFO - __main__ - Step 5067: {'lr': 0.0004989457293245011, 'samples': 243216, 'steps': 5066, 'loss/train': 2.4138541221618652} +07/25/2024 11:49:12 - INFO - __main__ - Step 5068: {'lr': 0.0004989452466633144, 'samples': 243264, 'steps': 5067, 'loss/train': 1.8364237546920776} +07/25/2024 11:49:12 - INFO - __main__ - Step 5069: {'lr': 0.0004989447638919017, 'samples': 243312, 'steps': 5068, 'loss/train': 2.4296298027038574} +07/25/2024 11:49:12 - INFO - __main__ - Step 5070: {'lr': 0.0004989442810102634, 'samples': 243360, 'steps': 5069, 'loss/train': 2.199169158935547} +07/25/2024 11:49:12 - INFO - __main__ - Step 5071: {'lr': 0.0004989437980183994, 'samples': 243408, 'steps': 5070, 'loss/train': 2.044943332672119} +07/25/2024 11:49:13 - INFO - __main__ - Step 5072: {'lr': 0.0004989433149163101, 'samples': 243456, 'steps': 5071, 'loss/train': 2.3585152626037598} +07/25/2024 11:49:13 - INFO - __main__ - Step 5073: {'lr': 0.0004989428317039957, 'samples': 243504, 'steps': 5072, 'loss/train': 2.4576306343078613} +07/25/2024 11:49:13 - INFO - __main__ - Step 5074: {'lr': 0.0004989423483814563, 'samples': 243552, 'steps': 5073, 'loss/train': 1.6016595363616943} +07/25/2024 11:49:14 - INFO - __main__ - Step 5075: {'lr': 0.0004989418649486923, 'samples': 243600, 'steps': 5074, 'loss/train': 2.343148946762085} +07/25/2024 11:49:14 - INFO - __main__ - Step 5076: {'lr': 0.0004989413814057038, 'samples': 243648, 'steps': 5075, 'loss/train': 0.3069329559803009} +07/25/2024 11:49:14 - INFO - __main__ - Step 5077: {'lr': 0.0004989408977524911, 'samples': 243696, 'steps': 5076, 'loss/train': 1.8257054090499878} +07/25/2024 11:49:14 - INFO - __main__ - Step 5078: {'lr': 0.0004989404139890542, 'samples': 243744, 'steps': 5077, 'loss/train': 1.3740496635437012} +07/25/2024 11:49:15 - INFO - __main__ - Step 5079: {'lr': 0.0004989399301153936, 'samples': 243792, 'steps': 5078, 'loss/train': 2.0266616344451904} +07/25/2024 11:49:15 - INFO - __main__ - Step 5080: {'lr': 0.0004989394461315092, 'samples': 243840, 'steps': 5079, 'loss/train': 1.153383493423462} +07/25/2024 11:49:15 - INFO - __main__ - Step 5081: {'lr': 0.0004989389620374016, 'samples': 243888, 'steps': 5080, 'loss/train': 2.6235318183898926} +07/25/2024 11:49:15 - INFO - __main__ - Step 5082: {'lr': 0.0004989384778330705, 'samples': 243936, 'steps': 5081, 'loss/train': 2.404179096221924} +07/25/2024 11:49:16 - INFO - __main__ - Step 5083: {'lr': 0.0004989379935185167, 'samples': 243984, 'steps': 5082, 'loss/train': 2.1318604946136475} +07/25/2024 11:49:16 - INFO - __main__ - Step 5084: {'lr': 0.00049893750909374, 'samples': 244032, 'steps': 5083, 'loss/train': 2.6748578548431396} +07/25/2024 11:49:16 - INFO - __main__ - Step 5085: {'lr': 0.0004989370245587408, 'samples': 244080, 'steps': 5084, 'loss/train': 2.054917335510254} +07/25/2024 11:49:17 - INFO - __main__ - Step 5086: {'lr': 0.0004989365399135192, 'samples': 244128, 'steps': 5085, 'loss/train': 2.535310745239258} +07/25/2024 11:49:17 - INFO - __main__ - Step 5087: {'lr': 0.0004989360551580757, 'samples': 244176, 'steps': 5086, 'loss/train': 2.353254795074463} +07/25/2024 11:49:17 - INFO - __main__ - Step 5088: {'lr': 0.00049893557029241, 'samples': 244224, 'steps': 5087, 'loss/train': 2.354034900665283} +07/25/2024 11:49:17 - INFO - __main__ - Step 5089: {'lr': 0.0004989350853165227, 'samples': 244272, 'steps': 5088, 'loss/train': 2.0271739959716797} +07/25/2024 11:49:18 - INFO - __main__ - Step 5090: {'lr': 0.0004989346002304139, 'samples': 244320, 'steps': 5089, 'loss/train': 2.0477986335754395} +07/25/2024 11:49:18 - INFO - __main__ - Step 5091: {'lr': 0.000498934115034084, 'samples': 244368, 'steps': 5090, 'loss/train': 2.665469169616699} +07/25/2024 11:49:18 - INFO - __main__ - Step 5092: {'lr': 0.0004989336297275328, 'samples': 244416, 'steps': 5091, 'loss/train': 1.7386640310287476} +07/25/2024 11:49:19 - INFO - __main__ - Step 5093: {'lr': 0.0004989331443107608, 'samples': 244464, 'steps': 5092, 'loss/train': 2.3546085357666016} +07/25/2024 11:49:19 - INFO - __main__ - Step 5094: {'lr': 0.0004989326587837683, 'samples': 244512, 'steps': 5093, 'loss/train': 2.624849319458008} +07/25/2024 11:49:19 - INFO - __main__ - Step 5095: {'lr': 0.0004989321731465554, 'samples': 244560, 'steps': 5094, 'loss/train': 2.039267063140869} +07/25/2024 11:49:19 - INFO - __main__ - Step 5096: {'lr': 0.0004989316873991223, 'samples': 244608, 'steps': 5095, 'loss/train': 2.301259756088257} +07/25/2024 11:49:20 - INFO - __main__ - Step 5097: {'lr': 0.000498931201541469, 'samples': 244656, 'steps': 5096, 'loss/train': 2.106670379638672} +07/25/2024 11:49:20 - INFO - __main__ - Step 5098: {'lr': 0.0004989307155735962, 'samples': 244704, 'steps': 5097, 'loss/train': 2.017437696456909} +07/25/2024 11:49:20 - INFO - __main__ - Step 5099: {'lr': 0.0004989302294955038, 'samples': 244752, 'steps': 5098, 'loss/train': 2.2713260650634766} +07/25/2024 11:49:21 - INFO - __main__ - Step 5100: {'lr': 0.0004989297433071921, 'samples': 244800, 'steps': 5099, 'loss/train': 0.26187366247177124} +07/25/2024 11:49:21 - INFO - __main__ - Step 5101: {'lr': 0.0004989292570086612, 'samples': 244848, 'steps': 5100, 'loss/train': 1.8836474418640137} +07/25/2024 11:49:21 - INFO - __main__ - Step 5102: {'lr': 0.0004989287705999114, 'samples': 244896, 'steps': 5101, 'loss/train': 1.8627448081970215} +07/25/2024 11:49:21 - INFO - __main__ - Step 5103: {'lr': 0.000498928284080943, 'samples': 244944, 'steps': 5102, 'loss/train': 2.0959408283233643} +07/25/2024 11:49:22 - INFO - __main__ - Step 5104: {'lr': 0.0004989277974517561, 'samples': 244992, 'steps': 5103, 'loss/train': 1.7193840742111206} +07/25/2024 11:49:22 - INFO - __main__ - Step 5105: {'lr': 0.0004989273107123508, 'samples': 245040, 'steps': 5104, 'loss/train': 2.237032651901245} +07/25/2024 11:49:22 - INFO - __main__ - Step 5106: {'lr': 0.0004989268238627277, 'samples': 245088, 'steps': 5105, 'loss/train': 2.382434368133545} +07/25/2024 11:49:23 - INFO - __main__ - Step 5107: {'lr': 0.0004989263369028867, 'samples': 245136, 'steps': 5106, 'loss/train': 2.0725760459899902} +07/25/2024 11:49:23 - INFO - __main__ - Step 5108: {'lr': 0.0004989258498328279, 'samples': 245184, 'steps': 5107, 'loss/train': 2.0780482292175293} +07/25/2024 11:49:23 - INFO - __main__ - Step 5109: {'lr': 0.000498925362652552, 'samples': 245232, 'steps': 5108, 'loss/train': 2.04944109916687} +07/25/2024 11:49:23 - INFO - __main__ - Step 5110: {'lr': 0.0004989248753620588, 'samples': 245280, 'steps': 5109, 'loss/train': 2.292900800704956} +07/25/2024 11:49:24 - INFO - __main__ - Step 5111: {'lr': 0.0004989243879613486, 'samples': 245328, 'steps': 5110, 'loss/train': 2.7408838272094727} +07/25/2024 11:49:24 - INFO - __main__ - Step 5112: {'lr': 0.0004989239004504217, 'samples': 245376, 'steps': 5111, 'loss/train': 1.8050743341445923} +07/25/2024 11:49:24 - INFO - __main__ - Step 5113: {'lr': 0.0004989234128292782, 'samples': 245424, 'steps': 5112, 'loss/train': 2.336508274078369} +07/25/2024 11:49:25 - INFO - __main__ - Step 5114: {'lr': 0.0004989229250979185, 'samples': 245472, 'steps': 5113, 'loss/train': 2.7533535957336426} +07/25/2024 11:49:25 - INFO - __main__ - Step 5115: {'lr': 0.0004989224372563425, 'samples': 245520, 'steps': 5114, 'loss/train': 2.2391161918640137} +07/25/2024 11:49:25 - INFO - __main__ - Step 5116: {'lr': 0.0004989219493045508, 'samples': 245568, 'steps': 5115, 'loss/train': 1.9729444980621338} +07/25/2024 11:49:25 - INFO - __main__ - Step 5117: {'lr': 0.0004989214612425434, 'samples': 245616, 'steps': 5116, 'loss/train': 2.3423428535461426} +07/25/2024 11:49:26 - INFO - __main__ - Step 5118: {'lr': 0.0004989209730703206, 'samples': 245664, 'steps': 5117, 'loss/train': 1.9697405099868774} +07/25/2024 11:49:26 - INFO - __main__ - Step 5119: {'lr': 0.0004989204847878825, 'samples': 245712, 'steps': 5118, 'loss/train': 2.623321056365967} +07/25/2024 11:49:26 - INFO - __main__ - Step 5120: {'lr': 0.0004989199963952295, 'samples': 245760, 'steps': 5119, 'loss/train': 2.437471628189087} +07/25/2024 11:49:27 - INFO - __main__ - Step 5121: {'lr': 0.0004989195078923615, 'samples': 245808, 'steps': 5120, 'loss/train': 1.1078234910964966} +07/25/2024 11:49:27 - INFO - __main__ - Step 5122: {'lr': 0.000498919019279279, 'samples': 245856, 'steps': 5121, 'loss/train': 2.097297191619873} +07/25/2024 11:49:27 - INFO - __main__ - Step 5123: {'lr': 0.0004989185305559822, 'samples': 245904, 'steps': 5122, 'loss/train': 1.7287943363189697} +07/25/2024 11:49:27 - INFO - __main__ - Step 5124: {'lr': 0.0004989180417224712, 'samples': 245952, 'steps': 5123, 'loss/train': 1.7633498907089233} +07/25/2024 11:49:28 - INFO - __main__ - Step 5125: {'lr': 0.0004989175527787463, 'samples': 246000, 'steps': 5124, 'loss/train': 2.605008840560913} +07/25/2024 11:49:28 - INFO - __main__ - Step 5126: {'lr': 0.0004989170637248076, 'samples': 246048, 'steps': 5125, 'loss/train': 2.376581907272339} +07/25/2024 11:49:28 - INFO - __main__ - Step 5127: {'lr': 0.0004989165745606554, 'samples': 246096, 'steps': 5126, 'loss/train': 2.3696858882904053} +07/25/2024 11:49:29 - INFO - __main__ - Step 5128: {'lr': 0.00049891608528629, 'samples': 246144, 'steps': 5127, 'loss/train': 1.9155782461166382} +07/25/2024 11:49:29 - INFO - __main__ - Step 5129: {'lr': 0.0004989155959017115, 'samples': 246192, 'steps': 5128, 'loss/train': 1.930078148841858} +07/25/2024 11:49:29 - INFO - __main__ - Step 5130: {'lr': 0.0004989151064069202, 'samples': 246240, 'steps': 5129, 'loss/train': 1.6650573015213013} +07/25/2024 11:49:29 - INFO - __main__ - Step 5131: {'lr': 0.0004989146168019162, 'samples': 246288, 'steps': 5130, 'loss/train': 1.7944221496582031} +07/25/2024 11:49:30 - INFO - __main__ - Step 5132: {'lr': 0.0004989141270866998, 'samples': 246336, 'steps': 5131, 'loss/train': 2.50073504447937} +07/25/2024 11:49:30 - INFO - __main__ - Step 5133: {'lr': 0.0004989136372612711, 'samples': 246384, 'steps': 5132, 'loss/train': 2.4562132358551025} +07/25/2024 11:49:30 - INFO - __main__ - Step 5134: {'lr': 0.0004989131473256306, 'samples': 246432, 'steps': 5133, 'loss/train': 2.6324596405029297} +07/25/2024 11:49:31 - INFO - __main__ - Step 5135: {'lr': 0.0004989126572797781, 'samples': 246480, 'steps': 5134, 'loss/train': 2.118579149246216} +07/25/2024 11:49:31 - INFO - __main__ - Step 5136: {'lr': 0.0004989121671237143, 'samples': 246528, 'steps': 5135, 'loss/train': 2.3580384254455566} +07/25/2024 11:49:31 - INFO - __main__ - Step 5137: {'lr': 0.0004989116768574391, 'samples': 246576, 'steps': 5136, 'loss/train': 2.3293023109436035} +07/25/2024 11:49:31 - INFO - __main__ - Step 5138: {'lr': 0.0004989111864809526, 'samples': 246624, 'steps': 5137, 'loss/train': 1.9546360969543457} +07/25/2024 11:49:32 - INFO - __main__ - Step 5139: {'lr': 0.0004989106959942555, 'samples': 246672, 'steps': 5138, 'loss/train': 2.4875857830047607} +07/25/2024 11:49:32 - INFO - __main__ - Step 5140: {'lr': 0.0004989102053973475, 'samples': 246720, 'steps': 5139, 'loss/train': 2.0127482414245605} +07/25/2024 11:49:32 - INFO - __main__ - Step 5141: {'lr': 0.0004989097146902292, 'samples': 246768, 'steps': 5140, 'loss/train': 2.4675846099853516} +07/25/2024 11:49:33 - INFO - __main__ - Step 5142: {'lr': 0.0004989092238729004, 'samples': 246816, 'steps': 5141, 'loss/train': 2.2882745265960693} +07/25/2024 11:49:33 - INFO - __main__ - Step 5143: {'lr': 0.0004989087329453618, 'samples': 246864, 'steps': 5142, 'loss/train': 2.394383668899536} +07/25/2024 11:49:33 - INFO - __main__ - Step 5144: {'lr': 0.0004989082419076134, 'samples': 246912, 'steps': 5143, 'loss/train': 2.4591526985168457} +07/25/2024 11:49:33 - INFO - __main__ - Step 5145: {'lr': 0.0004989077507596552, 'samples': 246960, 'steps': 5144, 'loss/train': 2.5074102878570557} +07/25/2024 11:49:34 - INFO - __main__ - Step 5146: {'lr': 0.0004989072595014877, 'samples': 247008, 'steps': 5145, 'loss/train': 1.9927610158920288} +07/25/2024 11:49:34 - INFO - __main__ - Step 5147: {'lr': 0.0004989067681331111, 'samples': 247056, 'steps': 5146, 'loss/train': 1.973064661026001} +07/25/2024 11:49:34 - INFO - __main__ - Step 5148: {'lr': 0.0004989062766545255, 'samples': 247104, 'steps': 5147, 'loss/train': 2.0619888305664062} +07/25/2024 11:49:34 - INFO - __main__ - Step 5149: {'lr': 0.0004989057850657313, 'samples': 247152, 'steps': 5148, 'loss/train': 2.1563034057617188} +07/25/2024 11:49:35 - INFO - __main__ - Step 5150: {'lr': 0.0004989052933667284, 'samples': 247200, 'steps': 5149, 'loss/train': 1.9134719371795654} +07/25/2024 11:49:35 - INFO - __main__ - Step 5151: {'lr': 0.0004989048015575173, 'samples': 247248, 'steps': 5150, 'loss/train': 1.8935760259628296} +07/25/2024 11:49:35 - INFO - __main__ - Step 5152: {'lr': 0.0004989043096380981, 'samples': 247296, 'steps': 5151, 'loss/train': 2.5401875972747803} +07/25/2024 11:49:36 - INFO - __main__ - Step 5153: {'lr': 0.0004989038176084711, 'samples': 247344, 'steps': 5152, 'loss/train': 1.8011600971221924} +07/25/2024 11:49:36 - INFO - __main__ - Step 5154: {'lr': 0.0004989033254686365, 'samples': 247392, 'steps': 5153, 'loss/train': 2.1335816383361816} +07/25/2024 11:49:36 - INFO - __main__ - Step 5155: {'lr': 0.0004989028332185945, 'samples': 247440, 'steps': 5154, 'loss/train': 2.1070175170898438} +07/25/2024 11:49:36 - INFO - __main__ - Step 5156: {'lr': 0.0004989023408583452, 'samples': 247488, 'steps': 5155, 'loss/train': 1.8512372970581055} +07/25/2024 11:49:37 - INFO - __main__ - Step 5157: {'lr': 0.0004989018483878889, 'samples': 247536, 'steps': 5156, 'loss/train': 2.5679848194122314} +07/25/2024 11:49:37 - INFO - __main__ - Step 5158: {'lr': 0.0004989013558072259, 'samples': 247584, 'steps': 5157, 'loss/train': 2.535346746444702} +07/25/2024 11:49:37 - INFO - __main__ - Step 5159: {'lr': 0.0004989008631163563, 'samples': 247632, 'steps': 5158, 'loss/train': 2.7086679935455322} +07/25/2024 11:49:38 - INFO - __main__ - Step 5160: {'lr': 0.0004989003703152804, 'samples': 247680, 'steps': 5159, 'loss/train': 2.4284446239471436} +07/25/2024 11:49:38 - INFO - __main__ - Step 5161: {'lr': 0.0004988998774039984, 'samples': 247728, 'steps': 5160, 'loss/train': 2.2114667892456055} +07/25/2024 11:49:38 - INFO - __main__ - Step 5162: {'lr': 0.0004988993843825104, 'samples': 247776, 'steps': 5161, 'loss/train': 1.980140209197998} +07/25/2024 11:49:38 - INFO - __main__ - Step 5163: {'lr': 0.000498898891250817, 'samples': 247824, 'steps': 5162, 'loss/train': 2.971572160720825} +07/25/2024 11:49:39 - INFO - __main__ - Step 5164: {'lr': 0.0004988983980089179, 'samples': 247872, 'steps': 5163, 'loss/train': 2.2964963912963867} +07/25/2024 11:49:39 - INFO - __main__ - Step 5165: {'lr': 0.0004988979046568136, 'samples': 247920, 'steps': 5164, 'loss/train': 2.1774208545684814} +07/25/2024 11:49:39 - INFO - __main__ - Step 5166: {'lr': 0.0004988974111945043, 'samples': 247968, 'steps': 5165, 'loss/train': 2.3385722637176514} +07/25/2024 11:49:40 - INFO - __main__ - Step 5167: {'lr': 0.0004988969176219903, 'samples': 248016, 'steps': 5166, 'loss/train': 2.113541841506958} +07/25/2024 11:49:40 - INFO - __main__ - Step 5168: {'lr': 0.0004988964239392717, 'samples': 248064, 'steps': 5167, 'loss/train': 1.6340086460113525} +07/25/2024 11:49:40 - INFO - __main__ - Step 5169: {'lr': 0.0004988959301463486, 'samples': 248112, 'steps': 5168, 'loss/train': 2.2889862060546875} +07/25/2024 11:49:40 - INFO - __main__ - Step 5170: {'lr': 0.0004988954362432214, 'samples': 248160, 'steps': 5169, 'loss/train': 1.3773244619369507} +07/25/2024 11:49:41 - INFO - __main__ - Step 5171: {'lr': 0.0004988949422298904, 'samples': 248208, 'steps': 5170, 'loss/train': 1.9566879272460938} +07/25/2024 11:49:41 - INFO - __main__ - Step 5172: {'lr': 0.0004988944481063556, 'samples': 248256, 'steps': 5171, 'loss/train': 2.5473074913024902} +07/25/2024 11:49:41 - INFO - __main__ - Step 5173: {'lr': 0.0004988939538726172, 'samples': 248304, 'steps': 5172, 'loss/train': 2.1359033584594727} +07/25/2024 11:49:42 - INFO - __main__ - Step 5174: {'lr': 0.0004988934595286757, 'samples': 248352, 'steps': 5173, 'loss/train': 1.982467532157898} +07/25/2024 11:49:42 - INFO - __main__ - Step 5175: {'lr': 0.000498892965074531, 'samples': 248400, 'steps': 5174, 'loss/train': 2.299055814743042} +07/25/2024 11:49:42 - INFO - __main__ - Step 5176: {'lr': 0.0004988924705101837, 'samples': 248448, 'steps': 5175, 'loss/train': 2.38177490234375} +07/25/2024 11:49:42 - INFO - __main__ - Step 5177: {'lr': 0.0004988919758356337, 'samples': 248496, 'steps': 5176, 'loss/train': 1.9902746677398682} +07/25/2024 11:49:43 - INFO - __main__ - Step 5178: {'lr': 0.0004988914810508812, 'samples': 248544, 'steps': 5177, 'loss/train': 2.238668918609619} +07/25/2024 11:49:43 - INFO - __main__ - Step 5179: {'lr': 0.0004988909861559265, 'samples': 248592, 'steps': 5178, 'loss/train': 2.6699817180633545} +07/25/2024 11:49:43 - INFO - __main__ - Step 5180: {'lr': 0.00049889049115077, 'samples': 248640, 'steps': 5179, 'loss/train': 2.436572790145874} +07/25/2024 11:49:44 - INFO - __main__ - Step 5181: {'lr': 0.0004988899960354118, 'samples': 248688, 'steps': 5180, 'loss/train': 1.7710180282592773} +07/25/2024 11:49:44 - INFO - __main__ - Step 5182: {'lr': 0.0004988895008098519, 'samples': 248736, 'steps': 5181, 'loss/train': 2.1805996894836426} +07/25/2024 11:49:44 - INFO - __main__ - Step 5183: {'lr': 0.0004988890054740908, 'samples': 248784, 'steps': 5182, 'loss/train': 2.6028332710266113} +07/25/2024 11:49:44 - INFO - __main__ - Step 5184: {'lr': 0.0004988885100281285, 'samples': 248832, 'steps': 5183, 'loss/train': 2.4386584758758545} +07/25/2024 11:49:45 - INFO - __main__ - Step 5185: {'lr': 0.0004988880144719656, 'samples': 248880, 'steps': 5184, 'loss/train': 1.926903247833252} +07/25/2024 11:49:45 - INFO - __main__ - Step 5186: {'lr': 0.0004988875188056018, 'samples': 248928, 'steps': 5185, 'loss/train': 1.9766713380813599} +07/25/2024 11:49:45 - INFO - __main__ - Step 5187: {'lr': 0.0004988870230290377, 'samples': 248976, 'steps': 5186, 'loss/train': 2.2116613388061523} +07/25/2024 11:49:46 - INFO - __main__ - Step 5188: {'lr': 0.0004988865271422734, 'samples': 249024, 'steps': 5187, 'loss/train': 2.579035758972168} +07/25/2024 11:49:46 - INFO - __main__ - Step 5189: {'lr': 0.0004988860311453091, 'samples': 249072, 'steps': 5188, 'loss/train': 1.892147421836853} +07/25/2024 11:49:46 - INFO - __main__ - Step 5190: {'lr': 0.000498885535038145, 'samples': 249120, 'steps': 5189, 'loss/train': 1.1182268857955933} +07/25/2024 11:49:46 - INFO - __main__ - Step 5191: {'lr': 0.0004988850388207815, 'samples': 249168, 'steps': 5190, 'loss/train': 2.435643434524536} +07/25/2024 11:49:47 - INFO - __main__ - Step 5192: {'lr': 0.0004988845424932185, 'samples': 249216, 'steps': 5191, 'loss/train': 1.400998592376709} +07/25/2024 11:49:47 - INFO - __main__ - Step 5193: {'lr': 0.0004988840460554565, 'samples': 249264, 'steps': 5192, 'loss/train': 2.075918674468994} +07/25/2024 11:49:47 - INFO - __main__ - Step 5194: {'lr': 0.0004988835495074956, 'samples': 249312, 'steps': 5193, 'loss/train': 2.1886346340179443} +07/25/2024 11:49:48 - INFO - __main__ - Step 5195: {'lr': 0.000498883052849336, 'samples': 249360, 'steps': 5194, 'loss/train': 3.2297096252441406} +07/25/2024 11:49:48 - INFO - __main__ - Step 5196: {'lr': 0.000498882556080978, 'samples': 249408, 'steps': 5195, 'loss/train': 0.48535823822021484} +07/25/2024 11:49:48 - INFO - __main__ - Step 5197: {'lr': 0.0004988820592024218, 'samples': 249456, 'steps': 5196, 'loss/train': 2.188481330871582} +07/25/2024 11:49:48 - INFO - __main__ - Step 5198: {'lr': 0.0004988815622136676, 'samples': 249504, 'steps': 5197, 'loss/train': 1.7064707279205322} +07/25/2024 11:49:49 - INFO - __main__ - Step 5199: {'lr': 0.0004988810651147155, 'samples': 249552, 'steps': 5198, 'loss/train': 1.8134695291519165} +07/25/2024 11:49:49 - INFO - __main__ - Step 5200: {'lr': 0.000498880567905566, 'samples': 249600, 'steps': 5199, 'loss/train': 2.4116086959838867} +07/25/2024 11:49:49 - INFO - __main__ - Step 5201: {'lr': 0.0004988800705862191, 'samples': 249648, 'steps': 5200, 'loss/train': 2.611421823501587} +07/25/2024 11:49:50 - INFO - __main__ - Step 5202: {'lr': 0.000498879573156675, 'samples': 249696, 'steps': 5201, 'loss/train': 2.1462204456329346} +07/25/2024 11:49:50 - INFO - __main__ - Step 5203: {'lr': 0.0004988790756169341, 'samples': 249744, 'steps': 5202, 'loss/train': 3.0629444122314453} +07/25/2024 11:49:50 - INFO - __main__ - Step 5204: {'lr': 0.0004988785779669964, 'samples': 249792, 'steps': 5203, 'loss/train': 1.6763336658477783} +07/25/2024 11:49:50 - INFO - __main__ - Step 5205: {'lr': 0.0004988780802068624, 'samples': 249840, 'steps': 5204, 'loss/train': 2.45881986618042} +07/25/2024 11:49:51 - INFO - __main__ - Step 5206: {'lr': 0.0004988775823365321, 'samples': 249888, 'steps': 5205, 'loss/train': 1.2589750289916992} +07/25/2024 11:49:51 - INFO - __main__ - Step 5207: {'lr': 0.0004988770843560058, 'samples': 249936, 'steps': 5206, 'loss/train': 2.2101943492889404} +07/25/2024 11:49:51 - INFO - __main__ - Step 5208: {'lr': 0.0004988765862652836, 'samples': 249984, 'steps': 5207, 'loss/train': 2.4605326652526855} +07/25/2024 11:49:52 - INFO - __main__ - Step 5209: {'lr': 0.0004988760880643659, 'samples': 250032, 'steps': 5208, 'loss/train': 2.45807147026062} +07/25/2024 11:49:52 - INFO - __main__ - Step 5210: {'lr': 0.0004988755897532528, 'samples': 250080, 'steps': 5209, 'loss/train': 0.5860806703567505} +07/25/2024 11:49:52 - INFO - __main__ - Step 5211: {'lr': 0.0004988750913319446, 'samples': 250128, 'steps': 5210, 'loss/train': 1.2844244241714478} +07/25/2024 11:49:52 - INFO - __main__ - Step 5212: {'lr': 0.0004988745928004415, 'samples': 250176, 'steps': 5211, 'loss/train': 2.2740671634674072} +07/25/2024 11:49:53 - INFO - __main__ - Step 5213: {'lr': 0.0004988740941587436, 'samples': 250224, 'steps': 5212, 'loss/train': 2.3038814067840576} +07/25/2024 11:49:53 - INFO - __main__ - Step 5214: {'lr': 0.0004988735954068513, 'samples': 250272, 'steps': 5213, 'loss/train': 2.2774085998535156} +07/25/2024 11:49:53 - INFO - __main__ - Step 5215: {'lr': 0.0004988730965447647, 'samples': 250320, 'steps': 5214, 'loss/train': 2.6173670291900635} +07/25/2024 11:49:54 - INFO - __main__ - Step 5216: {'lr': 0.0004988725975724841, 'samples': 250368, 'steps': 5215, 'loss/train': 2.3399572372436523} +07/25/2024 11:49:54 - INFO - __main__ - Step 5217: {'lr': 0.0004988720984900098, 'samples': 250416, 'steps': 5216, 'loss/train': 2.2749083042144775} +07/25/2024 11:49:54 - INFO - __main__ - Step 5218: {'lr': 0.0004988715992973417, 'samples': 250464, 'steps': 5217, 'loss/train': 2.190289258956909} +07/25/2024 11:49:54 - INFO - __main__ - Step 5219: {'lr': 0.0004988710999944804, 'samples': 250512, 'steps': 5218, 'loss/train': 1.7698544263839722} +07/25/2024 11:49:55 - INFO - __main__ - Step 5220: {'lr': 0.0004988706005814258, 'samples': 250560, 'steps': 5219, 'loss/train': 0.34608736634254456} +07/25/2024 11:49:55 - INFO - __main__ - Step 5221: {'lr': 0.0004988701010581784, 'samples': 250608, 'steps': 5220, 'loss/train': 1.005584955215454} +07/25/2024 11:49:55 - INFO - __main__ - Step 5222: {'lr': 0.0004988696014247382, 'samples': 250656, 'steps': 5221, 'loss/train': 1.6211925745010376} +07/25/2024 11:49:55 - INFO - __main__ - Step 5223: {'lr': 0.0004988691016811056, 'samples': 250704, 'steps': 5222, 'loss/train': 1.3650627136230469} +07/25/2024 11:49:56 - INFO - __main__ - Step 5224: {'lr': 0.0004988686018272807, 'samples': 250752, 'steps': 5223, 'loss/train': 2.2846603393554688} +07/25/2024 11:49:56 - INFO - __main__ - Step 5225: {'lr': 0.0004988681018632638, 'samples': 250800, 'steps': 5224, 'loss/train': 2.39262056350708} +07/25/2024 11:49:56 - INFO - __main__ - Step 5226: {'lr': 0.000498867601789055, 'samples': 250848, 'steps': 5225, 'loss/train': 2.5858688354492188} +07/25/2024 11:49:57 - INFO - __main__ - Step 5227: {'lr': 0.0004988671016046547, 'samples': 250896, 'steps': 5226, 'loss/train': 2.340394973754883} +07/25/2024 11:49:57 - INFO - __main__ - Step 5228: {'lr': 0.0004988666013100629, 'samples': 250944, 'steps': 5227, 'loss/train': 0.6587827205657959} +07/25/2024 11:49:57 - INFO - __main__ - Step 5229: {'lr': 0.0004988661009052799, 'samples': 250992, 'steps': 5228, 'loss/train': 2.362729549407959} +07/25/2024 11:49:57 - INFO - __main__ - Step 5230: {'lr': 0.0004988656003903061, 'samples': 251040, 'steps': 5229, 'loss/train': 2.072247266769409} +07/25/2024 11:49:58 - INFO - __main__ - Step 5231: {'lr': 0.0004988650997651416, 'samples': 251088, 'steps': 5230, 'loss/train': 2.2173616886138916} +07/25/2024 11:49:58 - INFO - __main__ - Step 5232: {'lr': 0.0004988645990297865, 'samples': 251136, 'steps': 5231, 'loss/train': 2.1912126541137695} +07/25/2024 11:49:58 - INFO - __main__ - Step 5233: {'lr': 0.0004988640981842411, 'samples': 251184, 'steps': 5232, 'loss/train': 2.7475855350494385} +07/25/2024 11:49:59 - INFO - __main__ - Step 5234: {'lr': 0.0004988635972285058, 'samples': 251232, 'steps': 5233, 'loss/train': 0.2517217993736267} +07/25/2024 11:49:59 - INFO - __main__ - Step 5235: {'lr': 0.0004988630961625806, 'samples': 251280, 'steps': 5234, 'loss/train': 1.8768925666809082} +07/25/2024 11:49:59 - INFO - __main__ - Step 5236: {'lr': 0.0004988625949864658, 'samples': 251328, 'steps': 5235, 'loss/train': 1.7105796337127686} +07/25/2024 11:49:59 - INFO - __main__ - Step 5237: {'lr': 0.0004988620937001615, 'samples': 251376, 'steps': 5236, 'loss/train': 2.4771180152893066} +07/25/2024 11:50:00 - INFO - __main__ - Step 5238: {'lr': 0.0004988615923036682, 'samples': 251424, 'steps': 5237, 'loss/train': 2.1790199279785156} +07/25/2024 11:50:00 - INFO - __main__ - Step 5239: {'lr': 0.0004988610907969859, 'samples': 251472, 'steps': 5238, 'loss/train': 2.127770185470581} +07/25/2024 11:50:00 - INFO - __main__ - Step 5240: {'lr': 0.0004988605891801149, 'samples': 251520, 'steps': 5239, 'loss/train': 2.2213690280914307} +07/25/2024 11:50:01 - INFO - __main__ - Step 5241: {'lr': 0.0004988600874530554, 'samples': 251568, 'steps': 5240, 'loss/train': 2.194070339202881} +07/25/2024 11:50:01 - INFO - __main__ - Step 5242: {'lr': 0.0004988595856158076, 'samples': 251616, 'steps': 5241, 'loss/train': 2.5336179733276367} +07/25/2024 11:50:01 - INFO - __main__ - Step 5243: {'lr': 0.0004988590836683718, 'samples': 251664, 'steps': 5242, 'loss/train': 2.2027647495269775} +07/25/2024 11:50:01 - INFO - __main__ - Step 5244: {'lr': 0.0004988585816107482, 'samples': 251712, 'steps': 5243, 'loss/train': 0.29968342185020447} +07/25/2024 11:50:02 - INFO - __main__ - Step 5245: {'lr': 0.0004988580794429368, 'samples': 251760, 'steps': 5244, 'loss/train': 1.296839952468872} +07/25/2024 11:50:02 - INFO - __main__ - Step 5246: {'lr': 0.0004988575771649381, 'samples': 251808, 'steps': 5245, 'loss/train': 1.5904605388641357} +07/25/2024 11:50:02 - INFO - __main__ - Step 5247: {'lr': 0.0004988570747767524, 'samples': 251856, 'steps': 5246, 'loss/train': 1.299341082572937} +07/25/2024 11:50:03 - INFO - __main__ - Step 5248: {'lr': 0.0004988565722783796, 'samples': 251904, 'steps': 5247, 'loss/train': 1.6990094184875488} +07/25/2024 11:50:03 - INFO - __main__ - Step 5249: {'lr': 0.0004988560696698201, 'samples': 251952, 'steps': 5248, 'loss/train': 2.3996477127075195} +07/25/2024 11:50:03 - INFO - __main__ - Step 5250: {'lr': 0.0004988555669510742, 'samples': 252000, 'steps': 5249, 'loss/train': 2.204681396484375} +07/25/2024 11:50:03 - INFO - __main__ - Step 5251: {'lr': 0.0004988550641221419, 'samples': 252048, 'steps': 5250, 'loss/train': 2.4109878540039062} +07/25/2024 11:50:04 - INFO - __main__ - Step 5252: {'lr': 0.0004988545611830236, 'samples': 252096, 'steps': 5251, 'loss/train': 2.011251926422119} +07/25/2024 11:50:04 - INFO - __main__ - Step 5253: {'lr': 0.0004988540581337195, 'samples': 252144, 'steps': 5252, 'loss/train': 2.106821298599243} +07/25/2024 11:50:04 - INFO - __main__ - Step 5254: {'lr': 0.0004988535549742298, 'samples': 252192, 'steps': 5253, 'loss/train': 2.453597068786621} +07/25/2024 11:50:05 - INFO - __main__ - Step 5255: {'lr': 0.0004988530517045546, 'samples': 252240, 'steps': 5254, 'loss/train': 1.7595683336257935} +07/25/2024 11:50:05 - INFO - __main__ - Step 5256: {'lr': 0.0004988525483246944, 'samples': 252288, 'steps': 5255, 'loss/train': 2.7381815910339355} +07/25/2024 11:50:05 - INFO - __main__ - Step 5257: {'lr': 0.0004988520448346492, 'samples': 252336, 'steps': 5256, 'loss/train': 2.3600471019744873} +07/25/2024 11:50:05 - INFO - __main__ - Step 5258: {'lr': 0.0004988515412344193, 'samples': 252384, 'steps': 5257, 'loss/train': 0.2513861060142517} +07/25/2024 11:50:06 - INFO - __main__ - Step 5259: {'lr': 0.0004988510375240049, 'samples': 252432, 'steps': 5258, 'loss/train': 2.431821346282959} +07/25/2024 11:50:06 - INFO - __main__ - Step 5260: {'lr': 0.0004988505337034062, 'samples': 252480, 'steps': 5259, 'loss/train': 2.4019041061401367} +07/25/2024 11:50:06 - INFO - __main__ - Step 5261: {'lr': 0.0004988500297726235, 'samples': 252528, 'steps': 5260, 'loss/train': 2.543900966644287} +07/25/2024 11:50:07 - INFO - __main__ - Step 5262: {'lr': 0.000498849525731657, 'samples': 252576, 'steps': 5261, 'loss/train': 1.9827932119369507} +07/25/2024 11:50:07 - INFO - __main__ - Step 5263: {'lr': 0.0004988490215805069, 'samples': 252624, 'steps': 5262, 'loss/train': 2.4021286964416504} +07/25/2024 11:50:07 - INFO - __main__ - Step 5264: {'lr': 0.0004988485173191734, 'samples': 252672, 'steps': 5263, 'loss/train': 2.1620404720306396} +07/25/2024 11:50:07 - INFO - __main__ - Step 5265: {'lr': 0.0004988480129476567, 'samples': 252720, 'steps': 5264, 'loss/train': 2.150264024734497} +07/25/2024 11:50:08 - INFO - __main__ - Step 5266: {'lr': 0.0004988475084659571, 'samples': 252768, 'steps': 5265, 'loss/train': 2.304629325866699} +07/25/2024 11:50:08 - INFO - __main__ - Step 5267: {'lr': 0.0004988470038740749, 'samples': 252816, 'steps': 5266, 'loss/train': 2.5314524173736572} +07/25/2024 11:50:08 - INFO - __main__ - Step 5268: {'lr': 0.0004988464991720101, 'samples': 252864, 'steps': 5267, 'loss/train': 0.3390090763568878} +07/25/2024 11:50:09 - INFO - __main__ - Step 5269: {'lr': 0.0004988459943597631, 'samples': 252912, 'steps': 5268, 'loss/train': 1.8893444538116455} +07/25/2024 11:50:09 - INFO - __main__ - Step 5270: {'lr': 0.000498845489437334, 'samples': 252960, 'steps': 5269, 'loss/train': 1.181835651397705} +07/25/2024 11:50:09 - INFO - __main__ - Step 5271: {'lr': 0.0004988449844047231, 'samples': 253008, 'steps': 5270, 'loss/train': 1.529634952545166} +07/25/2024 11:50:09 - INFO - __main__ - Step 5272: {'lr': 0.0004988444792619306, 'samples': 253056, 'steps': 5271, 'loss/train': 1.922347068786621} +07/25/2024 11:50:10 - INFO - __main__ - Step 5273: {'lr': 0.0004988439740089568, 'samples': 253104, 'steps': 5272, 'loss/train': 2.641486883163452} +07/25/2024 11:50:10 - INFO - __main__ - Step 5274: {'lr': 0.0004988434686458019, 'samples': 253152, 'steps': 5273, 'loss/train': 2.1294209957122803} +07/25/2024 11:50:10 - INFO - __main__ - Step 5275: {'lr': 0.000498842963172466, 'samples': 253200, 'steps': 5274, 'loss/train': 2.0500614643096924} +07/25/2024 11:50:11 - INFO - __main__ - Step 5276: {'lr': 0.0004988424575889495, 'samples': 253248, 'steps': 5275, 'loss/train': 1.9616526365280151} +07/25/2024 11:50:11 - INFO - __main__ - Step 5277: {'lr': 0.0004988419518952525, 'samples': 253296, 'steps': 5276, 'loss/train': 2.1482656002044678} +07/25/2024 11:50:11 - INFO - __main__ - Step 5278: {'lr': 0.0004988414460913752, 'samples': 253344, 'steps': 5277, 'loss/train': 2.3191967010498047} +07/25/2024 11:50:11 - INFO - __main__ - Step 5279: {'lr': 0.0004988409401773178, 'samples': 253392, 'steps': 5278, 'loss/train': 2.0731046199798584} +07/25/2024 11:50:12 - INFO - __main__ - Step 5280: {'lr': 0.0004988404341530807, 'samples': 253440, 'steps': 5279, 'loss/train': 1.999747395515442} +07/25/2024 11:50:12 - INFO - __main__ - Step 5281: {'lr': 0.000498839928018664, 'samples': 253488, 'steps': 5280, 'loss/train': 1.603522777557373} +07/25/2024 11:50:12 - INFO - __main__ - Step 5282: {'lr': 0.0004988394217740681, 'samples': 253536, 'steps': 5281, 'loss/train': 0.3202686607837677} +07/25/2024 11:50:13 - INFO - __main__ - Step 5283: {'lr': 0.0004988389154192929, 'samples': 253584, 'steps': 5282, 'loss/train': 2.208099603652954} +07/25/2024 11:50:13 - INFO - __main__ - Step 5284: {'lr': 0.000498838408954339, 'samples': 253632, 'steps': 5283, 'loss/train': 1.1501312255859375} +07/25/2024 11:50:13 - INFO - __main__ - Step 5285: {'lr': 0.0004988379023792062, 'samples': 253680, 'steps': 5284, 'loss/train': 2.1681134700775146} +07/25/2024 11:50:13 - INFO - __main__ - Step 5286: {'lr': 0.0004988373956938951, 'samples': 253728, 'steps': 5285, 'loss/train': 1.2977510690689087} +07/25/2024 11:50:14 - INFO - __main__ - Step 5287: {'lr': 0.0004988368888984056, 'samples': 253776, 'steps': 5286, 'loss/train': 2.1476783752441406} +07/25/2024 11:50:14 - INFO - __main__ - Step 5288: {'lr': 0.0004988363819927383, 'samples': 253824, 'steps': 5287, 'loss/train': 2.409252405166626} +07/25/2024 11:50:14 - INFO - __main__ - Step 5289: {'lr': 0.0004988358749768931, 'samples': 253872, 'steps': 5288, 'loss/train': 2.630723714828491} +07/25/2024 11:50:15 - INFO - __main__ - Step 5290: {'lr': 0.0004988353678508704, 'samples': 253920, 'steps': 5289, 'loss/train': 2.734774589538574} +07/25/2024 11:50:15 - INFO - __main__ - Step 5291: {'lr': 0.0004988348606146703, 'samples': 253968, 'steps': 5290, 'loss/train': 2.2583298683166504} +07/25/2024 11:50:15 - INFO - __main__ - Step 5292: {'lr': 0.0004988343532682932, 'samples': 254016, 'steps': 5291, 'loss/train': 0.2866945266723633} +07/25/2024 11:50:15 - INFO - __main__ - Step 5293: {'lr': 0.0004988338458117392, 'samples': 254064, 'steps': 5292, 'loss/train': 2.6914587020874023} +07/25/2024 11:50:16 - INFO - __main__ - Step 5294: {'lr': 0.0004988333382450085, 'samples': 254112, 'steps': 5293, 'loss/train': 1.0998282432556152} +07/25/2024 11:50:16 - INFO - __main__ - Step 5295: {'lr': 0.0004988328305681013, 'samples': 254160, 'steps': 5294, 'loss/train': 1.329351782798767} +07/25/2024 11:50:16 - INFO - __main__ - Step 5296: {'lr': 0.0004988323227810181, 'samples': 254208, 'steps': 5295, 'loss/train': 1.9340492486953735} +07/25/2024 11:50:17 - INFO - __main__ - Step 5297: {'lr': 0.0004988318148837587, 'samples': 254256, 'steps': 5296, 'loss/train': 1.9804648160934448} +07/25/2024 11:50:17 - INFO - __main__ - Step 5298: {'lr': 0.0004988313068763238, 'samples': 254304, 'steps': 5297, 'loss/train': 2.3140134811401367} +07/25/2024 11:50:17 - INFO - __main__ - Step 5299: {'lr': 0.0004988307987587132, 'samples': 254352, 'steps': 5298, 'loss/train': 2.281946897506714} +07/25/2024 11:50:17 - INFO - __main__ - Step 5300: {'lr': 0.0004988302905309272, 'samples': 254400, 'steps': 5299, 'loss/train': 1.8429707288742065} +07/25/2024 11:50:18 - INFO - __main__ - Step 5301: {'lr': 0.0004988297821929662, 'samples': 254448, 'steps': 5300, 'loss/train': 2.3728954792022705} +07/25/2024 11:50:18 - INFO - __main__ - Step 5302: {'lr': 0.0004988292737448304, 'samples': 254496, 'steps': 5301, 'loss/train': 2.2307584285736084} +07/25/2024 11:50:18 - INFO - __main__ - Step 5303: {'lr': 0.00049882876518652, 'samples': 254544, 'steps': 5302, 'loss/train': 2.1105902194976807} +07/25/2024 11:50:18 - INFO - __main__ - Step 5304: {'lr': 0.0004988282565180351, 'samples': 254592, 'steps': 5303, 'loss/train': 2.181485176086426} +07/25/2024 11:50:19 - INFO - __main__ - Step 5305: {'lr': 0.000498827747739376, 'samples': 254640, 'steps': 5304, 'loss/train': 2.0121536254882812} +07/25/2024 11:50:19 - INFO - __main__ - Step 5306: {'lr': 0.000498827238850543, 'samples': 254688, 'steps': 5305, 'loss/train': 0.24374555051326752} +07/25/2024 11:50:19 - INFO - __main__ - Step 5307: {'lr': 0.0004988267298515364, 'samples': 254736, 'steps': 5306, 'loss/train': 2.1482620239257812} +07/25/2024 11:50:20 - INFO - __main__ - Step 5308: {'lr': 0.0004988262207423561, 'samples': 254784, 'steps': 5307, 'loss/train': 1.560760259628296} +07/25/2024 11:50:20 - INFO - __main__ - Step 5309: {'lr': 0.0004988257115230026, 'samples': 254832, 'steps': 5308, 'loss/train': 1.7641334533691406} +07/25/2024 11:50:20 - INFO - __main__ - Step 5310: {'lr': 0.0004988252021934761, 'samples': 254880, 'steps': 5309, 'loss/train': 1.641309142112732} +07/25/2024 11:50:20 - INFO - __main__ - Step 5311: {'lr': 0.0004988246927537766, 'samples': 254928, 'steps': 5310, 'loss/train': 2.3040058612823486} +07/25/2024 11:50:21 - INFO - __main__ - Step 5312: {'lr': 0.0004988241832039047, 'samples': 254976, 'steps': 5311, 'loss/train': 2.639660120010376} +07/25/2024 11:50:21 - INFO - __main__ - Step 5313: {'lr': 0.0004988236735438602, 'samples': 255024, 'steps': 5312, 'loss/train': 2.090045213699341} +07/25/2024 11:50:21 - INFO - __main__ - Step 5314: {'lr': 0.0004988231637736438, 'samples': 255072, 'steps': 5313, 'loss/train': 2.4398250579833984} +07/25/2024 11:50:22 - INFO - __main__ - Step 5315: {'lr': 0.0004988226538932553, 'samples': 255120, 'steps': 5314, 'loss/train': 2.135904550552368} +07/25/2024 11:50:22 - INFO - __main__ - Step 5316: {'lr': 0.0004988221439026951, 'samples': 255168, 'steps': 5315, 'loss/train': 0.2576538324356079} +07/25/2024 11:50:22 - INFO - __main__ - Step 5317: {'lr': 0.0004988216338019636, 'samples': 255216, 'steps': 5316, 'loss/train': 2.2631235122680664} +07/25/2024 11:50:22 - INFO - __main__ - Step 5318: {'lr': 0.0004988211235910607, 'samples': 255264, 'steps': 5317, 'loss/train': 1.2186954021453857} +07/25/2024 11:50:23 - INFO - __main__ - Step 5319: {'lr': 0.0004988206132699867, 'samples': 255312, 'steps': 5318, 'loss/train': 1.3011001348495483} +07/25/2024 11:50:23 - INFO - __main__ - Step 5320: {'lr': 0.0004988201028387422, 'samples': 255360, 'steps': 5319, 'loss/train': 2.1185121536254883} +07/25/2024 11:50:23 - INFO - __main__ - Step 5321: {'lr': 0.0004988195922973269, 'samples': 255408, 'steps': 5320, 'loss/train': 1.8517789840698242} +07/25/2024 11:50:24 - INFO - __main__ - Step 5322: {'lr': 0.0004988190816457413, 'samples': 255456, 'steps': 5321, 'loss/train': 2.070641279220581} +07/25/2024 11:50:24 - INFO - __main__ - Step 5323: {'lr': 0.0004988185708839855, 'samples': 255504, 'steps': 5322, 'loss/train': 2.1083829402923584} +07/25/2024 11:50:24 - INFO - __main__ - Step 5324: {'lr': 0.0004988180600120599, 'samples': 255552, 'steps': 5323, 'loss/train': 2.0450220108032227} +07/25/2024 11:50:24 - INFO - __main__ - Step 5325: {'lr': 0.0004988175490299646, 'samples': 255600, 'steps': 5324, 'loss/train': 2.5885329246520996} +07/25/2024 11:50:25 - INFO - __main__ - Step 5326: {'lr': 0.0004988170379377, 'samples': 255648, 'steps': 5325, 'loss/train': 2.341860294342041} +07/25/2024 11:50:25 - INFO - __main__ - Step 5327: {'lr': 0.000498816526735266, 'samples': 255696, 'steps': 5326, 'loss/train': 1.6115890741348267} +07/25/2024 11:50:25 - INFO - __main__ - Step 5328: {'lr': 0.0004988160154226632, 'samples': 255744, 'steps': 5327, 'loss/train': 1.576189398765564} +07/25/2024 11:50:26 - INFO - __main__ - Step 5329: {'lr': 0.0004988155039998915, 'samples': 255792, 'steps': 5328, 'loss/train': 1.7625147104263306} +07/25/2024 11:50:26 - INFO - __main__ - Step 5330: {'lr': 0.0004988149924669513, 'samples': 255840, 'steps': 5329, 'loss/train': 0.2413676679134369} +07/25/2024 11:50:26 - INFO - __main__ - Step 5331: {'lr': 0.0004988144808238427, 'samples': 255888, 'steps': 5330, 'loss/train': 1.775091528892517} +07/25/2024 11:50:26 - INFO - __main__ - Step 5332: {'lr': 0.0004988139690705662, 'samples': 255936, 'steps': 5331, 'loss/train': 1.5828275680541992} +07/25/2024 11:50:27 - INFO - __main__ - Step 5333: {'lr': 0.0004988134572071217, 'samples': 255984, 'steps': 5332, 'loss/train': 1.3747411966323853} +07/25/2024 11:50:27 - INFO - __main__ - Step 5334: {'lr': 0.0004988129452335096, 'samples': 256032, 'steps': 5333, 'loss/train': 1.9498372077941895} +07/25/2024 11:50:27 - INFO - __main__ - Step 5335: {'lr': 0.0004988124331497302, 'samples': 256080, 'steps': 5334, 'loss/train': 2.1488356590270996} +07/25/2024 11:50:28 - INFO - __main__ - Step 5336: {'lr': 0.0004988119209557836, 'samples': 256128, 'steps': 5335, 'loss/train': 2.1214537620544434} +07/25/2024 11:50:28 - INFO - __main__ - Step 5337: {'lr': 0.00049881140865167, 'samples': 256176, 'steps': 5336, 'loss/train': 2.300506114959717} +07/25/2024 11:50:28 - INFO - __main__ - Step 5338: {'lr': 0.0004988108962373897, 'samples': 256224, 'steps': 5337, 'loss/train': 2.258598566055298} +07/25/2024 11:50:28 - INFO - __main__ - Step 5339: {'lr': 0.0004988103837129429, 'samples': 256272, 'steps': 5338, 'loss/train': 2.0884618759155273} +07/25/2024 11:50:29 - INFO - __main__ - Step 5340: {'lr': 0.0004988098710783297, 'samples': 256320, 'steps': 5339, 'loss/train': 0.23416540026664734} +07/25/2024 11:50:29 - INFO - __main__ - Step 5341: {'lr': 0.0004988093583335507, 'samples': 256368, 'steps': 5340, 'loss/train': 2.2193491458892822} +07/25/2024 11:50:29 - INFO - __main__ - Step 5342: {'lr': 0.0004988088454786057, 'samples': 256416, 'steps': 5341, 'loss/train': 1.096705675125122} +07/25/2024 11:50:30 - INFO - __main__ - Step 5343: {'lr': 0.0004988083325134953, 'samples': 256464, 'steps': 5342, 'loss/train': 1.564125418663025} +07/25/2024 11:50:30 - INFO - __main__ - Step 5344: {'lr': 0.0004988078194382194, 'samples': 256512, 'steps': 5343, 'loss/train': 2.5638210773468018} +07/25/2024 11:50:30 - INFO - __main__ - Step 5345: {'lr': 0.0004988073062527783, 'samples': 256560, 'steps': 5344, 'loss/train': 2.2877142429351807} +07/25/2024 11:50:30 - INFO - __main__ - Step 5346: {'lr': 0.0004988067929571724, 'samples': 256608, 'steps': 5345, 'loss/train': 2.0569939613342285} +07/25/2024 11:50:31 - INFO - __main__ - Step 5347: {'lr': 0.0004988062795514019, 'samples': 256656, 'steps': 5346, 'loss/train': 1.150599479675293} +07/25/2024 11:50:31 - INFO - __main__ - Step 5348: {'lr': 0.0004988057660354669, 'samples': 256704, 'steps': 5347, 'loss/train': 2.301309823989868} +07/25/2024 11:50:31 - INFO - __main__ - Step 5349: {'lr': 0.0004988052524093676, 'samples': 256752, 'steps': 5348, 'loss/train': 2.30468487739563} +07/25/2024 11:50:32 - INFO - __main__ - Step 5350: {'lr': 0.0004988047386731044, 'samples': 256800, 'steps': 5349, 'loss/train': 2.3178532123565674} +07/25/2024 11:50:32 - INFO - __main__ - Step 5351: {'lr': 0.0004988042248266774, 'samples': 256848, 'steps': 5350, 'loss/train': 1.5533419847488403} +07/25/2024 11:50:32 - INFO - __main__ - Step 5352: {'lr': 0.0004988037108700868, 'samples': 256896, 'steps': 5351, 'loss/train': 2.562481164932251} +07/25/2024 11:50:32 - INFO - __main__ - Step 5353: {'lr': 0.000498803196803333, 'samples': 256944, 'steps': 5352, 'loss/train': 1.9689502716064453} +07/25/2024 11:50:33 - INFO - __main__ - Step 5354: {'lr': 0.000498802682626416, 'samples': 256992, 'steps': 5353, 'loss/train': 0.2678755521774292} +07/25/2024 11:50:33 - INFO - __main__ - Step 5355: {'lr': 0.0004988021683393362, 'samples': 257040, 'steps': 5354, 'loss/train': 1.920213222503662} +07/25/2024 11:50:33 - INFO - __main__ - Step 5356: {'lr': 0.0004988016539420937, 'samples': 257088, 'steps': 5355, 'loss/train': 2.059217691421509} +07/25/2024 11:50:33 - INFO - __main__ - Step 5357: {'lr': 0.0004988011394346889, 'samples': 257136, 'steps': 5356, 'loss/train': 1.330137014389038} +07/25/2024 11:50:34 - INFO - __main__ - Step 5358: {'lr': 0.000498800624817122, 'samples': 257184, 'steps': 5357, 'loss/train': 2.2612364292144775} +07/25/2024 11:50:34 - INFO - __main__ - Step 5359: {'lr': 0.000498800110089393, 'samples': 257232, 'steps': 5358, 'loss/train': 2.4011430740356445} +07/25/2024 11:50:34 - INFO - __main__ - Step 5360: {'lr': 0.0004987995952515024, 'samples': 257280, 'steps': 5359, 'loss/train': 2.3263399600982666} +07/25/2024 11:50:35 - INFO - __main__ - Step 5361: {'lr': 0.0004987990803034503, 'samples': 257328, 'steps': 5360, 'loss/train': 1.9655388593673706} +07/25/2024 11:50:35 - INFO - __main__ - Step 5362: {'lr': 0.0004987985652452367, 'samples': 257376, 'steps': 5361, 'loss/train': 2.0538437366485596} +07/25/2024 11:50:35 - INFO - __main__ - Step 5363: {'lr': 0.0004987980500768623, 'samples': 257424, 'steps': 5362, 'loss/train': 2.05297589302063} +07/25/2024 11:50:35 - INFO - __main__ - Step 5364: {'lr': 0.0004987975347983271, 'samples': 257472, 'steps': 5363, 'loss/train': 0.3154360353946686} +07/25/2024 11:50:36 - INFO - __main__ - Step 5365: {'lr': 0.0004987970194096313, 'samples': 257520, 'steps': 5364, 'loss/train': 2.3104848861694336} +07/25/2024 11:50:36 - INFO - __main__ - Step 5366: {'lr': 0.000498796503910775, 'samples': 257568, 'steps': 5365, 'loss/train': 1.2871627807617188} +07/25/2024 11:50:36 - INFO - __main__ - Step 5367: {'lr': 0.0004987959883017588, 'samples': 257616, 'steps': 5366, 'loss/train': 1.3751684427261353} +07/25/2024 11:50:37 - INFO - __main__ - Step 5368: {'lr': 0.0004987954725825826, 'samples': 257664, 'steps': 5367, 'loss/train': 2.321523427963257} +07/25/2024 11:50:37 - INFO - __main__ - Step 5369: {'lr': 0.0004987949567532467, 'samples': 257712, 'steps': 5368, 'loss/train': 1.6355019807815552} +07/25/2024 11:50:37 - INFO - __main__ - Step 5370: {'lr': 0.0004987944408137514, 'samples': 257760, 'steps': 5369, 'loss/train': 2.0806939601898193} +07/25/2024 11:50:37 - INFO - __main__ - Step 5371: {'lr': 0.0004987939247640969, 'samples': 257808, 'steps': 5370, 'loss/train': 2.166724681854248} +07/25/2024 11:50:38 - INFO - __main__ - Step 5372: {'lr': 0.0004987934086042834, 'samples': 257856, 'steps': 5371, 'loss/train': 2.085383176803589} +07/25/2024 11:50:38 - INFO - __main__ - Step 5373: {'lr': 0.0004987928923343112, 'samples': 257904, 'steps': 5372, 'loss/train': 2.49841570854187} +07/25/2024 11:50:38 - INFO - __main__ - Step 5374: {'lr': 0.0004987923759541804, 'samples': 257952, 'steps': 5373, 'loss/train': 2.505544424057007} +07/25/2024 11:50:39 - INFO - __main__ - Step 5375: {'lr': 0.0004987918594638914, 'samples': 258000, 'steps': 5374, 'loss/train': 2.4933664798736572} +07/25/2024 11:50:39 - INFO - __main__ - Step 5376: {'lr': 0.0004987913428634443, 'samples': 258048, 'steps': 5375, 'loss/train': 2.5852534770965576} +07/25/2024 11:50:39 - INFO - __main__ - Step 5377: {'lr': 0.0004987908261528393, 'samples': 258096, 'steps': 5376, 'loss/train': 2.254215717315674} +07/25/2024 11:50:39 - INFO - __main__ - Step 5378: {'lr': 0.0004987903093320768, 'samples': 258144, 'steps': 5377, 'loss/train': 0.8158264756202698} +07/25/2024 11:50:40 - INFO - __main__ - Step 5379: {'lr': 0.0004987897924011569, 'samples': 258192, 'steps': 5378, 'loss/train': 2.7120108604431152} +07/25/2024 11:50:40 - INFO - __main__ - Step 5380: {'lr': 0.0004987892753600798, 'samples': 258240, 'steps': 5379, 'loss/train': 2.4117069244384766} +07/25/2024 11:50:40 - INFO - __main__ - Step 5381: {'lr': 0.0004987887582088458, 'samples': 258288, 'steps': 5380, 'loss/train': 1.1581335067749023} +07/25/2024 11:50:41 - INFO - __main__ - Step 5382: {'lr': 0.000498788240947455, 'samples': 258336, 'steps': 5381, 'loss/train': 1.1327415704727173} +07/25/2024 11:50:41 - INFO - __main__ - Step 5383: {'lr': 0.000498787723575908, 'samples': 258384, 'steps': 5382, 'loss/train': 1.935575008392334} +07/25/2024 11:50:41 - INFO - __main__ - Step 5384: {'lr': 0.0004987872060942045, 'samples': 258432, 'steps': 5383, 'loss/train': 1.822447657585144} +07/25/2024 11:50:41 - INFO - __main__ - Step 5385: {'lr': 0.0004987866885023451, 'samples': 258480, 'steps': 5384, 'loss/train': 2.485260248184204} +07/25/2024 11:50:42 - INFO - __main__ - Step 5386: {'lr': 0.0004987861708003299, 'samples': 258528, 'steps': 5385, 'loss/train': 2.3292102813720703} +07/25/2024 11:50:42 - INFO - __main__ - Step 5387: {'lr': 0.0004987856529881593, 'samples': 258576, 'steps': 5386, 'loss/train': 2.445741653442383} +07/25/2024 11:50:42 - INFO - __main__ - Step 5388: {'lr': 0.0004987851350658333, 'samples': 258624, 'steps': 5387, 'loss/train': 0.2206447571516037} +07/25/2024 11:50:43 - INFO - __main__ - Step 5389: {'lr': 0.0004987846170333521, 'samples': 258672, 'steps': 5388, 'loss/train': 3.096534490585327} +07/25/2024 11:50:43 - INFO - __main__ - Step 5390: {'lr': 0.0004987840988907162, 'samples': 258720, 'steps': 5389, 'loss/train': 2.046398639678955} +07/25/2024 11:50:43 - INFO - __main__ - Step 5391: {'lr': 0.0004987835806379256, 'samples': 258768, 'steps': 5390, 'loss/train': 1.139352798461914} +07/25/2024 11:50:43 - INFO - __main__ - Step 5392: {'lr': 0.0004987830622749806, 'samples': 258816, 'steps': 5391, 'loss/train': 1.862979769706726} +07/25/2024 11:50:44 - INFO - __main__ - Step 5393: {'lr': 0.0004987825438018814, 'samples': 258864, 'steps': 5392, 'loss/train': 0.97737056016922} +07/25/2024 11:50:44 - INFO - __main__ - Step 5394: {'lr': 0.0004987820252186282, 'samples': 258912, 'steps': 5393, 'loss/train': 1.4680688381195068} +07/25/2024 11:50:44 - INFO - __main__ - Step 5395: {'lr': 0.0004987815065252215, 'samples': 258960, 'steps': 5394, 'loss/train': 2.5455968379974365} +07/25/2024 11:50:45 - INFO - __main__ - Step 5396: {'lr': 0.0004987809877216612, 'samples': 259008, 'steps': 5395, 'loss/train': 2.037200689315796} +07/25/2024 11:50:45 - INFO - __main__ - Step 5397: {'lr': 0.0004987804688079475, 'samples': 259056, 'steps': 5396, 'loss/train': 2.2164220809936523} +07/25/2024 11:50:45 - INFO - __main__ - Step 5398: {'lr': 0.000498779949784081, 'samples': 259104, 'steps': 5397, 'loss/train': 2.3196237087249756} +07/25/2024 11:50:45 - INFO - __main__ - Step 5399: {'lr': 0.0004987794306500617, 'samples': 259152, 'steps': 5398, 'loss/train': 2.262186050415039} +07/25/2024 11:50:46 - INFO - __main__ - Step 5400: {'lr': 0.0004987789114058897, 'samples': 259200, 'steps': 5399, 'loss/train': 1.947948694229126} +07/25/2024 11:50:46 - INFO - __main__ - Step 5401: {'lr': 0.0004987783920515655, 'samples': 259248, 'steps': 5400, 'loss/train': 2.9048843383789062} +07/25/2024 11:50:46 - INFO - __main__ - Step 5402: {'lr': 0.0004987778725870892, 'samples': 259296, 'steps': 5401, 'loss/train': 2.7604827880859375} +07/25/2024 11:50:47 - INFO - __main__ - Step 5403: {'lr': 0.000498777353012461, 'samples': 259344, 'steps': 5402, 'loss/train': 2.8044209480285645} +07/25/2024 11:50:47 - INFO - __main__ - Step 5404: {'lr': 0.0004987768333276811, 'samples': 259392, 'steps': 5403, 'loss/train': 2.3008458614349365} +07/25/2024 11:50:47 - INFO - __main__ - Step 5405: {'lr': 0.0004987763135327498, 'samples': 259440, 'steps': 5404, 'loss/train': 1.1037423610687256} +07/25/2024 11:50:47 - INFO - __main__ - Step 5406: {'lr': 0.0004987757936276673, 'samples': 259488, 'steps': 5405, 'loss/train': 2.5467312335968018} +07/25/2024 11:50:48 - INFO - __main__ - Step 5407: {'lr': 0.000498775273612434, 'samples': 259536, 'steps': 5406, 'loss/train': 1.6324015855789185} +07/25/2024 11:50:48 - INFO - __main__ - Step 5408: {'lr': 0.0004987747534870498, 'samples': 259584, 'steps': 5407, 'loss/train': 2.425797462463379} +07/25/2024 11:50:48 - INFO - __main__ - Step 5409: {'lr': 0.0004987742332515151, 'samples': 259632, 'steps': 5408, 'loss/train': 2.5141208171844482} +07/25/2024 11:50:49 - INFO - __main__ - Step 5410: {'lr': 0.0004987737129058303, 'samples': 259680, 'steps': 5409, 'loss/train': 2.8367388248443604} +07/25/2024 11:50:49 - INFO - __main__ - Step 5411: {'lr': 0.0004987731924499954, 'samples': 259728, 'steps': 5410, 'loss/train': 2.2988429069519043} +07/25/2024 11:50:49 - INFO - __main__ - Step 5412: {'lr': 0.0004987726718840106, 'samples': 259776, 'steps': 5411, 'loss/train': 2.692996025085449} +07/25/2024 11:50:49 - INFO - __main__ - Step 5413: {'lr': 0.0004987721512078764, 'samples': 259824, 'steps': 5412, 'loss/train': 3.3953511714935303} +07/25/2024 11:50:50 - INFO - __main__ - Step 5414: {'lr': 0.0004987716304215928, 'samples': 259872, 'steps': 5413, 'loss/train': 2.089643716812134} +07/25/2024 11:50:50 - INFO - __main__ - Step 5415: {'lr': 0.0004987711095251601, 'samples': 259920, 'steps': 5414, 'loss/train': 1.2229148149490356} +07/25/2024 11:50:50 - INFO - __main__ - Step 5416: {'lr': 0.0004987705885185784, 'samples': 259968, 'steps': 5415, 'loss/train': 1.916619896888733} +07/25/2024 11:50:51 - INFO - __main__ - Step 5417: {'lr': 0.0004987700674018483, 'samples': 260016, 'steps': 5416, 'loss/train': 1.1144018173217773} +07/25/2024 11:50:51 - INFO - __main__ - Step 5418: {'lr': 0.0004987695461749695, 'samples': 260064, 'steps': 5417, 'loss/train': 1.9220997095108032} +07/25/2024 11:50:51 - INFO - __main__ - Step 5419: {'lr': 0.0004987690248379427, 'samples': 260112, 'steps': 5418, 'loss/train': 2.4740493297576904} +07/25/2024 11:50:51 - INFO - __main__ - Step 5420: {'lr': 0.0004987685033907679, 'samples': 260160, 'steps': 5419, 'loss/train': 1.6986198425292969} +07/25/2024 11:50:52 - INFO - __main__ - Step 5421: {'lr': 0.0004987679818334454, 'samples': 260208, 'steps': 5420, 'loss/train': 2.513417959213257} +07/25/2024 11:50:52 - INFO - __main__ - Step 5422: {'lr': 0.0004987674601659754, 'samples': 260256, 'steps': 5421, 'loss/train': 1.8216004371643066} +07/25/2024 11:50:52 - INFO - __main__ - Step 5423: {'lr': 0.0004987669383883581, 'samples': 260304, 'steps': 5422, 'loss/train': 2.47189998626709} +07/25/2024 11:50:53 - INFO - __main__ - Step 5424: {'lr': 0.0004987664165005938, 'samples': 260352, 'steps': 5423, 'loss/train': 2.4370784759521484} +07/25/2024 11:50:53 - INFO - __main__ - Step 5425: {'lr': 0.0004987658945026827, 'samples': 260400, 'steps': 5424, 'loss/train': 2.3633077144622803} +07/25/2024 11:50:53 - INFO - __main__ - Step 5426: {'lr': 0.0004987653723946251, 'samples': 260448, 'steps': 5425, 'loss/train': 2.463966131210327} +07/25/2024 11:50:53 - INFO - __main__ - Step 5427: {'lr': 0.0004987648501764211, 'samples': 260496, 'steps': 5426, 'loss/train': 2.9544801712036133} +07/25/2024 11:50:54 - INFO - __main__ - Step 5428: {'lr': 0.0004987643278480709, 'samples': 260544, 'steps': 5427, 'loss/train': 2.527195692062378} +07/25/2024 11:50:54 - INFO - __main__ - Step 5429: {'lr': 0.0004987638054095749, 'samples': 260592, 'steps': 5428, 'loss/train': 1.5369971990585327} +07/25/2024 11:50:54 - INFO - __main__ - Step 5430: {'lr': 0.0004987632828609333, 'samples': 260640, 'steps': 5429, 'loss/train': 2.2713229656219482} +07/25/2024 11:50:55 - INFO - __main__ - Step 5431: {'lr': 0.0004987627602021463, 'samples': 260688, 'steps': 5430, 'loss/train': 1.542920708656311} +07/25/2024 11:50:55 - INFO - __main__ - Step 5432: {'lr': 0.0004987622374332141, 'samples': 260736, 'steps': 5431, 'loss/train': 2.394089460372925} +07/25/2024 11:50:55 - INFO - __main__ - Step 5433: {'lr': 0.0004987617145541369, 'samples': 260784, 'steps': 5432, 'loss/train': 1.416930079460144} +07/25/2024 11:50:55 - INFO - __main__ - Step 5434: {'lr': 0.000498761191564915, 'samples': 260832, 'steps': 5433, 'loss/train': 2.3367440700531006} +07/25/2024 11:50:56 - INFO - __main__ - Step 5435: {'lr': 0.0004987606684655485, 'samples': 260880, 'steps': 5434, 'loss/train': 2.285501718521118} +07/25/2024 11:50:56 - INFO - __main__ - Step 5436: {'lr': 0.000498760145256038, 'samples': 260928, 'steps': 5435, 'loss/train': 2.6971943378448486} +07/25/2024 11:50:56 - INFO - __main__ - Step 5437: {'lr': 0.0004987596219363833, 'samples': 260976, 'steps': 5436, 'loss/train': 2.0528409481048584} +07/25/2024 11:50:57 - INFO - __main__ - Step 5438: {'lr': 0.0004987590985065848, 'samples': 261024, 'steps': 5437, 'loss/train': 2.0653562545776367} +07/25/2024 11:50:57 - INFO - __main__ - Step 5439: {'lr': 0.0004987585749666428, 'samples': 261072, 'steps': 5438, 'loss/train': 1.5310420989990234} +07/25/2024 11:50:57 - INFO - __main__ - Step 5440: {'lr': 0.0004987580513165575, 'samples': 261120, 'steps': 5439, 'loss/train': 0.9195838570594788} +07/25/2024 11:50:57 - INFO - __main__ - Step 5441: {'lr': 0.0004987575275563291, 'samples': 261168, 'steps': 5440, 'loss/train': 2.0418834686279297} +07/25/2024 11:50:58 - INFO - __main__ - Step 5442: {'lr': 0.0004987570036859579, 'samples': 261216, 'steps': 5441, 'loss/train': 1.6861339807510376} +07/25/2024 11:50:58 - INFO - __main__ - Step 5443: {'lr': 0.0004987564797054439, 'samples': 261264, 'steps': 5442, 'loss/train': 2.42826509475708} +07/25/2024 11:50:58 - INFO - __main__ - Step 5444: {'lr': 0.0004987559556147876, 'samples': 261312, 'steps': 5443, 'loss/train': 2.3015825748443604} +07/25/2024 11:50:58 - INFO - __main__ - Step 5445: {'lr': 0.0004987554314139892, 'samples': 261360, 'steps': 5444, 'loss/train': 2.190152883529663} +07/25/2024 11:50:59 - INFO - __main__ - Step 5446: {'lr': 0.0004987549071030487, 'samples': 261408, 'steps': 5445, 'loss/train': 1.9063903093338013} +07/25/2024 11:50:59 - INFO - __main__ - Step 5447: {'lr': 0.0004987543826819666, 'samples': 261456, 'steps': 5446, 'loss/train': 2.08368182182312} +07/25/2024 11:50:59 - INFO - __main__ - Step 5448: {'lr': 0.0004987538581507431, 'samples': 261504, 'steps': 5447, 'loss/train': 2.7756142616271973} +07/25/2024 11:51:00 - INFO - __main__ - Step 5449: {'lr': 0.0004987533335093782, 'samples': 261552, 'steps': 5448, 'loss/train': 2.2324507236480713} +07/25/2024 11:51:00 - INFO - __main__ - Step 5450: {'lr': 0.0004987528087578723, 'samples': 261600, 'steps': 5449, 'loss/train': 3.9817349910736084} +07/25/2024 11:51:00 - INFO - __main__ - Step 5451: {'lr': 0.0004987522838962257, 'samples': 261648, 'steps': 5450, 'loss/train': 2.6403086185455322} +07/25/2024 11:51:00 - INFO - __main__ - Step 5452: {'lr': 0.0004987517589244386, 'samples': 261696, 'steps': 5451, 'loss/train': 2.802910089492798} +07/25/2024 11:51:01 - INFO - __main__ - Step 5453: {'lr': 0.0004987512338425112, 'samples': 261744, 'steps': 5452, 'loss/train': 2.002866744995117} +07/25/2024 11:51:01 - INFO - __main__ - Step 5454: {'lr': 0.0004987507086504436, 'samples': 261792, 'steps': 5453, 'loss/train': 2.110844373703003} +07/25/2024 11:51:01 - INFO - __main__ - Step 5455: {'lr': 0.0004987501833482363, 'samples': 261840, 'steps': 5454, 'loss/train': 2.2108347415924072} +07/25/2024 11:51:02 - INFO - __main__ - Step 5456: {'lr': 0.0004987496579358892, 'samples': 261888, 'steps': 5455, 'loss/train': 2.1725361347198486} +07/25/2024 11:51:02 - INFO - __main__ - Step 5457: {'lr': 0.0004987491324134029, 'samples': 261936, 'steps': 5456, 'loss/train': 1.9855867624282837} +07/25/2024 11:51:02 - INFO - __main__ - Step 5458: {'lr': 0.0004987486067807773, 'samples': 261984, 'steps': 5457, 'loss/train': 2.320099353790283} +07/25/2024 11:51:02 - INFO - __main__ - Step 5459: {'lr': 0.0004987480810380129, 'samples': 262032, 'steps': 5458, 'loss/train': 2.215087890625} +07/25/2024 11:51:03 - INFO - __main__ - Step 5460: {'lr': 0.0004987475551851098, 'samples': 262080, 'steps': 5459, 'loss/train': 2.84696888923645} +07/25/2024 11:51:03 - INFO - __main__ - Step 5461: {'lr': 0.0004987470292220682, 'samples': 262128, 'steps': 5460, 'loss/train': 1.5547080039978027} +07/25/2024 11:51:03 - INFO - __main__ - Step 5462: {'lr': 0.0004987465031488884, 'samples': 262176, 'steps': 5461, 'loss/train': 1.7450720071792603} +07/25/2024 11:51:04 - INFO - __main__ - Step 5463: {'lr': 0.0004987459769655708, 'samples': 262224, 'steps': 5462, 'loss/train': 1.3994896411895752} +07/25/2024 11:51:04 - INFO - __main__ - Step 5464: {'lr': 0.0004987454506721153, 'samples': 262272, 'steps': 5463, 'loss/train': 0.534654974937439} +07/25/2024 11:51:04 - INFO - __main__ - Step 5465: {'lr': 0.0004987449242685223, 'samples': 262320, 'steps': 5464, 'loss/train': 2.161468744277954} +07/25/2024 11:51:04 - INFO - __main__ - Step 5466: {'lr': 0.000498744397754792, 'samples': 262368, 'steps': 5465, 'loss/train': 2.062575340270996} +07/25/2024 11:51:05 - INFO - __main__ - Step 5467: {'lr': 0.0004987438711309246, 'samples': 262416, 'steps': 5466, 'loss/train': 2.0247838497161865} +07/25/2024 11:51:05 - INFO - __main__ - Step 5468: {'lr': 0.0004987433443969204, 'samples': 262464, 'steps': 5467, 'loss/train': 2.1597986221313477} +07/25/2024 11:51:05 - INFO - __main__ - Step 5469: {'lr': 0.0004987428175527796, 'samples': 262512, 'steps': 5468, 'loss/train': 2.2903192043304443} +07/25/2024 11:51:06 - INFO - __main__ - Step 5470: {'lr': 0.0004987422905985026, 'samples': 262560, 'steps': 5469, 'loss/train': 1.9763691425323486} +07/25/2024 11:51:06 - INFO - __main__ - Step 5471: {'lr': 0.0004987417635340894, 'samples': 262608, 'steps': 5470, 'loss/train': 2.1869053840637207} +07/25/2024 11:51:06 - INFO - __main__ - Step 5472: {'lr': 0.0004987412363595403, 'samples': 262656, 'steps': 5471, 'loss/train': 1.5450434684753418} +07/25/2024 11:51:06 - INFO - __main__ - Step 5473: {'lr': 0.0004987407090748555, 'samples': 262704, 'steps': 5472, 'loss/train': 1.9781168699264526} +07/25/2024 11:51:07 - INFO - __main__ - Step 5474: {'lr': 0.0004987401816800353, 'samples': 262752, 'steps': 5473, 'loss/train': 2.881056308746338} +07/25/2024 11:51:07 - INFO - __main__ - Step 5475: {'lr': 0.00049873965417508, 'samples': 262800, 'steps': 5474, 'loss/train': 2.7952823638916016} +07/25/2024 11:51:07 - INFO - __main__ - Step 5476: {'lr': 0.0004987391265599897, 'samples': 262848, 'steps': 5475, 'loss/train': 2.714543342590332} +07/25/2024 11:51:08 - INFO - __main__ - Step 5477: {'lr': 0.0004987385988347647, 'samples': 262896, 'steps': 5476, 'loss/train': 2.2856922149658203} +07/25/2024 11:51:08 - INFO - __main__ - Step 5478: {'lr': 0.0004987380709994052, 'samples': 262944, 'steps': 5477, 'loss/train': 2.2547109127044678} +07/25/2024 11:51:08 - INFO - __main__ - Step 5479: {'lr': 0.0004987375430539115, 'samples': 262992, 'steps': 5478, 'loss/train': 2.133100748062134} +07/25/2024 11:51:08 - INFO - __main__ - Step 5480: {'lr': 0.0004987370149982837, 'samples': 263040, 'steps': 5479, 'loss/train': 2.283583402633667} +07/25/2024 11:51:09 - INFO - __main__ - Step 5481: {'lr': 0.0004987364868325222, 'samples': 263088, 'steps': 5480, 'loss/train': 1.5090022087097168} +07/25/2024 11:51:09 - INFO - __main__ - Step 5482: {'lr': 0.000498735958556627, 'samples': 263136, 'steps': 5481, 'loss/train': 2.231238603591919} +07/25/2024 11:51:09 - INFO - __main__ - Step 5483: {'lr': 0.0004987354301705987, 'samples': 263184, 'steps': 5482, 'loss/train': 2.070805788040161} +07/25/2024 11:51:10 - INFO - __main__ - Step 5484: {'lr': 0.0004987349016744373, 'samples': 263232, 'steps': 5483, 'loss/train': 2.5845882892608643} +07/25/2024 11:51:10 - INFO - __main__ - Step 5485: {'lr': 0.0004987343730681428, 'samples': 263280, 'steps': 5484, 'loss/train': 1.5874979496002197} +07/25/2024 11:51:10 - INFO - __main__ - Step 5486: {'lr': 0.0004987338443517159, 'samples': 263328, 'steps': 5485, 'loss/train': 3.34409499168396} +07/25/2024 11:51:10 - INFO - __main__ - Step 5487: {'lr': 0.0004987333155251567, 'samples': 263376, 'steps': 5486, 'loss/train': 2.0151216983795166} +07/25/2024 11:51:11 - INFO - __main__ - Step 5488: {'lr': 0.0004987327865884653, 'samples': 263424, 'steps': 5487, 'loss/train': 1.1044464111328125} +07/25/2024 11:51:11 - INFO - __main__ - Step 5489: {'lr': 0.0004987322575416419, 'samples': 263472, 'steps': 5488, 'loss/train': 2.7236082553863525} +07/25/2024 11:51:11 - INFO - __main__ - Step 5490: {'lr': 0.0004987317283846869, 'samples': 263520, 'steps': 5489, 'loss/train': 2.5503151416778564} +07/25/2024 11:51:12 - INFO - __main__ - Step 5491: {'lr': 0.0004987311991176003, 'samples': 263568, 'steps': 5490, 'loss/train': 0.8991191983222961} +07/25/2024 11:51:12 - INFO - __main__ - Step 5492: {'lr': 0.0004987306697403826, 'samples': 263616, 'steps': 5491, 'loss/train': 2.615952968597412} +07/25/2024 11:51:12 - INFO - __main__ - Step 5493: {'lr': 0.000498730140253034, 'samples': 263664, 'steps': 5492, 'loss/train': 1.9434990882873535} +07/25/2024 11:51:12 - INFO - __main__ - Step 5494: {'lr': 0.0004987296106555546, 'samples': 263712, 'steps': 5493, 'loss/train': 2.4149067401885986} +07/25/2024 11:51:13 - INFO - __main__ - Step 5495: {'lr': 0.0004987290809479447, 'samples': 263760, 'steps': 5494, 'loss/train': 2.1307411193847656} +07/25/2024 11:51:13 - INFO - __main__ - Step 5496: {'lr': 0.0004987285511302046, 'samples': 263808, 'steps': 5495, 'loss/train': 2.285365104675293} +07/25/2024 11:51:13 - INFO - __main__ - Step 5497: {'lr': 0.0004987280212023344, 'samples': 263856, 'steps': 5496, 'loss/train': 1.4652372598648071} +07/25/2024 11:51:14 - INFO - __main__ - Step 5498: {'lr': 0.0004987274911643344, 'samples': 263904, 'steps': 5497, 'loss/train': 2.559221029281616} +07/25/2024 11:51:14 - INFO - __main__ - Step 5499: {'lr': 0.0004987269610162049, 'samples': 263952, 'steps': 5498, 'loss/train': 1.752759575843811} +07/25/2024 11:51:14 - INFO - __main__ - Step 5500: {'lr': 0.0004987264307579459, 'samples': 264000, 'steps': 5499, 'loss/train': 0.9026399850845337} +07/25/2024 11:51:14 - INFO - __main__ - Step 5501: {'lr': 0.0004987259003895579, 'samples': 264048, 'steps': 5500, 'loss/train': 2.194716453552246} +07/25/2024 11:51:15 - INFO - __main__ - Step 5502: {'lr': 0.0004987253699110411, 'samples': 264096, 'steps': 5501, 'loss/train': 2.3731627464294434} +07/25/2024 11:51:15 - INFO - __main__ - Step 5503: {'lr': 0.0004987248393223956, 'samples': 264144, 'steps': 5502, 'loss/train': 2.1294898986816406} +07/25/2024 11:51:15 - INFO - __main__ - Step 5504: {'lr': 0.0004987243086236219, 'samples': 264192, 'steps': 5503, 'loss/train': 2.5099754333496094} +07/25/2024 11:51:16 - INFO - __main__ - Step 5505: {'lr': 0.0004987237778147198, 'samples': 264240, 'steps': 5504, 'loss/train': 1.4825232028961182} +07/25/2024 11:51:16 - INFO - __main__ - Step 5506: {'lr': 0.0004987232468956898, 'samples': 264288, 'steps': 5505, 'loss/train': 1.8180077075958252} +07/25/2024 11:51:16 - INFO - __main__ - Step 5507: {'lr': 0.0004987227158665322, 'samples': 264336, 'steps': 5506, 'loss/train': 2.3496439456939697} +07/25/2024 11:51:16 - INFO - __main__ - Step 5508: {'lr': 0.0004987221847272472, 'samples': 264384, 'steps': 5507, 'loss/train': 2.4491240978240967} +07/25/2024 11:51:17 - INFO - __main__ - Step 5509: {'lr': 0.0004987216534778349, 'samples': 264432, 'steps': 5508, 'loss/train': 1.0153928995132446} +07/25/2024 11:51:17 - INFO - __main__ - Step 5510: {'lr': 0.0004987211221182957, 'samples': 264480, 'steps': 5509, 'loss/train': 2.0177066326141357} +07/25/2024 11:51:17 - INFO - __main__ - Step 5511: {'lr': 0.0004987205906486296, 'samples': 264528, 'steps': 5510, 'loss/train': 2.3543169498443604} +07/25/2024 11:51:17 - INFO - __main__ - Step 5512: {'lr': 0.0004987200590688372, 'samples': 264576, 'steps': 5511, 'loss/train': 1.7475922107696533} +07/25/2024 11:51:18 - INFO - __main__ - Step 5513: {'lr': 0.0004987195273789184, 'samples': 264624, 'steps': 5512, 'loss/train': 2.164266586303711} +07/25/2024 11:51:18 - INFO - __main__ - Step 5514: {'lr': 0.0004987189955788736, 'samples': 264672, 'steps': 5513, 'loss/train': 2.4219231605529785} +07/25/2024 11:51:18 - INFO - __main__ - Step 5515: {'lr': 0.000498718463668703, 'samples': 264720, 'steps': 5514, 'loss/train': 0.35539907217025757} +07/25/2024 11:51:19 - INFO - __main__ - Step 5516: {'lr': 0.0004987179316484068, 'samples': 264768, 'steps': 5515, 'loss/train': 1.8922662734985352} +07/25/2024 11:51:19 - INFO - __main__ - Step 5517: {'lr': 0.0004987173995179853, 'samples': 264816, 'steps': 5516, 'loss/train': 2.47784423828125} +07/25/2024 11:51:19 - INFO - __main__ - Step 5518: {'lr': 0.0004987168672774387, 'samples': 264864, 'steps': 5517, 'loss/train': 2.0272839069366455} +07/25/2024 11:51:19 - INFO - __main__ - Step 5519: {'lr': 0.0004987163349267672, 'samples': 264912, 'steps': 5518, 'loss/train': 2.259631395339966} +07/25/2024 11:51:20 - INFO - __main__ - Step 5520: {'lr': 0.0004987158024659711, 'samples': 264960, 'steps': 5519, 'loss/train': 1.9774937629699707} +07/25/2024 11:51:20 - INFO - __main__ - Step 5521: {'lr': 0.0004987152698950506, 'samples': 265008, 'steps': 5520, 'loss/train': 2.4044101238250732} +07/25/2024 11:51:20 - INFO - __main__ - Step 5522: {'lr': 0.0004987147372140059, 'samples': 265056, 'steps': 5521, 'loss/train': 1.794130802154541} +07/25/2024 11:51:21 - INFO - __main__ - Step 5523: {'lr': 0.0004987142044228374, 'samples': 265104, 'steps': 5522, 'loss/train': 2.1220462322235107} +07/25/2024 11:51:21 - INFO - __main__ - Step 5524: {'lr': 0.0004987136715215452, 'samples': 265152, 'steps': 5523, 'loss/train': 2.2016656398773193} +07/25/2024 11:51:21 - INFO - __main__ - Step 5525: {'lr': 0.0004987131385101295, 'samples': 265200, 'steps': 5524, 'loss/train': 2.100659132003784} +07/25/2024 11:51:21 - INFO - __main__ - Step 5526: {'lr': 0.0004987126053885906, 'samples': 265248, 'steps': 5525, 'loss/train': 1.8051388263702393} +07/25/2024 11:51:22 - INFO - __main__ - Step 5527: {'lr': 0.0004987120721569288, 'samples': 265296, 'steps': 5526, 'loss/train': 2.4901838302612305} +07/25/2024 11:51:22 - INFO - __main__ - Step 5528: {'lr': 0.0004987115388151442, 'samples': 265344, 'steps': 5527, 'loss/train': 2.4141736030578613} +07/25/2024 11:51:22 - INFO - __main__ - Step 5529: {'lr': 0.0004987110053632371, 'samples': 265392, 'steps': 5528, 'loss/train': 1.8839610815048218} +07/25/2024 11:51:23 - INFO - __main__ - Step 5530: {'lr': 0.0004987104718012077, 'samples': 265440, 'steps': 5529, 'loss/train': 2.29361891746521} +07/25/2024 11:51:23 - INFO - __main__ - Step 5531: {'lr': 0.0004987099381290563, 'samples': 265488, 'steps': 5530, 'loss/train': 2.7331955432891846} +07/25/2024 11:51:23 - INFO - __main__ - Step 5532: {'lr': 0.0004987094043467831, 'samples': 265536, 'steps': 5531, 'loss/train': 2.2047524452209473} +07/25/2024 11:51:23 - INFO - __main__ - Step 5533: {'lr': 0.0004987088704543884, 'samples': 265584, 'steps': 5532, 'loss/train': 1.1730228662490845} +07/25/2024 11:51:24 - INFO - __main__ - Step 5534: {'lr': 0.0004987083364518723, 'samples': 265632, 'steps': 5533, 'loss/train': 1.2710028886795044} +07/25/2024 11:51:24 - INFO - __main__ - Step 5535: {'lr': 0.0004987078023392352, 'samples': 265680, 'steps': 5534, 'loss/train': 2.4551515579223633} +07/25/2024 11:51:24 - INFO - __main__ - Step 5536: {'lr': 0.0004987072681164772, 'samples': 265728, 'steps': 5535, 'loss/train': 2.532744884490967} +07/25/2024 11:51:25 - INFO - __main__ - Step 5537: {'lr': 0.0004987067337835987, 'samples': 265776, 'steps': 5536, 'loss/train': 2.0518972873687744} +07/25/2024 11:51:25 - INFO - __main__ - Step 5538: {'lr': 0.0004987061993405997, 'samples': 265824, 'steps': 5537, 'loss/train': 1.8040238618850708} +07/25/2024 11:51:25 - INFO - __main__ - Step 5539: {'lr': 0.0004987056647874806, 'samples': 265872, 'steps': 5538, 'loss/train': 0.3500082790851593} +07/25/2024 11:51:25 - INFO - __main__ - Step 5540: {'lr': 0.0004987051301242416, 'samples': 265920, 'steps': 5539, 'loss/train': 0.854072630405426} +07/25/2024 11:51:26 - INFO - __main__ - Step 5541: {'lr': 0.0004987045953508828, 'samples': 265968, 'steps': 5540, 'loss/train': 2.036391258239746} +07/25/2024 11:51:26 - INFO - __main__ - Step 5542: {'lr': 0.0004987040604674048, 'samples': 266016, 'steps': 5541, 'loss/train': 2.1713719367980957} +07/25/2024 11:51:26 - INFO - __main__ - Step 5543: {'lr': 0.0004987035254738075, 'samples': 266064, 'steps': 5542, 'loss/train': 1.9711310863494873} +07/25/2024 11:51:27 - INFO - __main__ - Step 5544: {'lr': 0.0004987029903700912, 'samples': 266112, 'steps': 5543, 'loss/train': 2.151136875152588} +07/25/2024 11:51:27 - INFO - __main__ - Step 5545: {'lr': 0.0004987024551562564, 'samples': 266160, 'steps': 5544, 'loss/train': 2.08474063873291} +07/25/2024 11:51:27 - INFO - __main__ - Step 5546: {'lr': 0.0004987019198323028, 'samples': 266208, 'steps': 5545, 'loss/train': 2.277874708175659} +07/25/2024 11:51:27 - INFO - __main__ - Step 5547: {'lr': 0.0004987013843982312, 'samples': 266256, 'steps': 5546, 'loss/train': 2.231757164001465} +07/25/2024 11:51:28 - INFO - __main__ - Step 5548: {'lr': 0.0004987008488540416, 'samples': 266304, 'steps': 5547, 'loss/train': 2.181365966796875} +07/25/2024 11:51:28 - INFO - __main__ - Step 5549: {'lr': 0.0004987003131997341, 'samples': 266352, 'steps': 5548, 'loss/train': 2.194937229156494} +07/25/2024 11:51:28 - INFO - __main__ - Step 5550: {'lr': 0.0004986997774353092, 'samples': 266400, 'steps': 5549, 'loss/train': 2.2097954750061035} +07/25/2024 11:51:29 - INFO - __main__ - Step 5551: {'lr': 0.0004986992415607668, 'samples': 266448, 'steps': 5550, 'loss/train': 2.1391398906707764} +07/25/2024 11:51:29 - INFO - __main__ - Step 5552: {'lr': 0.0004986987055761075, 'samples': 266496, 'steps': 5551, 'loss/train': 2.6045215129852295} +07/25/2024 11:51:29 - INFO - __main__ - Step 5553: {'lr': 0.0004986981694813314, 'samples': 266544, 'steps': 5552, 'loss/train': 1.0382227897644043} +07/25/2024 11:51:29 - INFO - __main__ - Step 5554: {'lr': 0.0004986976332764387, 'samples': 266592, 'steps': 5553, 'loss/train': 2.002166986465454} +07/25/2024 11:51:30 - INFO - __main__ - Step 5555: {'lr': 0.0004986970969614296, 'samples': 266640, 'steps': 5554, 'loss/train': 2.4607112407684326} +07/25/2024 11:51:30 - INFO - __main__ - Step 5556: {'lr': 0.0004986965605363044, 'samples': 266688, 'steps': 5555, 'loss/train': 2.389190912246704} +07/25/2024 11:51:30 - INFO - __main__ - Step 5557: {'lr': 0.0004986960240010634, 'samples': 266736, 'steps': 5556, 'loss/train': 1.3263583183288574} +07/25/2024 11:51:31 - INFO - __main__ - Step 5558: {'lr': 0.0004986954873557067, 'samples': 266784, 'steps': 5557, 'loss/train': 1.429139256477356} +07/25/2024 11:51:31 - INFO - __main__ - Step 5559: {'lr': 0.0004986949506002346, 'samples': 266832, 'steps': 5558, 'loss/train': 2.0771586894989014} +07/25/2024 11:51:31 - INFO - __main__ - Step 5560: {'lr': 0.0004986944137346474, 'samples': 266880, 'steps': 5559, 'loss/train': 2.230008125305176} +07/25/2024 11:51:31 - INFO - __main__ - Step 5561: {'lr': 0.0004986938767589453, 'samples': 266928, 'steps': 5560, 'loss/train': 2.3467235565185547} +07/25/2024 11:51:32 - INFO - __main__ - Step 5562: {'lr': 0.0004986933396731284, 'samples': 266976, 'steps': 5561, 'loss/train': 1.9879374504089355} +07/25/2024 11:51:32 - INFO - __main__ - Step 5563: {'lr': 0.0004986928024771972, 'samples': 267024, 'steps': 5562, 'loss/train': 0.30018696188926697} +07/25/2024 11:51:32 - INFO - __main__ - Step 5564: {'lr': 0.0004986922651711518, 'samples': 267072, 'steps': 5563, 'loss/train': 2.1310811042785645} +07/25/2024 11:51:33 - INFO - __main__ - Step 5565: {'lr': 0.0004986917277549923, 'samples': 267120, 'steps': 5564, 'loss/train': 1.6644067764282227} +07/25/2024 11:51:33 - INFO - __main__ - Step 5566: {'lr': 0.0004986911902287192, 'samples': 267168, 'steps': 5565, 'loss/train': 2.760545253753662} +07/25/2024 11:51:33 - INFO - __main__ - Step 5567: {'lr': 0.0004986906525923325, 'samples': 267216, 'steps': 5566, 'loss/train': 2.303407669067383} +07/25/2024 11:51:33 - INFO - __main__ - Step 5568: {'lr': 0.0004986901148458326, 'samples': 267264, 'steps': 5567, 'loss/train': 1.5954289436340332} +07/25/2024 11:51:34 - INFO - __main__ - Step 5569: {'lr': 0.0004986895769892197, 'samples': 267312, 'steps': 5568, 'loss/train': 2.341801404953003} +07/25/2024 11:51:34 - INFO - __main__ - Step 5570: {'lr': 0.000498689039022494, 'samples': 267360, 'steps': 5569, 'loss/train': 1.0580116510391235} +07/25/2024 11:51:34 - INFO - __main__ - Step 5571: {'lr': 0.0004986885009456558, 'samples': 267408, 'steps': 5570, 'loss/train': 1.6792899370193481} +07/25/2024 11:51:35 - INFO - __main__ - Step 5572: {'lr': 0.0004986879627587052, 'samples': 267456, 'steps': 5571, 'loss/train': 2.4355781078338623} +07/25/2024 11:51:35 - INFO - __main__ - Step 5573: {'lr': 0.0004986874244616426, 'samples': 267504, 'steps': 5572, 'loss/train': 2.298274517059326} +07/25/2024 11:51:35 - INFO - __main__ - Step 5574: {'lr': 0.0004986868860544682, 'samples': 267552, 'steps': 5573, 'loss/train': 2.726754665374756} +07/25/2024 11:51:35 - INFO - __main__ - Step 5575: {'lr': 0.0004986863475371822, 'samples': 267600, 'steps': 5574, 'loss/train': 1.8186312913894653} +07/25/2024 11:51:36 - INFO - __main__ - Step 5576: {'lr': 0.0004986858089097849, 'samples': 267648, 'steps': 5575, 'loss/train': 2.4069316387176514} +07/25/2024 11:51:36 - INFO - __main__ - Step 5577: {'lr': 0.0004986852701722764, 'samples': 267696, 'steps': 5576, 'loss/train': 1.7536160945892334} +07/25/2024 11:51:36 - INFO - __main__ - Step 5578: {'lr': 0.0004986847313246571, 'samples': 267744, 'steps': 5577, 'loss/train': 1.829189419746399} +07/25/2024 11:51:37 - INFO - __main__ - Step 5579: {'lr': 0.0004986841923669271, 'samples': 267792, 'steps': 5578, 'loss/train': 2.031369924545288} +07/25/2024 11:51:37 - INFO - __main__ - Step 5580: {'lr': 0.0004986836532990867, 'samples': 267840, 'steps': 5579, 'loss/train': 2.4704346656799316} +07/25/2024 11:51:37 - INFO - __main__ - Step 5581: {'lr': 0.0004986831141211362, 'samples': 267888, 'steps': 5580, 'loss/train': 1.9917701482772827} +07/25/2024 11:51:37 - INFO - __main__ - Step 5582: {'lr': 0.0004986825748330759, 'samples': 267936, 'steps': 5581, 'loss/train': 2.123387098312378} +07/25/2024 11:51:38 - INFO - __main__ - Step 5583: {'lr': 0.0004986820354349057, 'samples': 267984, 'steps': 5582, 'loss/train': 1.8427573442459106} +07/25/2024 11:51:38 - INFO - __main__ - Step 5584: {'lr': 0.0004986814959266261, 'samples': 268032, 'steps': 5583, 'loss/train': 2.736349105834961} +07/25/2024 11:51:38 - INFO - __main__ - Step 5585: {'lr': 0.0004986809563082375, 'samples': 268080, 'steps': 5584, 'loss/train': 2.439271926879883} +07/25/2024 11:51:38 - INFO - __main__ - Step 5586: {'lr': 0.0004986804165797397, 'samples': 268128, 'steps': 5585, 'loss/train': 2.001441240310669} +07/25/2024 11:51:39 - INFO - __main__ - Step 5587: {'lr': 0.0004986798767411334, 'samples': 268176, 'steps': 5586, 'loss/train': 0.5955881476402283} +07/25/2024 11:51:39 - INFO - __main__ - Step 5588: {'lr': 0.0004986793367924184, 'samples': 268224, 'steps': 5587, 'loss/train': 2.1421451568603516} +07/25/2024 11:51:39 - INFO - __main__ - Step 5589: {'lr': 0.0004986787967335952, 'samples': 268272, 'steps': 5588, 'loss/train': 2.3373773097991943} +07/25/2024 11:51:40 - INFO - __main__ - Step 5590: {'lr': 0.0004986782565646642, 'samples': 268320, 'steps': 5589, 'loss/train': 2.158889055252075} +07/25/2024 11:51:40 - INFO - __main__ - Step 5591: {'lr': 0.0004986777162856253, 'samples': 268368, 'steps': 5590, 'loss/train': 2.1448922157287598} +07/25/2024 11:51:40 - INFO - __main__ - Step 5592: {'lr': 0.0004986771758964789, 'samples': 268416, 'steps': 5591, 'loss/train': 2.4536428451538086} +07/25/2024 11:51:40 - INFO - __main__ - Step 5593: {'lr': 0.0004986766353972252, 'samples': 268464, 'steps': 5592, 'loss/train': 2.3981196880340576} +07/25/2024 11:51:41 - INFO - __main__ - Step 5594: {'lr': 0.0004986760947878644, 'samples': 268512, 'steps': 5593, 'loss/train': 1.3435122966766357} +07/25/2024 11:51:41 - INFO - __main__ - Step 5595: {'lr': 0.0004986755540683968, 'samples': 268560, 'steps': 5594, 'loss/train': 1.8040447235107422} +07/25/2024 11:51:41 - INFO - __main__ - Step 5596: {'lr': 0.0004986750132388227, 'samples': 268608, 'steps': 5595, 'loss/train': 2.1382181644439697} +07/25/2024 11:51:42 - INFO - __main__ - Step 5597: {'lr': 0.0004986744722991422, 'samples': 268656, 'steps': 5596, 'loss/train': 2.4168779850006104} +07/25/2024 11:51:42 - INFO - __main__ - Step 5598: {'lr': 0.0004986739312493557, 'samples': 268704, 'steps': 5597, 'loss/train': 1.7041150331497192} +07/25/2024 11:51:42 - INFO - __main__ - Step 5599: {'lr': 0.0004986733900894633, 'samples': 268752, 'steps': 5598, 'loss/train': 1.9364453554153442} +07/25/2024 11:51:42 - INFO - __main__ - Step 5600: {'lr': 0.0004986728488194653, 'samples': 268800, 'steps': 5599, 'loss/train': 2.2673182487487793} +07/25/2024 11:51:43 - INFO - __main__ - Step 5601: {'lr': 0.0004986723074393619, 'samples': 268848, 'steps': 5600, 'loss/train': 2.276794910430908} +07/25/2024 11:51:43 - INFO - __main__ - Step 5602: {'lr': 0.0004986717659491534, 'samples': 268896, 'steps': 5601, 'loss/train': 2.2096943855285645} +07/25/2024 11:51:43 - INFO - __main__ - Step 5603: {'lr': 0.00049867122434884, 'samples': 268944, 'steps': 5602, 'loss/train': 2.5473787784576416} +07/25/2024 11:51:44 - INFO - __main__ - Step 5604: {'lr': 0.0004986706826384219, 'samples': 268992, 'steps': 5603, 'loss/train': 2.632108688354492} +07/25/2024 11:51:44 - INFO - __main__ - Step 5605: {'lr': 0.0004986701408178994, 'samples': 269040, 'steps': 5604, 'loss/train': 2.509660482406616} +07/25/2024 11:51:44 - INFO - __main__ - Step 5606: {'lr': 0.0004986695988872728, 'samples': 269088, 'steps': 5605, 'loss/train': 2.2253029346466064} +07/25/2024 11:51:44 - INFO - __main__ - Step 5607: {'lr': 0.0004986690568465423, 'samples': 269136, 'steps': 5606, 'loss/train': 1.5290238857269287} +07/25/2024 11:51:45 - INFO - __main__ - Step 5608: {'lr': 0.000498668514695708, 'samples': 269184, 'steps': 5607, 'loss/train': 2.5958425998687744} +07/25/2024 11:51:45 - INFO - __main__ - Step 5609: {'lr': 0.0004986679724347704, 'samples': 269232, 'steps': 5608, 'loss/train': 1.2975200414657593} +07/25/2024 11:51:45 - INFO - __main__ - Step 5610: {'lr': 0.0004986674300637294, 'samples': 269280, 'steps': 5609, 'loss/train': 2.0869457721710205} +07/25/2024 11:51:46 - INFO - __main__ - Step 5611: {'lr': 0.0004986668875825855, 'samples': 269328, 'steps': 5610, 'loss/train': 0.4100489318370819} +07/25/2024 11:51:46 - INFO - __main__ - Step 5612: {'lr': 0.0004986663449913389, 'samples': 269376, 'steps': 5611, 'loss/train': 2.19346284866333} +07/25/2024 11:51:46 - INFO - __main__ - Step 5613: {'lr': 0.0004986658022899898, 'samples': 269424, 'steps': 5612, 'loss/train': 2.3145368099212646} +07/25/2024 11:51:46 - INFO - __main__ - Step 5614: {'lr': 0.0004986652594785384, 'samples': 269472, 'steps': 5613, 'loss/train': 2.057079315185547} +07/25/2024 11:51:47 - INFO - __main__ - Step 5615: {'lr': 0.0004986647165569851, 'samples': 269520, 'steps': 5614, 'loss/train': 2.2641818523406982} +07/25/2024 11:51:47 - INFO - __main__ - Step 5616: {'lr': 0.0004986641735253299, 'samples': 269568, 'steps': 5615, 'loss/train': 2.2847955226898193} +07/25/2024 11:51:47 - INFO - __main__ - Step 5617: {'lr': 0.0004986636303835732, 'samples': 269616, 'steps': 5616, 'loss/train': 1.9328426122665405} +07/25/2024 11:51:48 - INFO - __main__ - Step 5618: {'lr': 0.0004986630871317152, 'samples': 269664, 'steps': 5617, 'loss/train': 2.2812910079956055} +07/25/2024 11:51:48 - INFO - __main__ - Step 5619: {'lr': 0.0004986625437697563, 'samples': 269712, 'steps': 5618, 'loss/train': 2.4933090209960938} +07/25/2024 11:51:48 - INFO - __main__ - Step 5620: {'lr': 0.0004986620002976965, 'samples': 269760, 'steps': 5619, 'loss/train': 2.147031784057617} +07/25/2024 11:51:48 - INFO - __main__ - Step 5621: {'lr': 0.0004986614567155361, 'samples': 269808, 'steps': 5620, 'loss/train': 2.595937728881836} +07/25/2024 11:51:49 - INFO - __main__ - Step 5622: {'lr': 0.0004986609130232754, 'samples': 269856, 'steps': 5621, 'loss/train': 1.3103604316711426} +07/25/2024 11:51:49 - INFO - __main__ - Step 5623: {'lr': 0.0004986603692209146, 'samples': 269904, 'steps': 5622, 'loss/train': 2.246671438217163} +07/25/2024 11:51:49 - INFO - __main__ - Step 5624: {'lr': 0.000498659825308454, 'samples': 269952, 'steps': 5623, 'loss/train': 2.243870973587036} +07/25/2024 11:51:50 - INFO - __main__ - Step 5625: {'lr': 0.0004986592812858938, 'samples': 270000, 'steps': 5624, 'loss/train': 1.853823184967041} +07/25/2024 11:51:50 - INFO - __main__ - Step 5626: {'lr': 0.0004986587371532342, 'samples': 270048, 'steps': 5625, 'loss/train': 2.1333250999450684} +07/25/2024 11:51:50 - INFO - __main__ - Step 5627: {'lr': 0.0004986581929104755, 'samples': 270096, 'steps': 5626, 'loss/train': 2.3853368759155273} +07/25/2024 11:51:50 - INFO - __main__ - Step 5628: {'lr': 0.0004986576485576179, 'samples': 270144, 'steps': 5627, 'loss/train': 1.967094898223877} +07/25/2024 11:51:51 - INFO - __main__ - Step 5629: {'lr': 0.0004986571040946617, 'samples': 270192, 'steps': 5628, 'loss/train': 2.355783462524414} +07/25/2024 11:51:51 - INFO - __main__ - Step 5630: {'lr': 0.0004986565595216071, 'samples': 270240, 'steps': 5629, 'loss/train': 1.4705185890197754} +07/25/2024 11:51:51 - INFO - __main__ - Step 5631: {'lr': 0.0004986560148384542, 'samples': 270288, 'steps': 5630, 'loss/train': 2.2903170585632324} +07/25/2024 11:51:52 - INFO - __main__ - Step 5632: {'lr': 0.0004986554700452035, 'samples': 270336, 'steps': 5631, 'loss/train': 2.0391950607299805} +07/25/2024 11:51:52 - INFO - __main__ - Step 5633: {'lr': 0.0004986549251418552, 'samples': 270384, 'steps': 5632, 'loss/train': 2.6629931926727295} +07/25/2024 11:51:52 - INFO - __main__ - Step 5634: {'lr': 0.0004986543801284094, 'samples': 270432, 'steps': 5633, 'loss/train': 2.5847983360290527} +07/25/2024 11:51:52 - INFO - __main__ - Step 5635: {'lr': 0.0004986538350048663, 'samples': 270480, 'steps': 5634, 'loss/train': 0.4253825545310974} +07/25/2024 11:51:53 - INFO - __main__ - Step 5636: {'lr': 0.0004986532897712264, 'samples': 270528, 'steps': 5635, 'loss/train': 2.324761390686035} +07/25/2024 11:51:53 - INFO - __main__ - Step 5637: {'lr': 0.0004986527444274897, 'samples': 270576, 'steps': 5636, 'loss/train': 2.9325156211853027} +07/25/2024 11:51:53 - INFO - __main__ - Step 5638: {'lr': 0.0004986521989736566, 'samples': 270624, 'steps': 5637, 'loss/train': 1.6617709398269653} +07/25/2024 11:51:54 - INFO - __main__ - Step 5639: {'lr': 0.0004986516534097272, 'samples': 270672, 'steps': 5638, 'loss/train': 2.29742169380188} +07/25/2024 11:51:54 - INFO - __main__ - Step 5640: {'lr': 0.0004986511077357019, 'samples': 270720, 'steps': 5639, 'loss/train': 2.2416586875915527} +07/25/2024 11:51:54 - INFO - __main__ - Step 5641: {'lr': 0.0004986505619515808, 'samples': 270768, 'steps': 5640, 'loss/train': 2.4638566970825195} +07/25/2024 11:51:54 - INFO - __main__ - Step 5642: {'lr': 0.0004986500160573642, 'samples': 270816, 'steps': 5641, 'loss/train': 2.4694600105285645} +07/25/2024 11:51:55 - INFO - __main__ - Step 5643: {'lr': 0.0004986494700530524, 'samples': 270864, 'steps': 5642, 'loss/train': 1.9896461963653564} +07/25/2024 11:51:55 - INFO - __main__ - Step 5644: {'lr': 0.0004986489239386455, 'samples': 270912, 'steps': 5643, 'loss/train': 2.5070202350616455} +07/25/2024 11:51:55 - INFO - __main__ - Step 5645: {'lr': 0.0004986483777141438, 'samples': 270960, 'steps': 5644, 'loss/train': 2.0749120712280273} +07/25/2024 11:51:56 - INFO - __main__ - Step 5646: {'lr': 0.0004986478313795476, 'samples': 271008, 'steps': 5645, 'loss/train': 1.9566612243652344} +07/25/2024 11:51:56 - INFO - __main__ - Step 5647: {'lr': 0.0004986472849348571, 'samples': 271056, 'steps': 5646, 'loss/train': 2.4956932067871094} +07/25/2024 11:51:56 - INFO - __main__ - Step 5648: {'lr': 0.0004986467383800727, 'samples': 271104, 'steps': 5647, 'loss/train': 2.1438629627227783} +07/25/2024 11:51:56 - INFO - __main__ - Step 5649: {'lr': 0.0004986461917151943, 'samples': 271152, 'steps': 5648, 'loss/train': 2.6287267208099365} +07/25/2024 11:51:57 - INFO - __main__ - Step 5650: {'lr': 0.0004986456449402224, 'samples': 271200, 'steps': 5649, 'loss/train': 0.726989209651947} +07/25/2024 11:51:57 - INFO - __main__ - Step 5651: {'lr': 0.0004986450980551572, 'samples': 271248, 'steps': 5650, 'loss/train': 2.4279797077178955} +07/25/2024 11:51:57 - INFO - __main__ - Step 5652: {'lr': 0.0004986445510599989, 'samples': 271296, 'steps': 5651, 'loss/train': 2.3373594284057617} +07/25/2024 11:51:58 - INFO - __main__ - Step 5653: {'lr': 0.0004986440039547478, 'samples': 271344, 'steps': 5652, 'loss/train': 2.215538501739502} +07/25/2024 11:51:58 - INFO - __main__ - Step 5654: {'lr': 0.000498643456739404, 'samples': 271392, 'steps': 5653, 'loss/train': 2.333204984664917} +07/25/2024 11:51:58 - INFO - __main__ - Step 5655: {'lr': 0.000498642909413968, 'samples': 271440, 'steps': 5654, 'loss/train': 2.4049761295318604} +07/25/2024 11:51:58 - INFO - __main__ - Step 5656: {'lr': 0.0004986423619784397, 'samples': 271488, 'steps': 5655, 'loss/train': 2.341984987258911} +07/25/2024 11:51:59 - INFO - __main__ - Step 5657: {'lr': 0.0004986418144328196, 'samples': 271536, 'steps': 5656, 'loss/train': 2.0013139247894287} +07/25/2024 11:51:59 - INFO - __main__ - Step 5658: {'lr': 0.0004986412667771079, 'samples': 271584, 'steps': 5657, 'loss/train': 2.137012243270874} +07/25/2024 11:51:59 - INFO - __main__ - Step 5659: {'lr': 0.0004986407190113048, 'samples': 271632, 'steps': 5658, 'loss/train': 0.40097662806510925} +07/25/2024 11:52:00 - INFO - __main__ - Step 5660: {'lr': 0.0004986401711354106, 'samples': 271680, 'steps': 5659, 'loss/train': 2.0515034198760986} +07/25/2024 11:52:00 - INFO - __main__ - Step 5661: {'lr': 0.0004986396231494254, 'samples': 271728, 'steps': 5660, 'loss/train': 2.628570318222046} +07/25/2024 11:52:00 - INFO - __main__ - Step 5662: {'lr': 0.0004986390750533497, 'samples': 271776, 'steps': 5661, 'loss/train': 1.5437417030334473} +07/25/2024 11:52:00 - INFO - __main__ - Step 5663: {'lr': 0.0004986385268471833, 'samples': 271824, 'steps': 5662, 'loss/train': 2.2007029056549072} +07/25/2024 11:52:01 - INFO - __main__ - Step 5664: {'lr': 0.000498637978530927, 'samples': 271872, 'steps': 5663, 'loss/train': 2.0518155097961426} +07/25/2024 11:52:01 - INFO - __main__ - Step 5665: {'lr': 0.0004986374301045807, 'samples': 271920, 'steps': 5664, 'loss/train': 1.8190549612045288} +07/25/2024 11:52:01 - INFO - __main__ - Step 5666: {'lr': 0.0004986368815681447, 'samples': 271968, 'steps': 5665, 'loss/train': 2.0582261085510254} +07/25/2024 11:52:01 - INFO - __main__ - Step 5667: {'lr': 0.0004986363329216192, 'samples': 272016, 'steps': 5666, 'loss/train': 2.682976484298706} +07/25/2024 11:52:02 - INFO - __main__ - Step 5668: {'lr': 0.0004986357841650045, 'samples': 272064, 'steps': 5667, 'loss/train': 2.0647165775299072} +07/25/2024 11:52:02 - INFO - __main__ - Step 5669: {'lr': 0.0004986352352983009, 'samples': 272112, 'steps': 5668, 'loss/train': 2.315932035446167} +07/25/2024 11:52:02 - INFO - __main__ - Step 5670: {'lr': 0.0004986346863215086, 'samples': 272160, 'steps': 5669, 'loss/train': 2.221356153488159} +07/25/2024 11:52:03 - INFO - __main__ - Step 5671: {'lr': 0.0004986341372346278, 'samples': 272208, 'steps': 5670, 'loss/train': 2.1244382858276367} +07/25/2024 11:52:03 - INFO - __main__ - Step 5672: {'lr': 0.0004986335880376588, 'samples': 272256, 'steps': 5671, 'loss/train': 2.0935449600219727} +07/25/2024 11:52:03 - INFO - __main__ - Step 5673: {'lr': 0.0004986330387306017, 'samples': 272304, 'steps': 5672, 'loss/train': 2.104928493499756} +07/25/2024 11:52:03 - INFO - __main__ - Step 5674: {'lr': 0.000498632489313457, 'samples': 272352, 'steps': 5673, 'loss/train': 2.365067481994629} +07/25/2024 11:52:04 - INFO - __main__ - Step 5675: {'lr': 0.0004986319397862248, 'samples': 272400, 'steps': 5674, 'loss/train': 2.4556150436401367} +07/25/2024 11:52:04 - INFO - __main__ - Step 5676: {'lr': 0.0004986313901489053, 'samples': 272448, 'steps': 5675, 'loss/train': 1.7284144163131714} +07/25/2024 11:52:04 - INFO - __main__ - Step 5677: {'lr': 0.0004986308404014987, 'samples': 272496, 'steps': 5676, 'loss/train': 2.400498867034912} +07/25/2024 11:52:05 - INFO - __main__ - Step 5678: {'lr': 0.0004986302905440054, 'samples': 272544, 'steps': 5677, 'loss/train': 1.2831677198410034} +07/25/2024 11:52:05 - INFO - __main__ - Step 5679: {'lr': 0.0004986297405764256, 'samples': 272592, 'steps': 5678, 'loss/train': 2.558450937271118} +07/25/2024 11:52:05 - INFO - __main__ - Step 5680: {'lr': 0.0004986291904987595, 'samples': 272640, 'steps': 5679, 'loss/train': 1.887244701385498} +07/25/2024 11:52:05 - INFO - __main__ - Step 5681: {'lr': 0.0004986286403110073, 'samples': 272688, 'steps': 5680, 'loss/train': 2.6247191429138184} +07/25/2024 11:52:06 - INFO - __main__ - Step 5682: {'lr': 0.0004986280900131694, 'samples': 272736, 'steps': 5681, 'loss/train': 2.736818790435791} +07/25/2024 11:52:06 - INFO - __main__ - Step 5683: {'lr': 0.0004986275396052459, 'samples': 272784, 'steps': 5682, 'loss/train': 0.3404773771762848} +07/25/2024 11:52:06 - INFO - __main__ - Step 5684: {'lr': 0.0004986269890872371, 'samples': 272832, 'steps': 5683, 'loss/train': 1.3140636682510376} +07/25/2024 11:52:07 - INFO - __main__ - Step 5685: {'lr': 0.0004986264384591432, 'samples': 272880, 'steps': 5684, 'loss/train': 2.3979692459106445} +07/25/2024 11:52:07 - INFO - __main__ - Step 5686: {'lr': 0.0004986258877209645, 'samples': 272928, 'steps': 5685, 'loss/train': 1.4592278003692627} +07/25/2024 11:52:07 - INFO - __main__ - Step 5687: {'lr': 0.0004986253368727013, 'samples': 272976, 'steps': 5686, 'loss/train': 2.02649188041687} +07/25/2024 11:52:07 - INFO - __main__ - Step 5688: {'lr': 0.0004986247859143537, 'samples': 273024, 'steps': 5687, 'loss/train': 2.067021608352661} +07/25/2024 11:52:08 - INFO - __main__ - Step 5689: {'lr': 0.000498624234845922, 'samples': 273072, 'steps': 5688, 'loss/train': 2.4124279022216797} +07/25/2024 11:52:08 - INFO - __main__ - Step 5690: {'lr': 0.0004986236836674064, 'samples': 273120, 'steps': 5689, 'loss/train': 1.6070767641067505} +07/25/2024 11:52:08 - INFO - __main__ - Step 5691: {'lr': 0.0004986231323788073, 'samples': 273168, 'steps': 5690, 'loss/train': 2.5772347450256348} +07/25/2024 11:52:09 - INFO - __main__ - Step 5692: {'lr': 0.0004986225809801248, 'samples': 273216, 'steps': 5691, 'loss/train': 2.2024757862091064} +07/25/2024 11:52:09 - INFO - __main__ - Step 5693: {'lr': 0.0004986220294713593, 'samples': 273264, 'steps': 5692, 'loss/train': 2.4468634128570557} +07/25/2024 11:52:09 - INFO - __main__ - Step 5694: {'lr': 0.0004986214778525108, 'samples': 273312, 'steps': 5693, 'loss/train': 2.7499992847442627} +07/25/2024 11:52:09 - INFO - __main__ - Step 5695: {'lr': 0.0004986209261235797, 'samples': 273360, 'steps': 5694, 'loss/train': 2.4199187755584717} +07/25/2024 11:52:10 - INFO - __main__ - Step 5696: {'lr': 0.0004986203742845662, 'samples': 273408, 'steps': 5695, 'loss/train': 1.8903459310531616} +07/25/2024 11:52:10 - INFO - __main__ - Step 5697: {'lr': 0.0004986198223354706, 'samples': 273456, 'steps': 5696, 'loss/train': 1.6713467836380005} +07/25/2024 11:52:10 - INFO - __main__ - Step 5698: {'lr': 0.0004986192702762932, 'samples': 273504, 'steps': 5697, 'loss/train': 2.3988020420074463} +07/25/2024 11:52:11 - INFO - __main__ - Step 5699: {'lr': 0.000498618718107034, 'samples': 273552, 'steps': 5698, 'loss/train': 2.227879047393799} +07/25/2024 11:52:11 - INFO - __main__ - Step 5700: {'lr': 0.0004986181658276934, 'samples': 273600, 'steps': 5699, 'loss/train': 2.0438451766967773} +07/25/2024 11:52:11 - INFO - __main__ - Step 5701: {'lr': 0.0004986176134382717, 'samples': 273648, 'steps': 5700, 'loss/train': 2.3349599838256836} +07/25/2024 11:52:11 - INFO - __main__ - Step 5702: {'lr': 0.0004986170609387691, 'samples': 273696, 'steps': 5701, 'loss/train': 0.7957969307899475} +07/25/2024 11:52:12 - INFO - __main__ - Step 5703: {'lr': 0.0004986165083291858, 'samples': 273744, 'steps': 5702, 'loss/train': 2.4256396293640137} +07/25/2024 11:52:12 - INFO - __main__ - Step 5704: {'lr': 0.0004986159556095221, 'samples': 273792, 'steps': 5703, 'loss/train': 2.77144455909729} +07/25/2024 11:52:12 - INFO - __main__ - Step 5705: {'lr': 0.0004986154027797783, 'samples': 273840, 'steps': 5704, 'loss/train': 2.5630176067352295} +07/25/2024 11:52:13 - INFO - __main__ - Step 5706: {'lr': 0.0004986148498399545, 'samples': 273888, 'steps': 5705, 'loss/train': 2.064042329788208} +07/25/2024 11:52:13 - INFO - __main__ - Step 5707: {'lr': 0.0004986142967900508, 'samples': 273936, 'steps': 5706, 'loss/train': 0.6239055395126343} +07/25/2024 11:52:13 - INFO - __main__ - Step 5708: {'lr': 0.000498613743630068, 'samples': 273984, 'steps': 5707, 'loss/train': 2.534717082977295} +07/25/2024 11:52:13 - INFO - __main__ - Step 5709: {'lr': 0.0004986131903600057, 'samples': 274032, 'steps': 5708, 'loss/train': 2.021624803543091} +07/25/2024 11:52:14 - INFO - __main__ - Step 5710: {'lr': 0.0004986126369798646, 'samples': 274080, 'steps': 5709, 'loss/train': 1.9986112117767334} +07/25/2024 11:52:14 - INFO - __main__ - Step 5711: {'lr': 0.0004986120834896448, 'samples': 274128, 'steps': 5710, 'loss/train': 2.196171283721924} +07/25/2024 11:52:14 - INFO - __main__ - Step 5712: {'lr': 0.0004986115298893465, 'samples': 274176, 'steps': 5711, 'loss/train': 2.3782646656036377} +07/25/2024 11:52:15 - INFO - __main__ - Step 5713: {'lr': 0.00049861097617897, 'samples': 274224, 'steps': 5712, 'loss/train': 2.0506114959716797} +07/25/2024 11:52:15 - INFO - __main__ - Step 5714: {'lr': 0.0004986104223585156, 'samples': 274272, 'steps': 5713, 'loss/train': 1.9304667711257935} +07/25/2024 11:52:15 - INFO - __main__ - Step 5715: {'lr': 0.0004986098684279834, 'samples': 274320, 'steps': 5714, 'loss/train': 1.9420855045318604} +07/25/2024 11:52:15 - INFO - __main__ - Step 5716: {'lr': 0.0004986093143873737, 'samples': 274368, 'steps': 5715, 'loss/train': 2.0043704509735107} +07/25/2024 11:52:16 - INFO - __main__ - Step 5717: {'lr': 0.0004986087602366867, 'samples': 274416, 'steps': 5716, 'loss/train': 2.330782890319824} +07/25/2024 11:52:16 - INFO - __main__ - Step 5718: {'lr': 0.0004986082059759228, 'samples': 274464, 'steps': 5717, 'loss/train': 2.9592199325561523} +07/25/2024 11:52:16 - INFO - __main__ - Step 5719: {'lr': 0.0004986076516050822, 'samples': 274512, 'steps': 5718, 'loss/train': 2.378661632537842} +07/25/2024 11:52:17 - INFO - __main__ - Step 5720: {'lr': 0.0004986070971241649, 'samples': 274560, 'steps': 5719, 'loss/train': 1.8793935775756836} +07/25/2024 11:52:17 - INFO - __main__ - Step 5721: {'lr': 0.0004986065425331715, 'samples': 274608, 'steps': 5720, 'loss/train': 2.2430005073547363} +07/25/2024 11:52:17 - INFO - __main__ - Step 5722: {'lr': 0.0004986059878321021, 'samples': 274656, 'steps': 5721, 'loss/train': 2.0830249786376953} +07/25/2024 11:52:17 - INFO - __main__ - Step 5723: {'lr': 0.0004986054330209568, 'samples': 274704, 'steps': 5722, 'loss/train': 2.501539707183838} +07/25/2024 11:52:18 - INFO - __main__ - Step 5724: {'lr': 0.0004986048780997362, 'samples': 274752, 'steps': 5723, 'loss/train': 2.3339951038360596} +07/25/2024 11:52:18 - INFO - __main__ - Step 5725: {'lr': 0.0004986043230684402, 'samples': 274800, 'steps': 5724, 'loss/train': 2.288402557373047} +07/25/2024 11:52:18 - INFO - __main__ - Step 5726: {'lr': 0.0004986037679270692, 'samples': 274848, 'steps': 5725, 'loss/train': 2.103170156478882} +07/25/2024 11:52:19 - INFO - __main__ - Step 5727: {'lr': 0.0004986032126756233, 'samples': 274896, 'steps': 5726, 'loss/train': 1.8515876531600952} +07/25/2024 11:52:19 - INFO - __main__ - Step 5728: {'lr': 0.0004986026573141029, 'samples': 274944, 'steps': 5727, 'loss/train': 2.4143755435943604} +07/25/2024 11:52:19 - INFO - __main__ - Step 5729: {'lr': 0.0004986021018425083, 'samples': 274992, 'steps': 5728, 'loss/train': 2.5756428241729736} +07/25/2024 11:52:19 - INFO - __main__ - Step 5730: {'lr': 0.0004986015462608396, 'samples': 275040, 'steps': 5729, 'loss/train': 1.617811679840088} +07/25/2024 11:52:20 - INFO - __main__ - Step 5731: {'lr': 0.0004986009905690971, 'samples': 275088, 'steps': 5730, 'loss/train': 0.48669126629829407} +07/25/2024 11:52:20 - INFO - __main__ - Step 5732: {'lr': 0.0004986004347672811, 'samples': 275136, 'steps': 5731, 'loss/train': 2.472548723220825} +07/25/2024 11:52:20 - INFO - __main__ - Step 5733: {'lr': 0.0004985998788553917, 'samples': 275184, 'steps': 5732, 'loss/train': 1.9763988256454468} +07/25/2024 11:52:21 - INFO - __main__ - Step 5734: {'lr': 0.0004985993228334293, 'samples': 275232, 'steps': 5733, 'loss/train': 1.6795953512191772} +07/25/2024 11:52:21 - INFO - __main__ - Step 5735: {'lr': 0.0004985987667013942, 'samples': 275280, 'steps': 5734, 'loss/train': 1.806861162185669} +07/25/2024 11:52:21 - INFO - __main__ - Step 5736: {'lr': 0.0004985982104592863, 'samples': 275328, 'steps': 5735, 'loss/train': 2.095963716506958} +07/25/2024 11:52:21 - INFO - __main__ - Step 5737: {'lr': 0.0004985976541071062, 'samples': 275376, 'steps': 5736, 'loss/train': 2.328310012817383} +07/25/2024 11:52:22 - INFO - __main__ - Step 5738: {'lr': 0.000498597097644854, 'samples': 275424, 'steps': 5737, 'loss/train': 2.3228416442871094} +07/25/2024 11:52:22 - INFO - __main__ - Step 5739: {'lr': 0.00049859654107253, 'samples': 275472, 'steps': 5738, 'loss/train': 2.027501344680786} +07/25/2024 11:52:22 - INFO - __main__ - Step 5740: {'lr': 0.0004985959843901344, 'samples': 275520, 'steps': 5739, 'loss/train': 1.9938725233078003} +07/25/2024 11:52:23 - INFO - __main__ - Step 5741: {'lr': 0.0004985954275976676, 'samples': 275568, 'steps': 5740, 'loss/train': 1.823651671409607} +07/25/2024 11:52:23 - INFO - __main__ - Step 5742: {'lr': 0.0004985948706951296, 'samples': 275616, 'steps': 5741, 'loss/train': 2.845276117324829} +07/25/2024 11:52:23 - INFO - __main__ - Step 5743: {'lr': 0.0004985943136825206, 'samples': 275664, 'steps': 5742, 'loss/train': 1.5911622047424316} +07/25/2024 11:52:23 - INFO - __main__ - Step 5744: {'lr': 0.0004985937565598412, 'samples': 275712, 'steps': 5743, 'loss/train': 2.5732932090759277} +07/25/2024 11:52:24 - INFO - __main__ - Step 5745: {'lr': 0.0004985931993270914, 'samples': 275760, 'steps': 5744, 'loss/train': 3.3548760414123535} +07/25/2024 11:52:24 - INFO - __main__ - Step 5746: {'lr': 0.0004985926419842715, 'samples': 275808, 'steps': 5745, 'loss/train': 1.785244345664978} +07/25/2024 11:52:24 - INFO - __main__ - Step 5747: {'lr': 0.0004985920845313817, 'samples': 275856, 'steps': 5746, 'loss/train': 1.904127597808838} +07/25/2024 11:52:25 - INFO - __main__ - Step 5748: {'lr': 0.0004985915269684223, 'samples': 275904, 'steps': 5747, 'loss/train': 2.3300273418426514} +07/25/2024 11:52:25 - INFO - __main__ - Step 5749: {'lr': 0.0004985909692953936, 'samples': 275952, 'steps': 5748, 'loss/train': 0.4713479280471802} +07/25/2024 11:52:25 - INFO - __main__ - Step 5750: {'lr': 0.0004985904115122956, 'samples': 276000, 'steps': 5749, 'loss/train': 2.1658568382263184} +07/25/2024 11:52:25 - INFO - __main__ - Step 5751: {'lr': 0.000498589853619129, 'samples': 276048, 'steps': 5750, 'loss/train': 1.8611195087432861} +07/25/2024 11:52:26 - INFO - __main__ - Step 5752: {'lr': 0.0004985892956158937, 'samples': 276096, 'steps': 5751, 'loss/train': 2.181758403778076} +07/25/2024 11:52:26 - INFO - __main__ - Step 5753: {'lr': 0.0004985887375025899, 'samples': 276144, 'steps': 5752, 'loss/train': 2.619581460952759} +07/25/2024 11:52:26 - INFO - __main__ - Step 5754: {'lr': 0.0004985881792792181, 'samples': 276192, 'steps': 5753, 'loss/train': 2.1091649532318115} +07/25/2024 11:52:26 - INFO - __main__ - Step 5755: {'lr': 0.0004985876209457783, 'samples': 276240, 'steps': 5754, 'loss/train': 0.40401482582092285} +07/25/2024 11:52:27 - INFO - __main__ - Step 5756: {'lr': 0.0004985870625022709, 'samples': 276288, 'steps': 5755, 'loss/train': 2.381122589111328} +07/25/2024 11:52:27 - INFO - __main__ - Step 5757: {'lr': 0.0004985865039486962, 'samples': 276336, 'steps': 5756, 'loss/train': 2.4326720237731934} +07/25/2024 11:52:27 - INFO - __main__ - Step 5758: {'lr': 0.0004985859452850543, 'samples': 276384, 'steps': 5757, 'loss/train': 1.7574405670166016} +07/25/2024 11:52:28 - INFO - __main__ - Step 5759: {'lr': 0.0004985853865113456, 'samples': 276432, 'steps': 5758, 'loss/train': 2.3961589336395264} +07/25/2024 11:52:28 - INFO - __main__ - Step 5760: {'lr': 0.00049858482762757, 'samples': 276480, 'steps': 5759, 'loss/train': 2.070584297180176} +07/25/2024 11:52:28 - INFO - __main__ - Step 5761: {'lr': 0.0004985842686337283, 'samples': 276528, 'steps': 5760, 'loss/train': 1.374784231185913} +07/25/2024 11:52:28 - INFO - __main__ - Step 5762: {'lr': 0.0004985837095298203, 'samples': 276576, 'steps': 5761, 'loss/train': 1.9102869033813477} +07/25/2024 11:52:29 - INFO - __main__ - Step 5763: {'lr': 0.0004985831503158464, 'samples': 276624, 'steps': 5762, 'loss/train': 1.7502394914627075} +07/25/2024 11:52:29 - INFO - __main__ - Step 5764: {'lr': 0.0004985825909918069, 'samples': 276672, 'steps': 5763, 'loss/train': 2.3455288410186768} +07/25/2024 11:52:29 - INFO - __main__ - Step 5765: {'lr': 0.000498582031557702, 'samples': 276720, 'steps': 5764, 'loss/train': 2.025052070617676} +07/25/2024 11:52:30 - INFO - __main__ - Step 5766: {'lr': 0.0004985814720135319, 'samples': 276768, 'steps': 5765, 'loss/train': 2.140096664428711} +07/25/2024 11:52:30 - INFO - __main__ - Step 5767: {'lr': 0.0004985809123592968, 'samples': 276816, 'steps': 5766, 'loss/train': 1.6353257894515991} +07/25/2024 11:52:30 - INFO - __main__ - Step 5768: {'lr': 0.0004985803525949972, 'samples': 276864, 'steps': 5767, 'loss/train': 1.8513286113739014} +07/25/2024 11:52:30 - INFO - __main__ - Step 5769: {'lr': 0.0004985797927206331, 'samples': 276912, 'steps': 5768, 'loss/train': 3.330400228500366} +07/25/2024 11:52:31 - INFO - __main__ - Step 5770: {'lr': 0.0004985792327362048, 'samples': 276960, 'steps': 5769, 'loss/train': 2.0926108360290527} +07/25/2024 11:52:31 - INFO - __main__ - Step 5771: {'lr': 0.0004985786726417127, 'samples': 277008, 'steps': 5770, 'loss/train': 2.035808801651001} +07/25/2024 11:52:31 - INFO - __main__ - Step 5772: {'lr': 0.0004985781124371569, 'samples': 277056, 'steps': 5771, 'loss/train': 2.290046453475952} +07/25/2024 11:52:32 - INFO - __main__ - Step 5773: {'lr': 0.0004985775521225376, 'samples': 277104, 'steps': 5772, 'loss/train': 0.2595573365688324} +07/25/2024 11:52:32 - INFO - __main__ - Step 5774: {'lr': 0.0004985769916978552, 'samples': 277152, 'steps': 5773, 'loss/train': 2.644491672515869} +07/25/2024 11:52:32 - INFO - __main__ - Step 5775: {'lr': 0.0004985764311631098, 'samples': 277200, 'steps': 5774, 'loss/train': 2.0104944705963135} +07/25/2024 11:52:32 - INFO - __main__ - Step 5776: {'lr': 0.0004985758705183018, 'samples': 277248, 'steps': 5775, 'loss/train': 1.7067948579788208} +07/25/2024 11:52:33 - INFO - __main__ - Step 5777: {'lr': 0.0004985753097634313, 'samples': 277296, 'steps': 5776, 'loss/train': 2.626863956451416} +07/25/2024 11:52:33 - INFO - __main__ - Step 5778: {'lr': 0.0004985747488984987, 'samples': 277344, 'steps': 5777, 'loss/train': 2.2764291763305664} +07/25/2024 11:52:33 - INFO - __main__ - Step 5779: {'lr': 0.000498574187923504, 'samples': 277392, 'steps': 5778, 'loss/train': 0.2719467878341675} +07/25/2024 11:52:34 - INFO - __main__ - Step 5780: {'lr': 0.0004985736268384478, 'samples': 277440, 'steps': 5779, 'loss/train': 2.3154220581054688} +07/25/2024 11:52:34 - INFO - __main__ - Step 5781: {'lr': 0.0004985730656433301, 'samples': 277488, 'steps': 5780, 'loss/train': 2.6407954692840576} +07/25/2024 11:52:34 - INFO - __main__ - Step 5782: {'lr': 0.0004985725043381512, 'samples': 277536, 'steps': 5781, 'loss/train': 1.8418160676956177} +07/25/2024 11:52:34 - INFO - __main__ - Step 5783: {'lr': 0.0004985719429229113, 'samples': 277584, 'steps': 5782, 'loss/train': 2.3851940631866455} +07/25/2024 11:52:35 - INFO - __main__ - Step 5784: {'lr': 0.0004985713813976108, 'samples': 277632, 'steps': 5783, 'loss/train': 2.2041375637054443} +07/25/2024 11:52:35 - INFO - __main__ - Step 5785: {'lr': 0.0004985708197622499, 'samples': 277680, 'steps': 5784, 'loss/train': 2.1389031410217285} +07/25/2024 11:52:35 - INFO - __main__ - Step 5786: {'lr': 0.0004985702580168287, 'samples': 277728, 'steps': 5785, 'loss/train': 2.073913335800171} +07/25/2024 11:52:36 - INFO - __main__ - Step 5787: {'lr': 0.0004985696961613476, 'samples': 277776, 'steps': 5786, 'loss/train': 2.1238515377044678} +07/25/2024 11:52:36 - INFO - __main__ - Step 5788: {'lr': 0.0004985691341958067, 'samples': 277824, 'steps': 5787, 'loss/train': 2.5082004070281982} +07/25/2024 11:52:36 - INFO - __main__ - Step 5789: {'lr': 0.0004985685721202065, 'samples': 277872, 'steps': 5788, 'loss/train': 1.9446488618850708} +07/25/2024 11:52:36 - INFO - __main__ - Step 5790: {'lr': 0.000498568009934547, 'samples': 277920, 'steps': 5789, 'loss/train': 1.9361377954483032} +07/25/2024 11:52:37 - INFO - __main__ - Step 5791: {'lr': 0.0004985674476388287, 'samples': 277968, 'steps': 5790, 'loss/train': 2.059079647064209} +07/25/2024 11:52:37 - INFO - __main__ - Step 5792: {'lr': 0.0004985668852330515, 'samples': 278016, 'steps': 5791, 'loss/train': 2.3257718086242676} +07/25/2024 11:52:37 - INFO - __main__ - Step 5793: {'lr': 0.000498566322717216, 'samples': 278064, 'steps': 5792, 'loss/train': 2.8165252208709717} +07/25/2024 11:52:38 - INFO - __main__ - Step 5794: {'lr': 0.0004985657600913222, 'samples': 278112, 'steps': 5793, 'loss/train': 2.2823617458343506} +07/25/2024 11:52:38 - INFO - __main__ - Step 5795: {'lr': 0.0004985651973553705, 'samples': 278160, 'steps': 5794, 'loss/train': 2.0992486476898193} +07/25/2024 11:52:38 - INFO - __main__ - Step 5796: {'lr': 0.000498564634509361, 'samples': 278208, 'steps': 5795, 'loss/train': 2.3710973262786865} +07/25/2024 11:52:38 - INFO - __main__ - Step 5797: {'lr': 0.0004985640715532942, 'samples': 278256, 'steps': 5796, 'loss/train': 0.2505936622619629} +07/25/2024 11:52:39 - INFO - __main__ - Step 5798: {'lr': 0.0004985635084871702, 'samples': 278304, 'steps': 5797, 'loss/train': 2.2689199447631836} +07/25/2024 11:52:39 - INFO - __main__ - Step 5799: {'lr': 0.000498562945310989, 'samples': 278352, 'steps': 5798, 'loss/train': 2.0356125831604004} +07/25/2024 11:52:39 - INFO - __main__ - Step 5800: {'lr': 0.0004985623820247514, 'samples': 278400, 'steps': 5799, 'loss/train': 2.0781900882720947} +07/25/2024 11:52:40 - INFO - __main__ - Step 5801: {'lr': 0.0004985618186284571, 'samples': 278448, 'steps': 5800, 'loss/train': 2.474374294281006} +07/25/2024 11:52:40 - INFO - __main__ - Step 5802: {'lr': 0.0004985612551221067, 'samples': 278496, 'steps': 5801, 'loss/train': 2.4910614490509033} +07/25/2024 11:52:40 - INFO - __main__ - Step 5803: {'lr': 0.0004985606915057004, 'samples': 278544, 'steps': 5802, 'loss/train': 0.22152522206306458} +07/25/2024 11:52:40 - INFO - __main__ - Step 5804: {'lr': 0.0004985601277792383, 'samples': 278592, 'steps': 5803, 'loss/train': 1.6120766401290894} +07/25/2024 11:52:41 - INFO - __main__ - Step 5805: {'lr': 0.0004985595639427207, 'samples': 278640, 'steps': 5804, 'loss/train': 2.291008949279785} +07/25/2024 11:52:41 - INFO - __main__ - Step 5806: {'lr': 0.0004985589999961479, 'samples': 278688, 'steps': 5805, 'loss/train': 2.307530164718628} +07/25/2024 11:52:41 - INFO - __main__ - Step 5807: {'lr': 0.0004985584359395202, 'samples': 278736, 'steps': 5806, 'loss/train': 2.491748332977295} +07/25/2024 11:52:42 - INFO - __main__ - Step 5808: {'lr': 0.0004985578717728379, 'samples': 278784, 'steps': 5807, 'loss/train': 2.5463688373565674} +07/25/2024 11:52:42 - INFO - __main__ - Step 5809: {'lr': 0.0004985573074961009, 'samples': 278832, 'steps': 5808, 'loss/train': 3.125501871109009} +07/25/2024 11:52:42 - INFO - __main__ - Step 5810: {'lr': 0.0004985567431093099, 'samples': 278880, 'steps': 5809, 'loss/train': 1.6481934785842896} +07/25/2024 11:52:42 - INFO - __main__ - Step 5811: {'lr': 0.0004985561786124648, 'samples': 278928, 'steps': 5810, 'loss/train': 2.4590277671813965} +07/25/2024 11:52:43 - INFO - __main__ - Step 5812: {'lr': 0.000498555614005566, 'samples': 278976, 'steps': 5811, 'loss/train': 1.6851266622543335} +07/25/2024 11:52:43 - INFO - __main__ - Step 5813: {'lr': 0.0004985550492886138, 'samples': 279024, 'steps': 5812, 'loss/train': 2.2166848182678223} +07/25/2024 11:52:43 - INFO - __main__ - Step 5814: {'lr': 0.0004985544844616083, 'samples': 279072, 'steps': 5813, 'loss/train': 2.370913028717041} +07/25/2024 11:52:44 - INFO - __main__ - Step 5815: {'lr': 0.0004985539195245499, 'samples': 279120, 'steps': 5814, 'loss/train': 1.7566691637039185} +07/25/2024 11:52:44 - INFO - __main__ - Step 5816: {'lr': 0.0004985533544774388, 'samples': 279168, 'steps': 5815, 'loss/train': 1.8377232551574707} +07/25/2024 11:52:44 - INFO - __main__ - Step 5817: {'lr': 0.0004985527893202752, 'samples': 279216, 'steps': 5816, 'loss/train': 3.4488003253936768} +07/25/2024 11:52:44 - INFO - __main__ - Step 5818: {'lr': 0.0004985522240530594, 'samples': 279264, 'steps': 5817, 'loss/train': 2.389176607131958} +07/25/2024 11:52:45 - INFO - __main__ - Step 5819: {'lr': 0.0004985516586757917, 'samples': 279312, 'steps': 5818, 'loss/train': 2.384036064147949} +07/25/2024 11:52:45 - INFO - __main__ - Step 5820: {'lr': 0.0004985510931884722, 'samples': 279360, 'steps': 5819, 'loss/train': 2.643824577331543} +07/25/2024 11:52:45 - INFO - __main__ - Step 5821: {'lr': 0.0004985505275911013, 'samples': 279408, 'steps': 5820, 'loss/train': 0.2610096037387848} +07/25/2024 11:52:46 - INFO - __main__ - Step 5822: {'lr': 0.0004985499618836792, 'samples': 279456, 'steps': 5821, 'loss/train': 2.3344757556915283} +07/25/2024 11:52:46 - INFO - __main__ - Step 5823: {'lr': 0.0004985493960662061, 'samples': 279504, 'steps': 5822, 'loss/train': 2.4023821353912354} +07/25/2024 11:52:46 - INFO - __main__ - Step 5824: {'lr': 0.0004985488301386823, 'samples': 279552, 'steps': 5823, 'loss/train': 1.8963276147842407} +07/25/2024 11:52:46 - INFO - __main__ - Step 5825: {'lr': 0.0004985482641011081, 'samples': 279600, 'steps': 5824, 'loss/train': 2.389923334121704} +07/25/2024 11:52:47 - INFO - __main__ - Step 5826: {'lr': 0.0004985476979534836, 'samples': 279648, 'steps': 5825, 'loss/train': 2.157109022140503} +07/25/2024 11:52:47 - INFO - __main__ - Step 5827: {'lr': 0.0004985471316958092, 'samples': 279696, 'steps': 5826, 'loss/train': 0.33638760447502136} +07/25/2024 11:52:47 - INFO - __main__ - Step 5828: {'lr': 0.0004985465653280851, 'samples': 279744, 'steps': 5827, 'loss/train': 1.5179214477539062} +07/25/2024 11:52:48 - INFO - __main__ - Step 5829: {'lr': 0.0004985459988503115, 'samples': 279792, 'steps': 5828, 'loss/train': 1.3720314502716064} +07/25/2024 11:52:48 - INFO - __main__ - Step 5830: {'lr': 0.0004985454322624887, 'samples': 279840, 'steps': 5829, 'loss/train': 2.702270269393921} +07/25/2024 11:52:48 - INFO - __main__ - Step 5831: {'lr': 0.000498544865564617, 'samples': 279888, 'steps': 5830, 'loss/train': 2.8160574436187744} +07/25/2024 11:52:48 - INFO - __main__ - Step 5832: {'lr': 0.0004985442987566966, 'samples': 279936, 'steps': 5831, 'loss/train': 1.9866306781768799} +07/25/2024 11:52:49 - INFO - __main__ - Step 5833: {'lr': 0.0004985437318387277, 'samples': 279984, 'steps': 5832, 'loss/train': 6.691227912902832} +07/25/2024 11:52:49 - INFO - __main__ - Step 5834: {'lr': 0.0004985431648107106, 'samples': 280032, 'steps': 5833, 'loss/train': 2.9984755516052246} +07/25/2024 11:52:49 - INFO - __main__ - Step 5835: {'lr': 0.0004985425976726456, 'samples': 280080, 'steps': 5834, 'loss/train': 2.7458741664886475} +07/25/2024 11:52:49 - INFO - __main__ - Step 5836: {'lr': 0.0004985420304245328, 'samples': 280128, 'steps': 5835, 'loss/train': 1.5697038173675537} +07/25/2024 11:52:50 - INFO - __main__ - Step 5837: {'lr': 0.0004985414630663727, 'samples': 280176, 'steps': 5836, 'loss/train': 2.1741960048675537} +07/25/2024 11:52:50 - INFO - __main__ - Step 5838: {'lr': 0.0004985408955981653, 'samples': 280224, 'steps': 5837, 'loss/train': 2.450730800628662} +07/25/2024 11:52:50 - INFO - __main__ - Step 5839: {'lr': 0.0004985403280199109, 'samples': 280272, 'steps': 5838, 'loss/train': 2.245189666748047} +07/25/2024 11:52:51 - INFO - __main__ - Step 5840: {'lr': 0.00049853976033161, 'samples': 280320, 'steps': 5839, 'loss/train': 2.14975905418396} +07/25/2024 11:52:51 - INFO - __main__ - Step 5841: {'lr': 0.0004985391925332625, 'samples': 280368, 'steps': 5840, 'loss/train': 1.6959209442138672} +07/25/2024 11:52:51 - INFO - __main__ - Step 5842: {'lr': 0.0004985386246248688, 'samples': 280416, 'steps': 5841, 'loss/train': 1.9643217325210571} +07/25/2024 11:52:51 - INFO - __main__ - Step 5843: {'lr': 0.0004985380566064293, 'samples': 280464, 'steps': 5842, 'loss/train': 2.1445884704589844} +07/25/2024 11:52:52 - INFO - __main__ - Step 5844: {'lr': 0.000498537488477944, 'samples': 280512, 'steps': 5843, 'loss/train': 2.268622398376465} +07/25/2024 11:52:52 - INFO - __main__ - Step 5845: {'lr': 0.0004985369202394133, 'samples': 280560, 'steps': 5844, 'loss/train': 0.23789875209331512} +07/25/2024 11:52:52 - INFO - __main__ - Step 5846: {'lr': 0.0004985363518908374, 'samples': 280608, 'steps': 5845, 'loss/train': 2.0376453399658203} +07/25/2024 11:52:53 - INFO - __main__ - Step 5847: {'lr': 0.0004985357834322166, 'samples': 280656, 'steps': 5846, 'loss/train': 2.109161853790283} +07/25/2024 11:52:53 - INFO - __main__ - Step 5848: {'lr': 0.000498535214863551, 'samples': 280704, 'steps': 5847, 'loss/train': 1.3660165071487427} +07/25/2024 11:52:53 - INFO - __main__ - Step 5849: {'lr': 0.0004985346461848411, 'samples': 280752, 'steps': 5848, 'loss/train': 2.2149922847747803} +07/25/2024 11:52:53 - INFO - __main__ - Step 5850: {'lr': 0.0004985340773960871, 'samples': 280800, 'steps': 5849, 'loss/train': 2.27177357673645} +07/25/2024 11:52:54 - INFO - __main__ - Step 5851: {'lr': 0.000498533508497289, 'samples': 280848, 'steps': 5850, 'loss/train': 0.7556124329566956} +07/25/2024 11:52:54 - INFO - __main__ - Step 5852: {'lr': 0.0004985329394884472, 'samples': 280896, 'steps': 5851, 'loss/train': 2.236703395843506} +07/25/2024 11:52:54 - INFO - __main__ - Step 5853: {'lr': 0.0004985323703695622, 'samples': 280944, 'steps': 5852, 'loss/train': 1.7990572452545166} +07/25/2024 11:52:55 - INFO - __main__ - Step 5854: {'lr': 0.0004985318011406338, 'samples': 280992, 'steps': 5853, 'loss/train': 2.2473416328430176} +07/25/2024 11:52:55 - INFO - __main__ - Step 5855: {'lr': 0.0004985312318016626, 'samples': 281040, 'steps': 5854, 'loss/train': 2.488672971725464} +07/25/2024 11:52:55 - INFO - __main__ - Step 5856: {'lr': 0.0004985306623526487, 'samples': 281088, 'steps': 5855, 'loss/train': 2.4818015098571777} +07/25/2024 11:52:55 - INFO - __main__ - Step 5857: {'lr': 0.0004985300927935924, 'samples': 281136, 'steps': 5856, 'loss/train': 7.484771251678467} +07/25/2024 11:52:56 - INFO - __main__ - Step 5858: {'lr': 0.000498529523124494, 'samples': 281184, 'steps': 5857, 'loss/train': 2.8216161727905273} +07/25/2024 11:52:56 - INFO - __main__ - Step 5859: {'lr': 0.0004985289533453535, 'samples': 281232, 'steps': 5858, 'loss/train': 2.6156413555145264} +07/25/2024 11:52:56 - INFO - __main__ - Step 5860: {'lr': 0.0004985283834561715, 'samples': 281280, 'steps': 5859, 'loss/train': 2.6633718013763428} +07/25/2024 11:52:57 - INFO - __main__ - Step 5861: {'lr': 0.0004985278134569481, 'samples': 281328, 'steps': 5860, 'loss/train': 2.109846830368042} +07/25/2024 11:52:57 - INFO - __main__ - Step 5862: {'lr': 0.0004985272433476835, 'samples': 281376, 'steps': 5861, 'loss/train': 0.6895480155944824} +07/25/2024 11:52:57 - INFO - __main__ - Step 5863: {'lr': 0.000498526673128378, 'samples': 281424, 'steps': 5862, 'loss/train': 1.6395519971847534} +07/25/2024 11:52:57 - INFO - __main__ - Step 5864: {'lr': 0.0004985261027990319, 'samples': 281472, 'steps': 5863, 'loss/train': 2.1971046924591064} +07/25/2024 11:52:58 - INFO - __main__ - Step 5865: {'lr': 0.0004985255323596454, 'samples': 281520, 'steps': 5864, 'loss/train': 2.3227386474609375} +07/25/2024 11:52:58 - INFO - __main__ - Step 5866: {'lr': 0.0004985249618102187, 'samples': 281568, 'steps': 5865, 'loss/train': 2.902421474456787} +07/25/2024 11:52:58 - INFO - __main__ - Step 5867: {'lr': 0.0004985243911507521, 'samples': 281616, 'steps': 5866, 'loss/train': 2.4078521728515625} +07/25/2024 11:52:59 - INFO - __main__ - Step 5868: {'lr': 0.0004985238203812459, 'samples': 281664, 'steps': 5867, 'loss/train': 2.1343140602111816} +07/25/2024 11:52:59 - INFO - __main__ - Step 5869: {'lr': 0.0004985232495017004, 'samples': 281712, 'steps': 5868, 'loss/train': 0.28995808959007263} +07/25/2024 11:52:59 - INFO - __main__ - Step 5870: {'lr': 0.0004985226785121157, 'samples': 281760, 'steps': 5869, 'loss/train': 2.226144790649414} +07/25/2024 11:52:59 - INFO - __main__ - Step 5871: {'lr': 0.000498522107412492, 'samples': 281808, 'steps': 5870, 'loss/train': 1.9993401765823364} +07/25/2024 11:53:00 - INFO - __main__ - Step 5872: {'lr': 0.0004985215362028298, 'samples': 281856, 'steps': 5871, 'loss/train': 2.719400405883789} +07/25/2024 11:53:00 - INFO - __main__ - Step 5873: {'lr': 0.0004985209648831293, 'samples': 281904, 'steps': 5872, 'loss/train': 1.4715559482574463} +07/25/2024 11:53:00 - INFO - __main__ - Step 5874: {'lr': 0.0004985203934533905, 'samples': 281952, 'steps': 5873, 'loss/train': 2.6228973865509033} +07/25/2024 11:53:01 - INFO - __main__ - Step 5875: {'lr': 0.000498519821913614, 'samples': 282000, 'steps': 5874, 'loss/train': 1.6430070400238037} +07/25/2024 11:53:01 - INFO - __main__ - Step 5876: {'lr': 0.0004985192502637997, 'samples': 282048, 'steps': 5875, 'loss/train': 1.114784598350525} +07/25/2024 11:53:01 - INFO - __main__ - Step 5877: {'lr': 0.0004985186785039482, 'samples': 282096, 'steps': 5876, 'loss/train': 2.066051483154297} +07/25/2024 11:53:01 - INFO - __main__ - Step 5878: {'lr': 0.0004985181066340595, 'samples': 282144, 'steps': 5877, 'loss/train': 2.109940528869629} +07/25/2024 11:53:02 - INFO - __main__ - Step 5879: {'lr': 0.0004985175346541341, 'samples': 282192, 'steps': 5878, 'loss/train': 2.2135753631591797} +07/25/2024 11:53:02 - INFO - __main__ - Step 5880: {'lr': 0.0004985169625641719, 'samples': 282240, 'steps': 5879, 'loss/train': 2.7388718128204346} +07/25/2024 11:53:02 - INFO - __main__ - Step 5881: {'lr': 0.0004985163903641736, 'samples': 282288, 'steps': 5880, 'loss/train': 7.534679412841797} +07/25/2024 11:53:03 - INFO - __main__ - Step 5882: {'lr': 0.0004985158180541389, 'samples': 282336, 'steps': 5881, 'loss/train': 2.8510050773620605} +07/25/2024 11:53:03 - INFO - __main__ - Step 5883: {'lr': 0.0004985152456340686, 'samples': 282384, 'steps': 5882, 'loss/train': 2.5634853839874268} +07/25/2024 11:53:03 - INFO - __main__ - Step 5884: {'lr': 0.0004985146731039626, 'samples': 282432, 'steps': 5883, 'loss/train': 2.2644083499908447} +07/25/2024 11:53:03 - INFO - __main__ - Step 5885: {'lr': 0.0004985141004638212, 'samples': 282480, 'steps': 5884, 'loss/train': 1.71623957157135} +07/25/2024 11:53:04 - INFO - __main__ - Step 5886: {'lr': 0.0004985135277136448, 'samples': 282528, 'steps': 5885, 'loss/train': 2.262796401977539} +07/25/2024 11:53:04 - INFO - __main__ - Step 5887: {'lr': 0.0004985129548534336, 'samples': 282576, 'steps': 5886, 'loss/train': 2.3036439418792725} +07/25/2024 11:53:04 - INFO - __main__ - Step 5888: {'lr': 0.0004985123818831878, 'samples': 282624, 'steps': 5887, 'loss/train': 1.8932433128356934} +07/25/2024 11:53:05 - INFO - __main__ - Step 5889: {'lr': 0.0004985118088029077, 'samples': 282672, 'steps': 5888, 'loss/train': 1.7803194522857666} +07/25/2024 11:53:05 - INFO - __main__ - Step 5890: {'lr': 0.0004985112356125934, 'samples': 282720, 'steps': 5889, 'loss/train': 2.5965335369110107} +07/25/2024 11:53:05 - INFO - __main__ - Step 5891: {'lr': 0.0004985106623122455, 'samples': 282768, 'steps': 5890, 'loss/train': 1.8410483598709106} +07/25/2024 11:53:05 - INFO - __main__ - Step 5892: {'lr': 0.000498510088901864, 'samples': 282816, 'steps': 5891, 'loss/train': 2.13565731048584} +07/25/2024 11:53:06 - INFO - __main__ - Step 5893: {'lr': 0.0004985095153814491, 'samples': 282864, 'steps': 5892, 'loss/train': 0.27855467796325684} +07/25/2024 11:53:06 - INFO - __main__ - Step 5894: {'lr': 0.0004985089417510012, 'samples': 282912, 'steps': 5893, 'loss/train': 2.3285510540008545} +07/25/2024 11:53:06 - INFO - __main__ - Step 5895: {'lr': 0.0004985083680105204, 'samples': 282960, 'steps': 5894, 'loss/train': 1.6720043420791626} +07/25/2024 11:53:07 - INFO - __main__ - Step 5896: {'lr': 0.0004985077941600072, 'samples': 283008, 'steps': 5895, 'loss/train': 2.5393967628479004} +07/25/2024 11:53:07 - INFO - __main__ - Step 5897: {'lr': 0.0004985072201994617, 'samples': 283056, 'steps': 5896, 'loss/train': 1.3413087129592896} +07/25/2024 11:53:07 - INFO - __main__ - Step 5898: {'lr': 0.0004985066461288842, 'samples': 283104, 'steps': 5897, 'loss/train': 1.886823296546936} +07/25/2024 11:53:07 - INFO - __main__ - Step 5899: {'lr': 0.0004985060719482748, 'samples': 283152, 'steps': 5898, 'loss/train': 2.4428765773773193} +07/25/2024 11:53:08 - INFO - __main__ - Step 5900: {'lr': 0.000498505497657634, 'samples': 283200, 'steps': 5899, 'loss/train': 1.1526867151260376} +07/25/2024 11:53:08 - INFO - __main__ - Step 5901: {'lr': 0.0004985049232569618, 'samples': 283248, 'steps': 5900, 'loss/train': 1.5990335941314697} +07/25/2024 11:53:08 - INFO - __main__ - Step 5902: {'lr': 0.0004985043487462586, 'samples': 283296, 'steps': 5901, 'loss/train': 2.160978078842163} +07/25/2024 11:53:09 - INFO - __main__ - Step 5903: {'lr': 0.0004985037741255247, 'samples': 283344, 'steps': 5902, 'loss/train': 1.786380648612976} +07/25/2024 11:53:09 - INFO - __main__ - Step 5904: {'lr': 0.0004985031993947603, 'samples': 283392, 'steps': 5903, 'loss/train': 2.374940872192383} +07/25/2024 11:53:09 - INFO - __main__ - Step 5905: {'lr': 0.0004985026245539656, 'samples': 283440, 'steps': 5904, 'loss/train': 7.055428504943848} +07/25/2024 11:53:09 - INFO - __main__ - Step 5906: {'lr': 0.0004985020496031409, 'samples': 283488, 'steps': 5905, 'loss/train': 2.678351402282715} +07/25/2024 11:53:10 - INFO - __main__ - Step 5907: {'lr': 0.0004985014745422865, 'samples': 283536, 'steps': 5906, 'loss/train': 1.858335256576538} +07/25/2024 11:53:10 - INFO - __main__ - Step 5908: {'lr': 0.0004985008993714026, 'samples': 283584, 'steps': 5907, 'loss/train': 2.307298421859741} +07/25/2024 11:53:10 - INFO - __main__ - Step 5909: {'lr': 0.0004985003240904894, 'samples': 283632, 'steps': 5908, 'loss/train': 1.281524896621704} +07/25/2024 11:53:11 - INFO - __main__ - Step 5910: {'lr': 0.0004984997486995473, 'samples': 283680, 'steps': 5909, 'loss/train': 2.6414268016815186} +07/25/2024 11:53:11 - INFO - __main__ - Step 5911: {'lr': 0.0004984991731985764, 'samples': 283728, 'steps': 5910, 'loss/train': 1.7970006465911865} +07/25/2024 11:53:11 - INFO - __main__ - Step 5912: {'lr': 0.0004984985975875771, 'samples': 283776, 'steps': 5911, 'loss/train': 1.7831804752349854} +07/25/2024 11:53:11 - INFO - __main__ - Step 5913: {'lr': 0.0004984980218665495, 'samples': 283824, 'steps': 5912, 'loss/train': 2.242932081222534} +07/25/2024 11:53:12 - INFO - __main__ - Step 5914: {'lr': 0.000498497446035494, 'samples': 283872, 'steps': 5913, 'loss/train': 2.0918080806732178} +07/25/2024 11:53:12 - INFO - __main__ - Step 5915: {'lr': 0.0004984968700944108, 'samples': 283920, 'steps': 5914, 'loss/train': 2.1546051502227783} +07/25/2024 11:53:12 - INFO - __main__ - Step 5916: {'lr': 0.0004984962940433001, 'samples': 283968, 'steps': 5915, 'loss/train': 2.353649377822876} +07/25/2024 11:53:13 - INFO - __main__ - Step 5917: {'lr': 0.0004984957178821622, 'samples': 284016, 'steps': 5916, 'loss/train': 0.32968056201934814} +07/25/2024 11:53:13 - INFO - __main__ - Step 5918: {'lr': 0.0004984951416109975, 'samples': 284064, 'steps': 5917, 'loss/train': 1.8442094326019287} +07/25/2024 11:53:13 - INFO - __main__ - Step 5919: {'lr': 0.0004984945652298059, 'samples': 284112, 'steps': 5918, 'loss/train': 2.239171028137207} +07/25/2024 11:53:13 - INFO - __main__ - Step 5920: {'lr': 0.000498493988738588, 'samples': 284160, 'steps': 5919, 'loss/train': 2.083855152130127} +07/25/2024 11:53:14 - INFO - __main__ - Step 5921: {'lr': 0.0004984934121373439, 'samples': 284208, 'steps': 5920, 'loss/train': 1.1896036863327026} +07/25/2024 11:53:14 - INFO - __main__ - Step 5922: {'lr': 0.0004984928354260739, 'samples': 284256, 'steps': 5921, 'loss/train': 2.5127182006835938} +07/25/2024 11:53:14 - INFO - __main__ - Step 5923: {'lr': 0.0004984922586047782, 'samples': 284304, 'steps': 5922, 'loss/train': 2.1790757179260254} +07/25/2024 11:53:15 - INFO - __main__ - Step 5924: {'lr': 0.0004984916816734572, 'samples': 284352, 'steps': 5923, 'loss/train': 2.2658660411834717} +07/25/2024 11:53:15 - INFO - __main__ - Step 5925: {'lr': 0.0004984911046321109, 'samples': 284400, 'steps': 5924, 'loss/train': 1.8964534997940063} +07/25/2024 11:53:15 - INFO - __main__ - Step 5926: {'lr': 0.0004984905274807397, 'samples': 284448, 'steps': 5925, 'loss/train': 1.8727493286132812} +07/25/2024 11:53:15 - INFO - __main__ - Step 5927: {'lr': 0.0004984899502193439, 'samples': 284496, 'steps': 5926, 'loss/train': 2.1602015495300293} +07/25/2024 11:53:16 - INFO - __main__ - Step 5928: {'lr': 0.0004984893728479238, 'samples': 284544, 'steps': 5927, 'loss/train': 2.0655064582824707} +07/25/2024 11:53:16 - INFO - __main__ - Step 5929: {'lr': 0.0004984887953664795, 'samples': 284592, 'steps': 5928, 'loss/train': 6.644392967224121} +07/25/2024 11:53:16 - INFO - __main__ - Step 5930: {'lr': 0.0004984882177750113, 'samples': 284640, 'steps': 5929, 'loss/train': 2.612657308578491} +07/25/2024 11:53:16 - INFO - __main__ - Step 5931: {'lr': 0.0004984876400735195, 'samples': 284688, 'steps': 5930, 'loss/train': 2.329479694366455} +07/25/2024 11:53:17 - INFO - __main__ - Step 5932: {'lr': 0.0004984870622620044, 'samples': 284736, 'steps': 5931, 'loss/train': 2.4709277153015137} +07/25/2024 11:53:17 - INFO - __main__ - Step 5933: {'lr': 0.0004984864843404661, 'samples': 284784, 'steps': 5932, 'loss/train': 1.5547593832015991} +07/25/2024 11:53:17 - INFO - __main__ - Step 5934: {'lr': 0.000498485906308905, 'samples': 284832, 'steps': 5933, 'loss/train': 1.7266108989715576} +07/25/2024 11:53:18 - INFO - __main__ - Step 5935: {'lr': 0.0004984853281673211, 'samples': 284880, 'steps': 5934, 'loss/train': 2.558091402053833} +07/25/2024 11:53:18 - INFO - __main__ - Step 5936: {'lr': 0.0004984847499157151, 'samples': 284928, 'steps': 5935, 'loss/train': 2.2810111045837402} +07/25/2024 11:53:18 - INFO - __main__ - Step 5937: {'lr': 0.000498484171554087, 'samples': 284976, 'steps': 5936, 'loss/train': 2.4251389503479004} +07/25/2024 11:53:18 - INFO - __main__ - Step 5938: {'lr': 0.000498483593082437, 'samples': 285024, 'steps': 5937, 'loss/train': 2.2108166217803955} +07/25/2024 11:53:19 - INFO - __main__ - Step 5939: {'lr': 0.0004984830145007655, 'samples': 285072, 'steps': 5938, 'loss/train': 2.4043750762939453} +07/25/2024 11:53:19 - INFO - __main__ - Step 5940: {'lr': 0.0004984824358090726, 'samples': 285120, 'steps': 5939, 'loss/train': 2.2070884704589844} +07/25/2024 11:53:19 - INFO - __main__ - Step 5941: {'lr': 0.0004984818570073587, 'samples': 285168, 'steps': 5940, 'loss/train': 0.22323879599571228} +07/25/2024 11:53:20 - INFO - __main__ - Step 5942: {'lr': 0.000498481278095624, 'samples': 285216, 'steps': 5941, 'loss/train': 0.9731089472770691} +07/25/2024 11:53:20 - INFO - __main__ - Step 5943: {'lr': 0.0004984806990738686, 'samples': 285264, 'steps': 5942, 'loss/train': 2.227159261703491} +07/25/2024 11:53:20 - INFO - __main__ - Step 5944: {'lr': 0.0004984801199420931, 'samples': 285312, 'steps': 5943, 'loss/train': 1.9801032543182373} +07/25/2024 11:53:20 - INFO - __main__ - Step 5945: {'lr': 0.0004984795407002975, 'samples': 285360, 'steps': 5944, 'loss/train': 1.6466081142425537} +07/25/2024 11:53:21 - INFO - __main__ - Step 5946: {'lr': 0.0004984789613484821, 'samples': 285408, 'steps': 5945, 'loss/train': 2.669093370437622} +07/25/2024 11:53:21 - INFO - __main__ - Step 5947: {'lr': 0.0004984783818866472, 'samples': 285456, 'steps': 5946, 'loss/train': 2.199007511138916} +07/25/2024 11:53:21 - INFO - __main__ - Step 5948: {'lr': 0.000498477802314793, 'samples': 285504, 'steps': 5947, 'loss/train': 2.1582071781158447} +07/25/2024 11:53:22 - INFO - __main__ - Step 5949: {'lr': 0.0004984772226329198, 'samples': 285552, 'steps': 5948, 'loss/train': 2.0307445526123047} +07/25/2024 11:53:22 - INFO - __main__ - Step 5950: {'lr': 0.0004984766428410279, 'samples': 285600, 'steps': 5949, 'loss/train': 1.6564022302627563} +07/25/2024 11:53:22 - INFO - __main__ - Step 5951: {'lr': 0.0004984760629391175, 'samples': 285648, 'steps': 5950, 'loss/train': 2.4187629222869873} +07/25/2024 11:53:22 - INFO - __main__ - Step 5952: {'lr': 0.0004984754829271888, 'samples': 285696, 'steps': 5951, 'loss/train': 2.2186663150787354} +07/25/2024 11:53:23 - INFO - __main__ - Step 5953: {'lr': 0.0004984749028052422, 'samples': 285744, 'steps': 5952, 'loss/train': 6.713469982147217} +07/25/2024 11:53:23 - INFO - __main__ - Step 5954: {'lr': 0.0004984743225732778, 'samples': 285792, 'steps': 5953, 'loss/train': 3.0348739624023438} +07/25/2024 11:53:23 - INFO - __main__ - Step 5955: {'lr': 0.0004984737422312959, 'samples': 285840, 'steps': 5954, 'loss/train': 2.571812391281128} +07/25/2024 11:53:24 - INFO - __main__ - Step 5956: {'lr': 0.0004984731617792969, 'samples': 285888, 'steps': 5955, 'loss/train': 2.108968496322632} +07/25/2024 11:53:24 - INFO - __main__ - Step 5957: {'lr': 0.0004984725812172809, 'samples': 285936, 'steps': 5956, 'loss/train': 2.4203596115112305} +07/25/2024 11:53:24 - INFO - __main__ - Step 5958: {'lr': 0.0004984720005452482, 'samples': 285984, 'steps': 5957, 'loss/train': 2.5201807022094727} +07/25/2024 11:53:24 - INFO - __main__ - Step 5959: {'lr': 0.000498471419763199, 'samples': 286032, 'steps': 5958, 'loss/train': 1.9733093976974487} +07/25/2024 11:53:25 - INFO - __main__ - Step 5960: {'lr': 0.0004984708388711338, 'samples': 286080, 'steps': 5959, 'loss/train': 2.263092041015625} +07/25/2024 11:53:25 - INFO - __main__ - Step 5961: {'lr': 0.0004984702578690525, 'samples': 286128, 'steps': 5960, 'loss/train': 2.172597646713257} +07/25/2024 11:53:25 - INFO - __main__ - Step 5962: {'lr': 0.0004984696767569556, 'samples': 286176, 'steps': 5961, 'loss/train': 2.3981170654296875} +07/25/2024 11:53:26 - INFO - __main__ - Step 5963: {'lr': 0.0004984690955348432, 'samples': 286224, 'steps': 5962, 'loss/train': 2.440504550933838} +07/25/2024 11:53:26 - INFO - __main__ - Step 5964: {'lr': 0.0004984685142027157, 'samples': 286272, 'steps': 5963, 'loss/train': 2.146113634109497} +07/25/2024 11:53:26 - INFO - __main__ - Step 5965: {'lr': 0.0004984679327605734, 'samples': 286320, 'steps': 5964, 'loss/train': 0.5329943299293518} +07/25/2024 11:53:26 - INFO - __main__ - Step 5966: {'lr': 0.0004984673512084163, 'samples': 286368, 'steps': 5965, 'loss/train': 2.2717432975769043} +07/25/2024 11:53:27 - INFO - __main__ - Step 5967: {'lr': 0.0004984667695462449, 'samples': 286416, 'steps': 5966, 'loss/train': 1.7803761959075928} +07/25/2024 11:53:27 - INFO - __main__ - Step 5968: {'lr': 0.0004984661877740595, 'samples': 286464, 'steps': 5967, 'loss/train': 1.6704391241073608} +07/25/2024 11:53:27 - INFO - __main__ - Step 5969: {'lr': 0.00049846560589186, 'samples': 286512, 'steps': 5968, 'loss/train': 2.0887885093688965} +07/25/2024 11:53:28 - INFO - __main__ - Step 5970: {'lr': 0.000498465023899647, 'samples': 286560, 'steps': 5969, 'loss/train': 1.519692301750183} +07/25/2024 11:53:28 - INFO - __main__ - Step 5971: {'lr': 0.0004984644417974207, 'samples': 286608, 'steps': 5970, 'loss/train': 2.2049918174743652} +07/25/2024 11:53:28 - INFO - __main__ - Step 5972: {'lr': 0.0004984638595851812, 'samples': 286656, 'steps': 5971, 'loss/train': 2.2776496410369873} +07/25/2024 11:53:28 - INFO - __main__ - Step 5973: {'lr': 0.000498463277262929, 'samples': 286704, 'steps': 5972, 'loss/train': 1.8111473321914673} +07/25/2024 11:53:29 - INFO - __main__ - Step 5974: {'lr': 0.0004984626948306641, 'samples': 286752, 'steps': 5973, 'loss/train': 2.2146685123443604} +07/25/2024 11:53:29 - INFO - __main__ - Step 5975: {'lr': 0.000498462112288387, 'samples': 286800, 'steps': 5974, 'loss/train': 2.330214500427246} +07/25/2024 11:53:29 - INFO - __main__ - Step 5976: {'lr': 0.0004984615296360977, 'samples': 286848, 'steps': 5975, 'loss/train': 1.6752007007598877} +07/25/2024 11:53:30 - INFO - __main__ - Step 5977: {'lr': 0.0004984609468737968, 'samples': 286896, 'steps': 5976, 'loss/train': 9.842414855957031} +07/25/2024 11:53:30 - INFO - __main__ - Step 5978: {'lr': 0.0004984603640014841, 'samples': 286944, 'steps': 5977, 'loss/train': 2.2687060832977295} +07/25/2024 11:53:30 - INFO - __main__ - Step 5979: {'lr': 0.0004984597810191602, 'samples': 286992, 'steps': 5978, 'loss/train': 1.7607827186584473} +07/25/2024 11:53:30 - INFO - __main__ - Step 5980: {'lr': 0.0004984591979268254, 'samples': 287040, 'steps': 5979, 'loss/train': 2.4679648876190186} +07/25/2024 11:53:31 - INFO - __main__ - Step 5981: {'lr': 0.0004984586147244797, 'samples': 287088, 'steps': 5980, 'loss/train': 2.822303056716919} +07/25/2024 11:53:31 - INFO - __main__ - Step 5982: {'lr': 0.0004984580314121235, 'samples': 287136, 'steps': 5981, 'loss/train': 3.0674643516540527} +07/25/2024 11:53:31 - INFO - __main__ - Step 5983: {'lr': 0.0004984574479897571, 'samples': 287184, 'steps': 5982, 'loss/train': 2.339664936065674} +07/25/2024 11:53:32 - INFO - __main__ - Step 5984: {'lr': 0.0004984568644573806, 'samples': 287232, 'steps': 5983, 'loss/train': 2.6078414916992188} +07/25/2024 11:53:32 - INFO - __main__ - Step 5985: {'lr': 0.0004984562808149944, 'samples': 287280, 'steps': 5984, 'loss/train': 2.121220350265503} +07/25/2024 11:53:32 - INFO - __main__ - Step 5986: {'lr': 0.0004984556970625988, 'samples': 287328, 'steps': 5985, 'loss/train': 1.947849154472351} +07/25/2024 11:53:32 - INFO - __main__ - Step 5987: {'lr': 0.000498455113200194, 'samples': 287376, 'steps': 5986, 'loss/train': 1.9162204265594482} +07/25/2024 11:53:33 - INFO - __main__ - Step 5988: {'lr': 0.0004984545292277802, 'samples': 287424, 'steps': 5987, 'loss/train': 2.228752613067627} +07/25/2024 11:53:33 - INFO - __main__ - Step 5989: {'lr': 0.0004984539451453575, 'samples': 287472, 'steps': 5988, 'loss/train': 1.875002384185791} +07/25/2024 11:53:33 - INFO - __main__ - Step 5990: {'lr': 0.0004984533609529266, 'samples': 287520, 'steps': 5989, 'loss/train': 2.1450483798980713} +07/25/2024 11:53:34 - INFO - __main__ - Step 5991: {'lr': 0.0004984527766504873, 'samples': 287568, 'steps': 5990, 'loss/train': 3.2017886638641357} +07/25/2024 11:53:34 - INFO - __main__ - Step 5992: {'lr': 0.0004984521922380403, 'samples': 287616, 'steps': 5991, 'loss/train': 1.05487859249115} +07/25/2024 11:53:34 - INFO - __main__ - Step 5993: {'lr': 0.0004984516077155856, 'samples': 287664, 'steps': 5992, 'loss/train': 1.6835113763809204} +07/25/2024 11:53:34 - INFO - __main__ - Step 5994: {'lr': 0.0004984510230831233, 'samples': 287712, 'steps': 5993, 'loss/train': 2.283860206604004} +07/25/2024 11:53:35 - INFO - __main__ - Step 5995: {'lr': 0.000498450438340654, 'samples': 287760, 'steps': 5994, 'loss/train': 2.6363675594329834} +07/25/2024 11:53:35 - INFO - __main__ - Step 5996: {'lr': 0.0004984498534881778, 'samples': 287808, 'steps': 5995, 'loss/train': 2.416139841079712} +07/25/2024 11:53:35 - INFO - __main__ - Step 5997: {'lr': 0.0004984492685256949, 'samples': 287856, 'steps': 5996, 'loss/train': 1.8937448263168335} +07/25/2024 11:53:36 - INFO - __main__ - Step 5998: {'lr': 0.0004984486834532057, 'samples': 287904, 'steps': 5997, 'loss/train': 2.7413876056671143} +07/25/2024 11:53:36 - INFO - __main__ - Step 5999: {'lr': 0.0004984480982707103, 'samples': 287952, 'steps': 5998, 'loss/train': 1.8802028894424438} +07/25/2024 11:53:36 - INFO - __main__ - Step 6000: {'lr': 0.0004984475129782091, 'samples': 288000, 'steps': 5999, 'loss/train': 1.445600986480713} +07/25/2024 11:53:36 - INFO - __main__ - Step 6001: {'lr': 0.0004984469275757024, 'samples': 288048, 'steps': 6000, 'loss/train': 7.634767532348633} +07/25/2024 11:53:37 - INFO - __main__ - Step 6002: {'lr': 0.0004984463420631902, 'samples': 288096, 'steps': 6001, 'loss/train': 2.8206522464752197} +07/25/2024 11:53:37 - INFO - __main__ - Step 6003: {'lr': 0.000498445756440673, 'samples': 288144, 'steps': 6002, 'loss/train': 3.5257298946380615} +07/25/2024 11:53:37 - INFO - __main__ - Step 6004: {'lr': 0.000498445170708151, 'samples': 288192, 'steps': 6003, 'loss/train': 2.1621484756469727} +07/25/2024 11:53:38 - INFO - __main__ - Step 6005: {'lr': 0.0004984445848656244, 'samples': 288240, 'steps': 6004, 'loss/train': 3.5468387603759766} +07/25/2024 11:53:38 - INFO - __main__ - Step 6006: {'lr': 0.0004984439989130935, 'samples': 288288, 'steps': 6005, 'loss/train': 2.744662046432495} +07/25/2024 11:53:38 - INFO - __main__ - Step 6007: {'lr': 0.0004984434128505585, 'samples': 288336, 'steps': 6006, 'loss/train': 2.719477653503418} +07/25/2024 11:53:38 - INFO - __main__ - Step 6008: {'lr': 0.0004984428266780199, 'samples': 288384, 'steps': 6007, 'loss/train': 2.3572590351104736} +07/25/2024 11:53:39 - INFO - __main__ - Step 6009: {'lr': 0.0004984422403954776, 'samples': 288432, 'steps': 6008, 'loss/train': 2.367433547973633} +07/25/2024 11:53:39 - INFO - __main__ - Step 6010: {'lr': 0.0004984416540029321, 'samples': 288480, 'steps': 6009, 'loss/train': 2.066298484802246} +07/25/2024 11:53:39 - INFO - __main__ - Step 6011: {'lr': 0.0004984410675003835, 'samples': 288528, 'steps': 6010, 'loss/train': 2.1771109104156494} +07/25/2024 11:53:40 - INFO - __main__ - Step 6012: {'lr': 0.0004984404808878323, 'samples': 288576, 'steps': 6011, 'loss/train': 2.227574348449707} +07/25/2024 11:53:40 - INFO - __main__ - Step 6013: {'lr': 0.0004984398941652785, 'samples': 288624, 'steps': 6012, 'loss/train': 1.3688234090805054} +07/25/2024 11:53:40 - INFO - __main__ - Step 6014: {'lr': 0.0004984393073327226, 'samples': 288672, 'steps': 6013, 'loss/train': 2.0525312423706055} +07/25/2024 11:53:40 - INFO - __main__ - Step 6015: {'lr': 0.0004984387203901646, 'samples': 288720, 'steps': 6014, 'loss/train': 3.3268938064575195} +07/25/2024 11:53:41 - INFO - __main__ - Step 6016: {'lr': 0.0004984381333376051, 'samples': 288768, 'steps': 6015, 'loss/train': 1.2882964611053467} +07/25/2024 11:53:41 - INFO - __main__ - Step 6017: {'lr': 0.000498437546175044, 'samples': 288816, 'steps': 6016, 'loss/train': 2.235666275024414} +07/25/2024 11:53:41 - INFO - __main__ - Step 6018: {'lr': 0.0004984369589024817, 'samples': 288864, 'steps': 6017, 'loss/train': 2.4185285568237305} +07/25/2024 11:53:42 - INFO - __main__ - Step 6019: {'lr': 0.0004984363715199186, 'samples': 288912, 'steps': 6018, 'loss/train': 2.290048122406006} +07/25/2024 11:53:42 - INFO - __main__ - Step 6020: {'lr': 0.0004984357840273546, 'samples': 288960, 'steps': 6019, 'loss/train': 2.087048292160034} +07/25/2024 11:53:42 - INFO - __main__ - Step 6021: {'lr': 0.0004984351964247905, 'samples': 289008, 'steps': 6020, 'loss/train': 2.428349018096924} +07/25/2024 11:53:42 - INFO - __main__ - Step 6022: {'lr': 0.0004984346087122261, 'samples': 289056, 'steps': 6021, 'loss/train': 2.1553821563720703} +07/25/2024 11:53:43 - INFO - __main__ - Step 6023: {'lr': 0.0004984340208896618, 'samples': 289104, 'steps': 6022, 'loss/train': 2.566909074783325} +07/25/2024 11:53:43 - INFO - __main__ - Step 6024: {'lr': 0.0004984334329570979, 'samples': 289152, 'steps': 6023, 'loss/train': 2.2175679206848145} +07/25/2024 11:53:43 - INFO - __main__ - Step 6025: {'lr': 0.0004984328449145346, 'samples': 289200, 'steps': 6024, 'loss/train': 3.962611675262451} +07/25/2024 11:53:43 - INFO - __main__ - Step 6026: {'lr': 0.0004984322567619722, 'samples': 289248, 'steps': 6025, 'loss/train': 2.336561441421509} +07/25/2024 11:53:44 - INFO - __main__ - Step 6027: {'lr': 0.000498431668499411, 'samples': 289296, 'steps': 6026, 'loss/train': 2.676509380340576} +07/25/2024 11:53:44 - INFO - __main__ - Step 6028: {'lr': 0.0004984310801268513, 'samples': 289344, 'steps': 6027, 'loss/train': 2.083948850631714} +07/25/2024 11:53:44 - INFO - __main__ - Step 6029: {'lr': 0.0004984304916442931, 'samples': 289392, 'steps': 6028, 'loss/train': 3.3818840980529785} +07/25/2024 11:53:45 - INFO - __main__ - Step 6030: {'lr': 0.000498429903051737, 'samples': 289440, 'steps': 6029, 'loss/train': 1.0668553113937378} +07/25/2024 11:53:45 - INFO - __main__ - Step 6031: {'lr': 0.0004984293143491831, 'samples': 289488, 'steps': 6030, 'loss/train': 1.663351058959961} +07/25/2024 11:53:45 - INFO - __main__ - Step 6032: {'lr': 0.0004984287255366315, 'samples': 289536, 'steps': 6031, 'loss/train': 2.1688129901885986} +07/25/2024 11:53:45 - INFO - __main__ - Step 6033: {'lr': 0.0004984281366140827, 'samples': 289584, 'steps': 6032, 'loss/train': 2.5908169746398926} +07/25/2024 11:53:46 - INFO - __main__ - Step 6034: {'lr': 0.0004984275475815368, 'samples': 289632, 'steps': 6033, 'loss/train': 1.8581570386886597} +07/25/2024 11:53:46 - INFO - __main__ - Step 6035: {'lr': 0.0004984269584389943, 'samples': 289680, 'steps': 6034, 'loss/train': 2.196916341781616} +07/25/2024 11:53:46 - INFO - __main__ - Step 6036: {'lr': 0.0004984263691864552, 'samples': 289728, 'steps': 6035, 'loss/train': 2.3453025817871094} +07/25/2024 11:53:47 - INFO - __main__ - Step 6037: {'lr': 0.00049842577982392, 'samples': 289776, 'steps': 6036, 'loss/train': 1.6050117015838623} +07/25/2024 11:53:47 - INFO - __main__ - Step 6038: {'lr': 0.0004984251903513887, 'samples': 289824, 'steps': 6037, 'loss/train': 2.036144971847534} +07/25/2024 11:53:47 - INFO - __main__ - Step 6039: {'lr': 0.0004984246007688617, 'samples': 289872, 'steps': 6038, 'loss/train': 2.9456565380096436} +07/25/2024 11:53:47 - INFO - __main__ - Step 6040: {'lr': 0.0004984240110763393, 'samples': 289920, 'steps': 6039, 'loss/train': 1.9801812171936035} +07/25/2024 11:53:48 - INFO - __main__ - Step 6041: {'lr': 0.0004984234212738216, 'samples': 289968, 'steps': 6040, 'loss/train': 3.228259563446045} +07/25/2024 11:53:48 - INFO - __main__ - Step 6042: {'lr': 0.0004984228313613091, 'samples': 290016, 'steps': 6041, 'loss/train': 1.8695130348205566} +07/25/2024 11:53:48 - INFO - __main__ - Step 6043: {'lr': 0.0004984222413388019, 'samples': 290064, 'steps': 6042, 'loss/train': 2.9349470138549805} +07/25/2024 11:53:49 - INFO - __main__ - Step 6044: {'lr': 0.0004984216512063002, 'samples': 290112, 'steps': 6043, 'loss/train': 1.1884551048278809} +07/25/2024 11:53:49 - INFO - __main__ - Step 6045: {'lr': 0.0004984210609638044, 'samples': 290160, 'steps': 6044, 'loss/train': 1.9979333877563477} +07/25/2024 11:53:49 - INFO - __main__ - Step 6046: {'lr': 0.0004984204706113147, 'samples': 290208, 'steps': 6045, 'loss/train': 2.390059232711792} +07/25/2024 11:53:49 - INFO - __main__ - Step 6047: {'lr': 0.0004984198801488315, 'samples': 290256, 'steps': 6046, 'loss/train': 2.2176156044006348} +07/25/2024 11:53:50 - INFO - __main__ - Step 6048: {'lr': 0.0004984192895763548, 'samples': 290304, 'steps': 6047, 'loss/train': 2.0592997074127197} +07/25/2024 11:53:50 - INFO - __main__ - Step 6049: {'lr': 0.0004984186988938851, 'samples': 290352, 'steps': 6048, 'loss/train': 2.574953079223633} +07/25/2024 11:53:50 - INFO - __main__ - Step 6050: {'lr': 0.0004984181081014225, 'samples': 290400, 'steps': 6049, 'loss/train': 2.276047945022583} +07/25/2024 11:53:51 - INFO - __main__ - Step 6051: {'lr': 0.0004984175171989673, 'samples': 290448, 'steps': 6050, 'loss/train': 2.317894220352173} +07/25/2024 11:53:51 - INFO - __main__ - Step 6052: {'lr': 0.0004984169261865198, 'samples': 290496, 'steps': 6051, 'loss/train': 1.984994649887085} +07/25/2024 11:53:51 - INFO - __main__ - Step 6053: {'lr': 0.0004984163350640802, 'samples': 290544, 'steps': 6052, 'loss/train': 2.7351815700531006} +07/25/2024 11:53:51 - INFO - __main__ - Step 6054: {'lr': 0.0004984157438316489, 'samples': 290592, 'steps': 6053, 'loss/train': 1.2751272916793823} +07/25/2024 11:53:52 - INFO - __main__ - Step 6055: {'lr': 0.000498415152489226, 'samples': 290640, 'steps': 6054, 'loss/train': 1.6705803871154785} +07/25/2024 11:53:52 - INFO - __main__ - Step 6056: {'lr': 0.0004984145610368119, 'samples': 290688, 'steps': 6055, 'loss/train': 2.436396598815918} +07/25/2024 11:53:52 - INFO - __main__ - Step 6057: {'lr': 0.0004984139694744067, 'samples': 290736, 'steps': 6056, 'loss/train': 2.74666428565979} +07/25/2024 11:53:53 - INFO - __main__ - Step 6058: {'lr': 0.0004984133778020109, 'samples': 290784, 'steps': 6057, 'loss/train': 1.4712117910385132} +07/25/2024 11:53:53 - INFO - __main__ - Step 6059: {'lr': 0.0004984127860196244, 'samples': 290832, 'steps': 6058, 'loss/train': 1.6255276203155518} +07/25/2024 11:53:53 - INFO - __main__ - Step 6060: {'lr': 0.000498412194127248, 'samples': 290880, 'steps': 6059, 'loss/train': 2.9592971801757812} +07/25/2024 11:53:53 - INFO - __main__ - Step 6061: {'lr': 0.0004984116021248813, 'samples': 290928, 'steps': 6060, 'loss/train': 2.397047519683838} +07/25/2024 11:53:54 - INFO - __main__ - Step 6062: {'lr': 0.0004984110100125251, 'samples': 290976, 'steps': 6061, 'loss/train': 2.047508955001831} +07/25/2024 11:53:54 - INFO - __main__ - Step 6063: {'lr': 0.0004984104177901794, 'samples': 291024, 'steps': 6062, 'loss/train': 2.7428171634674072} +07/25/2024 11:53:54 - INFO - __main__ - Step 6064: {'lr': 0.0004984098254578446, 'samples': 291072, 'steps': 6063, 'loss/train': 2.3556435108184814} +07/25/2024 11:53:55 - INFO - __main__ - Step 6065: {'lr': 0.0004984092330155207, 'samples': 291120, 'steps': 6064, 'loss/train': 2.7445600032806396} +07/25/2024 11:53:55 - INFO - __main__ - Step 6066: {'lr': 0.0004984086404632084, 'samples': 291168, 'steps': 6065, 'loss/train': 1.375274896621704} +07/25/2024 11:53:55 - INFO - __main__ - Step 6067: {'lr': 0.0004984080478009076, 'samples': 291216, 'steps': 6066, 'loss/train': 1.8289883136749268} +07/25/2024 11:53:55 - INFO - __main__ - Step 6068: {'lr': 0.0004984074550286187, 'samples': 291264, 'steps': 6067, 'loss/train': 2.2771923542022705} +07/25/2024 11:53:56 - INFO - __main__ - Step 6069: {'lr': 0.0004984068621463419, 'samples': 291312, 'steps': 6068, 'loss/train': 2.508242130279541} +07/25/2024 11:53:56 - INFO - __main__ - Step 6070: {'lr': 0.0004984062691540775, 'samples': 291360, 'steps': 6069, 'loss/train': 2.1869964599609375} +07/25/2024 11:53:56 - INFO - __main__ - Step 6071: {'lr': 0.0004984056760518258, 'samples': 291408, 'steps': 6070, 'loss/train': 2.2236363887786865} +07/25/2024 11:53:57 - INFO - __main__ - Step 6072: {'lr': 0.0004984050828395869, 'samples': 291456, 'steps': 6071, 'loss/train': 2.202523708343506} +07/25/2024 11:53:57 - INFO - __main__ - Step 6073: {'lr': 0.0004984044895173614, 'samples': 291504, 'steps': 6072, 'loss/train': 2.527649402618408} +07/25/2024 11:53:57 - INFO - __main__ - Step 6074: {'lr': 0.0004984038960851492, 'samples': 291552, 'steps': 6073, 'loss/train': 2.1274819374084473} +07/25/2024 11:53:57 - INFO - __main__ - Step 6075: {'lr': 0.0004984033025429508, 'samples': 291600, 'steps': 6074, 'loss/train': 2.255983591079712} +07/25/2024 11:53:58 - INFO - __main__ - Step 6076: {'lr': 0.0004984027088907663, 'samples': 291648, 'steps': 6075, 'loss/train': 2.1290135383605957} +07/25/2024 11:53:58 - INFO - __main__ - Step 6077: {'lr': 0.0004984021151285961, 'samples': 291696, 'steps': 6076, 'loss/train': 3.2534072399139404} +07/25/2024 11:53:58 - INFO - __main__ - Step 6078: {'lr': 0.0004984015212564405, 'samples': 291744, 'steps': 6077, 'loss/train': 2.1504931449890137} +07/25/2024 11:53:59 - INFO - __main__ - Step 6079: {'lr': 0.0004984009272742995, 'samples': 291792, 'steps': 6078, 'loss/train': 2.3745334148406982} +07/25/2024 11:53:59 - INFO - __main__ - Step 6080: {'lr': 0.0004984003331821736, 'samples': 291840, 'steps': 6079, 'loss/train': 1.4507606029510498} +07/25/2024 11:53:59 - INFO - __main__ - Step 6081: {'lr': 0.000498399738980063, 'samples': 291888, 'steps': 6080, 'loss/train': 2.0688979625701904} +07/25/2024 11:53:59 - INFO - __main__ - Step 6082: {'lr': 0.0004983991446679679, 'samples': 291936, 'steps': 6081, 'loss/train': 1.6060997247695923} +07/25/2024 11:54:00 - INFO - __main__ - Step 6083: {'lr': 0.0004983985502458886, 'samples': 291984, 'steps': 6082, 'loss/train': 2.0780770778656006} +07/25/2024 11:54:00 - INFO - __main__ - Step 6084: {'lr': 0.0004983979557138255, 'samples': 292032, 'steps': 6083, 'loss/train': 2.097414970397949} +07/25/2024 11:54:00 - INFO - __main__ - Step 6085: {'lr': 0.0004983973610717786, 'samples': 292080, 'steps': 6084, 'loss/train': 2.4576661586761475} +07/25/2024 11:54:01 - INFO - __main__ - Step 6086: {'lr': 0.0004983967663197483, 'samples': 292128, 'steps': 6085, 'loss/train': 2.329864263534546} +07/25/2024 11:54:01 - INFO - __main__ - Step 6087: {'lr': 0.0004983961714577351, 'samples': 292176, 'steps': 6086, 'loss/train': 2.1841084957122803} +07/25/2024 11:54:01 - INFO - __main__ - Step 6088: {'lr': 0.0004983955764857388, 'samples': 292224, 'steps': 6087, 'loss/train': 2.430180311203003} +07/25/2024 11:54:01 - INFO - __main__ - Step 6089: {'lr': 0.00049839498140376, 'samples': 292272, 'steps': 6088, 'loss/train': 2.95733904838562} +07/25/2024 11:54:02 - INFO - __main__ - Step 6090: {'lr': 0.0004983943862117989, 'samples': 292320, 'steps': 6089, 'loss/train': 2.0111887454986572} +07/25/2024 11:54:02 - INFO - __main__ - Step 6091: {'lr': 0.0004983937909098557, 'samples': 292368, 'steps': 6090, 'loss/train': 2.126291275024414} +07/25/2024 11:54:02 - INFO - __main__ - Step 6092: {'lr': 0.0004983931954979306, 'samples': 292416, 'steps': 6091, 'loss/train': 1.9675301313400269} +07/25/2024 11:54:03 - INFO - __main__ - Step 6093: {'lr': 0.0004983925999760241, 'samples': 292464, 'steps': 6092, 'loss/train': 2.1899290084838867} +07/25/2024 11:54:03 - INFO - __main__ - Step 6094: {'lr': 0.0004983920043441362, 'samples': 292512, 'steps': 6093, 'loss/train': 2.01338791847229} +07/25/2024 11:54:03 - INFO - __main__ - Step 6095: {'lr': 0.0004983914086022673, 'samples': 292560, 'steps': 6094, 'loss/train': 2.0226075649261475} +07/25/2024 11:54:03 - INFO - __main__ - Step 6096: {'lr': 0.0004983908127504177, 'samples': 292608, 'steps': 6095, 'loss/train': 2.1593804359436035} +07/25/2024 11:54:04 - INFO - __main__ - Step 6097: {'lr': 0.0004983902167885875, 'samples': 292656, 'steps': 6096, 'loss/train': 2.238870620727539} +07/25/2024 11:54:04 - INFO - __main__ - Step 6098: {'lr': 0.0004983896207167771, 'samples': 292704, 'steps': 6097, 'loss/train': 1.5102440118789673} +07/25/2024 11:54:04 - INFO - __main__ - Step 6099: {'lr': 0.0004983890245349869, 'samples': 292752, 'steps': 6098, 'loss/train': 2.673556089401245} +07/25/2024 11:54:04 - INFO - __main__ - Step 6100: {'lr': 0.0004983884282432168, 'samples': 292800, 'steps': 6099, 'loss/train': 1.9652079343795776} +07/25/2024 11:54:05 - INFO - __main__ - Step 6101: {'lr': 0.0004983878318414674, 'samples': 292848, 'steps': 6100, 'loss/train': 2.5000219345092773} +07/25/2024 11:54:05 - INFO - __main__ - Step 6102: {'lr': 0.0004983872353297388, 'samples': 292896, 'steps': 6101, 'loss/train': 1.996846079826355} +07/25/2024 11:54:05 - INFO - __main__ - Step 6103: {'lr': 0.0004983866387080312, 'samples': 292944, 'steps': 6102, 'loss/train': 2.039306402206421} +07/25/2024 11:54:06 - INFO - __main__ - Step 6104: {'lr': 0.0004983860419763451, 'samples': 292992, 'steps': 6103, 'loss/train': 1.9175714254379272} +07/25/2024 11:54:06 - INFO - __main__ - Step 6105: {'lr': 0.0004983854451346806, 'samples': 293040, 'steps': 6104, 'loss/train': 2.127671957015991} +07/25/2024 11:54:06 - INFO - __main__ - Step 6106: {'lr': 0.0004983848481830378, 'samples': 293088, 'steps': 6105, 'loss/train': 1.785735845565796} +07/25/2024 11:54:06 - INFO - __main__ - Step 6107: {'lr': 0.0004983842511214173, 'samples': 293136, 'steps': 6106, 'loss/train': 1.93449866771698} +07/25/2024 11:54:07 - INFO - __main__ - Step 6108: {'lr': 0.0004983836539498192, 'samples': 293184, 'steps': 6107, 'loss/train': 1.9004729986190796} +07/25/2024 11:54:07 - INFO - __main__ - Step 6109: {'lr': 0.0004983830566682437, 'samples': 293232, 'steps': 6108, 'loss/train': 2.2965762615203857} +07/25/2024 11:54:07 - INFO - __main__ - Step 6110: {'lr': 0.0004983824592766912, 'samples': 293280, 'steps': 6109, 'loss/train': 1.8993768692016602} +07/25/2024 11:54:08 - INFO - __main__ - Step 6111: {'lr': 0.000498381861775162, 'samples': 293328, 'steps': 6110, 'loss/train': 2.8071389198303223} +07/25/2024 11:54:08 - INFO - __main__ - Step 6112: {'lr': 0.0004983812641636561, 'samples': 293376, 'steps': 6111, 'loss/train': 2.261505603790283} +07/25/2024 11:54:08 - INFO - __main__ - Step 6113: {'lr': 0.0004983806664421741, 'samples': 293424, 'steps': 6112, 'loss/train': 2.690063714981079} +07/25/2024 11:54:08 - INFO - __main__ - Step 6114: {'lr': 0.0004983800686107159, 'samples': 293472, 'steps': 6113, 'loss/train': 2.056894302368164} +07/25/2024 11:54:09 - INFO - __main__ - Step 6115: {'lr': 0.0004983794706692822, 'samples': 293520, 'steps': 6114, 'loss/train': 2.3669662475585938} +07/25/2024 11:54:09 - INFO - __main__ - Step 6116: {'lr': 0.0004983788726178729, 'samples': 293568, 'steps': 6115, 'loss/train': 2.752906560897827} +07/25/2024 11:54:09 - INFO - __main__ - Step 6117: {'lr': 0.0004983782744564884, 'samples': 293616, 'steps': 6116, 'loss/train': 2.252032995223999} +07/25/2024 11:54:10 - INFO - __main__ - Step 6118: {'lr': 0.000498377676185129, 'samples': 293664, 'steps': 6117, 'loss/train': 2.147562026977539} +07/25/2024 11:54:10 - INFO - __main__ - Step 6119: {'lr': 0.0004983770778037948, 'samples': 293712, 'steps': 6118, 'loss/train': 1.481270670890808} +07/25/2024 11:54:10 - INFO - __main__ - Step 6120: {'lr': 0.0004983764793124863, 'samples': 293760, 'steps': 6119, 'loss/train': 2.377204418182373} +07/25/2024 11:54:10 - INFO - __main__ - Step 6121: {'lr': 0.0004983758807112036, 'samples': 293808, 'steps': 6120, 'loss/train': 1.8682758808135986} +07/25/2024 11:54:11 - INFO - __main__ - Step 6122: {'lr': 0.0004983752819999471, 'samples': 293856, 'steps': 6121, 'loss/train': 0.31017550826072693} +07/25/2024 11:54:11 - INFO - __main__ - Step 6123: {'lr': 0.0004983746831787169, 'samples': 293904, 'steps': 6122, 'loss/train': 2.1910324096679688} +07/25/2024 11:54:11 - INFO - __main__ - Step 6124: {'lr': 0.0004983740842475135, 'samples': 293952, 'steps': 6123, 'loss/train': 1.600382924079895} +07/25/2024 11:54:12 - INFO - __main__ - Step 6125: {'lr': 0.0004983734852063369, 'samples': 294000, 'steps': 6124, 'loss/train': 2.5318915843963623} +07/25/2024 11:54:12 - INFO - __main__ - Step 6126: {'lr': 0.0004983728860551874, 'samples': 294048, 'steps': 6125, 'loss/train': 1.6246639490127563} +07/25/2024 11:54:12 - INFO - __main__ - Step 6127: {'lr': 0.0004983722867940655, 'samples': 294096, 'steps': 6126, 'loss/train': 1.8689618110656738} +07/25/2024 11:54:12 - INFO - __main__ - Step 6128: {'lr': 0.0004983716874229713, 'samples': 294144, 'steps': 6127, 'loss/train': 2.743826150894165} +07/25/2024 11:54:13 - INFO - __main__ - Step 6129: {'lr': 0.0004983710879419049, 'samples': 294192, 'steps': 6128, 'loss/train': 2.2067782878875732} +07/25/2024 11:54:13 - INFO - __main__ - Step 6130: {'lr': 0.0004983704883508668, 'samples': 294240, 'steps': 6129, 'loss/train': 2.6932945251464844} +07/25/2024 11:54:13 - INFO - __main__ - Step 6131: {'lr': 0.0004983698886498573, 'samples': 294288, 'steps': 6130, 'loss/train': 1.6834455728530884} +07/25/2024 11:54:14 - INFO - __main__ - Step 6132: {'lr': 0.0004983692888388766, 'samples': 294336, 'steps': 6131, 'loss/train': 1.9191325902938843} +07/25/2024 11:54:14 - INFO - __main__ - Step 6133: {'lr': 0.0004983686889179249, 'samples': 294384, 'steps': 6132, 'loss/train': 2.8832881450653076} +07/25/2024 11:54:14 - INFO - __main__ - Step 6134: {'lr': 0.0004983680888870025, 'samples': 294432, 'steps': 6133, 'loss/train': 2.751025676727295} +07/25/2024 11:54:14 - INFO - __main__ - Step 6135: {'lr': 0.0004983674887461095, 'samples': 294480, 'steps': 6134, 'loss/train': 2.1603684425354004} +07/25/2024 11:54:15 - INFO - __main__ - Step 6136: {'lr': 0.0004983668884952465, 'samples': 294528, 'steps': 6135, 'loss/train': 1.9955965280532837} +07/25/2024 11:54:15 - INFO - __main__ - Step 6137: {'lr': 0.0004983662881344137, 'samples': 294576, 'steps': 6136, 'loss/train': 2.371683120727539} +07/25/2024 11:54:15 - INFO - __main__ - Step 6138: {'lr': 0.0004983656876636111, 'samples': 294624, 'steps': 6137, 'loss/train': 2.818549633026123} +07/25/2024 11:54:16 - INFO - __main__ - Step 6139: {'lr': 0.0004983650870828393, 'samples': 294672, 'steps': 6138, 'loss/train': 1.0891554355621338} +07/25/2024 11:54:16 - INFO - __main__ - Step 6140: {'lr': 0.0004983644863920982, 'samples': 294720, 'steps': 6139, 'loss/train': 2.565718650817871} +07/25/2024 11:54:16 - INFO - __main__ - Step 6141: {'lr': 0.0004983638855913883, 'samples': 294768, 'steps': 6140, 'loss/train': 2.5050854682922363} +07/25/2024 11:54:16 - INFO - __main__ - Step 6142: {'lr': 0.0004983632846807099, 'samples': 294816, 'steps': 6141, 'loss/train': 2.0832631587982178} +07/25/2024 11:54:17 - INFO - __main__ - Step 6143: {'lr': 0.0004983626836600633, 'samples': 294864, 'steps': 6142, 'loss/train': 1.9037482738494873} +07/25/2024 11:54:17 - INFO - __main__ - Step 6144: {'lr': 0.0004983620825294485, 'samples': 294912, 'steps': 6143, 'loss/train': 2.058779716491699} +07/25/2024 11:54:17 - INFO - __main__ - Step 6145: {'lr': 0.0004983614812888659, 'samples': 294960, 'steps': 6144, 'loss/train': 1.9933090209960938} +07/25/2024 11:54:18 - INFO - __main__ - Step 6146: {'lr': 0.0004983608799383159, 'samples': 295008, 'steps': 6145, 'loss/train': 0.27054330706596375} +07/25/2024 11:54:18 - INFO - __main__ - Step 6147: {'lr': 0.0004983602784777985, 'samples': 295056, 'steps': 6146, 'loss/train': 1.8278967142105103} +07/25/2024 11:54:18 - INFO - __main__ - Step 6148: {'lr': 0.0004983596769073144, 'samples': 295104, 'steps': 6147, 'loss/train': 1.5790958404541016} +07/25/2024 11:54:18 - INFO - __main__ - Step 6149: {'lr': 0.0004983590752268633, 'samples': 295152, 'steps': 6148, 'loss/train': 2.306190013885498} +07/25/2024 11:54:19 - INFO - __main__ - Step 6150: {'lr': 0.0004983584734364459, 'samples': 295200, 'steps': 6149, 'loss/train': 1.7959108352661133} +07/25/2024 11:54:19 - INFO - __main__ - Step 6151: {'lr': 0.0004983578715360623, 'samples': 295248, 'steps': 6150, 'loss/train': 1.8558591604232788} +07/25/2024 11:54:19 - INFO - __main__ - Step 6152: {'lr': 0.0004983572695257129, 'samples': 295296, 'steps': 6151, 'loss/train': 1.6395766735076904} +07/25/2024 11:54:20 - INFO - __main__ - Step 6153: {'lr': 0.0004983566674053978, 'samples': 295344, 'steps': 6152, 'loss/train': 2.0065884590148926} +07/25/2024 11:54:20 - INFO - __main__ - Step 6154: {'lr': 0.0004983560651751173, 'samples': 295392, 'steps': 6153, 'loss/train': 2.2934141159057617} +07/25/2024 11:54:20 - INFO - __main__ - Step 6155: {'lr': 0.0004983554628348718, 'samples': 295440, 'steps': 6154, 'loss/train': 1.5933281183242798} +07/25/2024 11:54:20 - INFO - __main__ - Step 6156: {'lr': 0.0004983548603846613, 'samples': 295488, 'steps': 6155, 'loss/train': 3.0424346923828125} +07/25/2024 11:54:21 - INFO - __main__ - Step 6157: {'lr': 0.0004983542578244863, 'samples': 295536, 'steps': 6156, 'loss/train': 2.2448313236236572} +07/25/2024 11:54:21 - INFO - __main__ - Step 6158: {'lr': 0.0004983536551543469, 'samples': 295584, 'steps': 6157, 'loss/train': 2.4156651496887207} +07/25/2024 11:54:21 - INFO - __main__ - Step 6159: {'lr': 0.0004983530523742436, 'samples': 295632, 'steps': 6158, 'loss/train': 2.5717148780822754} +07/25/2024 11:54:22 - INFO - __main__ - Step 6160: {'lr': 0.0004983524494841764, 'samples': 295680, 'steps': 6159, 'loss/train': 2.082638740539551} +07/25/2024 11:54:22 - INFO - __main__ - Step 6161: {'lr': 0.0004983518464841459, 'samples': 295728, 'steps': 6160, 'loss/train': 2.2810542583465576} +07/25/2024 11:54:22 - INFO - __main__ - Step 6162: {'lr': 0.000498351243374152, 'samples': 295776, 'steps': 6161, 'loss/train': 2.558328628540039} +07/25/2024 11:54:22 - INFO - __main__ - Step 6163: {'lr': 0.0004983506401541951, 'samples': 295824, 'steps': 6162, 'loss/train': 1.5387792587280273} +07/25/2024 11:54:23 - INFO - __main__ - Step 6164: {'lr': 0.0004983500368242756, 'samples': 295872, 'steps': 6163, 'loss/train': 1.788033127784729} +07/25/2024 11:54:23 - INFO - __main__ - Step 6165: {'lr': 0.0004983494333843936, 'samples': 295920, 'steps': 6164, 'loss/train': 2.0780341625213623} +07/25/2024 11:54:23 - INFO - __main__ - Step 6166: {'lr': 0.0004983488298345494, 'samples': 295968, 'steps': 6165, 'loss/train': 2.401576042175293} +07/25/2024 11:54:24 - INFO - __main__ - Step 6167: {'lr': 0.0004983482261747435, 'samples': 296016, 'steps': 6166, 'loss/train': 1.9422798156738281} +07/25/2024 11:54:24 - INFO - __main__ - Step 6168: {'lr': 0.0004983476224049757, 'samples': 296064, 'steps': 6167, 'loss/train': 2.279423952102661} +07/25/2024 11:54:24 - INFO - __main__ - Step 6169: {'lr': 0.0004983470185252467, 'samples': 296112, 'steps': 6168, 'loss/train': 2.2768757343292236} +07/25/2024 11:54:24 - INFO - __main__ - Step 6170: {'lr': 0.0004983464145355565, 'samples': 296160, 'steps': 6169, 'loss/train': 0.3190339505672455} +07/25/2024 11:54:25 - INFO - __main__ - Step 6171: {'lr': 0.0004983458104359054, 'samples': 296208, 'steps': 6170, 'loss/train': 2.6655490398406982} +07/25/2024 11:54:25 - INFO - __main__ - Step 6172: {'lr': 0.0004983452062262938, 'samples': 296256, 'steps': 6171, 'loss/train': 1.68613600730896} +07/25/2024 11:54:25 - INFO - __main__ - Step 6173: {'lr': 0.000498344601906722, 'samples': 296304, 'steps': 6172, 'loss/train': 2.6160454750061035} +07/25/2024 11:54:26 - INFO - __main__ - Step 6174: {'lr': 0.0004983439974771901, 'samples': 296352, 'steps': 6173, 'loss/train': 2.21620774269104} +07/25/2024 11:54:26 - INFO - __main__ - Step 6175: {'lr': 0.0004983433929376985, 'samples': 296400, 'steps': 6174, 'loss/train': 1.90244460105896} +07/25/2024 11:54:26 - INFO - __main__ - Step 6176: {'lr': 0.0004983427882882473, 'samples': 296448, 'steps': 6175, 'loss/train': 2.49914288520813} +07/25/2024 11:54:26 - INFO - __main__ - Step 6177: {'lr': 0.0004983421835288369, 'samples': 296496, 'steps': 6176, 'loss/train': 2.3868772983551025} +07/25/2024 11:54:27 - INFO - __main__ - Step 6178: {'lr': 0.0004983415786594676, 'samples': 296544, 'steps': 6177, 'loss/train': 2.2440381050109863} +07/25/2024 11:54:27 - INFO - __main__ - Step 6179: {'lr': 0.0004983409736801396, 'samples': 296592, 'steps': 6178, 'loss/train': 1.7521724700927734} +07/25/2024 11:54:27 - INFO - __main__ - Step 6180: {'lr': 0.000498340368590853, 'samples': 296640, 'steps': 6179, 'loss/train': 2.995769500732422} +07/25/2024 11:54:28 - INFO - __main__ - Step 6181: {'lr': 0.0004983397633916085, 'samples': 296688, 'steps': 6180, 'loss/train': 1.3375146389007568} +07/25/2024 11:54:28 - INFO - __main__ - Step 6182: {'lr': 0.0004983391580824059, 'samples': 296736, 'steps': 6181, 'loss/train': 2.227250814437866} +07/25/2024 11:54:28 - INFO - __main__ - Step 6183: {'lr': 0.0004983385526632458, 'samples': 296784, 'steps': 6182, 'loss/train': 2.169837713241577} +07/25/2024 11:54:28 - INFO - __main__ - Step 6184: {'lr': 0.0004983379471341284, 'samples': 296832, 'steps': 6183, 'loss/train': 2.2283334732055664} +07/25/2024 11:54:29 - INFO - __main__ - Step 6185: {'lr': 0.0004983373414950538, 'samples': 296880, 'steps': 6184, 'loss/train': 2.736940383911133} +07/25/2024 11:54:29 - INFO - __main__ - Step 6186: {'lr': 0.0004983367357460224, 'samples': 296928, 'steps': 6185, 'loss/train': 1.9422968626022339} +07/25/2024 11:54:29 - INFO - __main__ - Step 6187: {'lr': 0.0004983361298870344, 'samples': 296976, 'steps': 6186, 'loss/train': 2.133193016052246} +07/25/2024 11:54:29 - INFO - __main__ - Step 6188: {'lr': 0.0004983355239180903, 'samples': 297024, 'steps': 6187, 'loss/train': 1.8842488527297974} +07/25/2024 11:54:30 - INFO - __main__ - Step 6189: {'lr': 0.0004983349178391901, 'samples': 297072, 'steps': 6188, 'loss/train': 2.0180671215057373} +07/25/2024 11:54:30 - INFO - __main__ - Step 6190: {'lr': 0.000498334311650334, 'samples': 297120, 'steps': 6189, 'loss/train': 1.737313151359558} +07/25/2024 11:54:30 - INFO - __main__ - Step 6191: {'lr': 0.0004983337053515226, 'samples': 297168, 'steps': 6190, 'loss/train': 2.128297805786133} +07/25/2024 11:54:31 - INFO - __main__ - Step 6192: {'lr': 0.000498333098942756, 'samples': 297216, 'steps': 6191, 'loss/train': 2.3500027656555176} +07/25/2024 11:54:31 - INFO - __main__ - Step 6193: {'lr': 0.0004983324924240344, 'samples': 297264, 'steps': 6192, 'loss/train': 2.4744064807891846} +07/25/2024 11:54:31 - INFO - __main__ - Step 6194: {'lr': 0.0004983318857953581, 'samples': 297312, 'steps': 6193, 'loss/train': 0.2796185612678528} +07/25/2024 11:54:31 - INFO - __main__ - Step 6195: {'lr': 0.0004983312790567275, 'samples': 297360, 'steps': 6194, 'loss/train': 2.4929163455963135} +07/25/2024 11:54:32 - INFO - __main__ - Step 6196: {'lr': 0.0004983306722081426, 'samples': 297408, 'steps': 6195, 'loss/train': 1.9698725938796997} +07/25/2024 11:54:32 - INFO - __main__ - Step 6197: {'lr': 0.000498330065249604, 'samples': 297456, 'steps': 6196, 'loss/train': 2.034773111343384} +07/25/2024 11:54:32 - INFO - __main__ - Step 6198: {'lr': 0.0004983294581811118, 'samples': 297504, 'steps': 6197, 'loss/train': 2.1681761741638184} +07/25/2024 11:54:33 - INFO - __main__ - Step 6199: {'lr': 0.0004983288510026663, 'samples': 297552, 'steps': 6198, 'loss/train': 2.4689977169036865} +07/25/2024 11:54:33 - INFO - __main__ - Step 6200: {'lr': 0.0004983282437142677, 'samples': 297600, 'steps': 6199, 'loss/train': 1.8336894512176514} +07/25/2024 11:54:33 - INFO - __main__ - Step 6201: {'lr': 0.0004983276363159162, 'samples': 297648, 'steps': 6200, 'loss/train': 1.9934087991714478} +07/25/2024 11:54:33 - INFO - __main__ - Step 6202: {'lr': 0.0004983270288076122, 'samples': 297696, 'steps': 6201, 'loss/train': 2.2854397296905518} +07/25/2024 11:54:34 - INFO - __main__ - Step 6203: {'lr': 0.0004983264211893561, 'samples': 297744, 'steps': 6202, 'loss/train': 2.517954111099243} +07/25/2024 11:54:34 - INFO - __main__ - Step 6204: {'lr': 0.0004983258134611479, 'samples': 297792, 'steps': 6203, 'loss/train': 2.833455801010132} +07/25/2024 11:54:34 - INFO - __main__ - Step 6205: {'lr': 0.000498325205622988, 'samples': 297840, 'steps': 6204, 'loss/train': 1.3913524150848389} +07/25/2024 11:54:35 - INFO - __main__ - Step 6206: {'lr': 0.0004983245976748767, 'samples': 297888, 'steps': 6205, 'loss/train': 2.474541664123535} +07/25/2024 11:54:35 - INFO - __main__ - Step 6207: {'lr': 0.0004983239896168142, 'samples': 297936, 'steps': 6206, 'loss/train': 1.9466400146484375} +07/25/2024 11:54:35 - INFO - __main__ - Step 6208: {'lr': 0.0004983233814488008, 'samples': 297984, 'steps': 6207, 'loss/train': 2.068106174468994} +07/25/2024 11:54:35 - INFO - __main__ - Step 6209: {'lr': 0.0004983227731708368, 'samples': 298032, 'steps': 6208, 'loss/train': 1.7851382493972778} +07/25/2024 11:54:36 - INFO - __main__ - Step 6210: {'lr': 0.0004983221647829224, 'samples': 298080, 'steps': 6209, 'loss/train': 1.8802452087402344} +07/25/2024 11:54:36 - INFO - __main__ - Step 6211: {'lr': 0.0004983215562850579, 'samples': 298128, 'steps': 6210, 'loss/train': 2.4347517490386963} +07/25/2024 11:54:36 - INFO - __main__ - Step 6212: {'lr': 0.0004983209476772435, 'samples': 298176, 'steps': 6211, 'loss/train': 1.7361458539962769} +07/25/2024 11:54:37 - INFO - __main__ - Step 6213: {'lr': 0.0004983203389594796, 'samples': 298224, 'steps': 6212, 'loss/train': 2.2397842407226562} +07/25/2024 11:54:37 - INFO - __main__ - Step 6214: {'lr': 0.0004983197301317665, 'samples': 298272, 'steps': 6213, 'loss/train': 1.930147409439087} +07/25/2024 11:54:37 - INFO - __main__ - Step 6215: {'lr': 0.0004983191211941042, 'samples': 298320, 'steps': 6214, 'loss/train': 2.056156635284424} +07/25/2024 11:54:37 - INFO - __main__ - Step 6216: {'lr': 0.0004983185121464933, 'samples': 298368, 'steps': 6215, 'loss/train': 1.9186015129089355} +07/25/2024 11:54:38 - INFO - __main__ - Step 6217: {'lr': 0.0004983179029889339, 'samples': 298416, 'steps': 6216, 'loss/train': 2.4227473735809326} +07/25/2024 11:54:38 - INFO - __main__ - Step 6218: {'lr': 0.0004983172937214263, 'samples': 298464, 'steps': 6217, 'loss/train': 0.18929235637187958} +07/25/2024 11:54:38 - INFO - __main__ - Step 6219: {'lr': 0.0004983166843439707, 'samples': 298512, 'steps': 6218, 'loss/train': 1.882761001586914} +07/25/2024 11:54:39 - INFO - __main__ - Step 6220: {'lr': 0.0004983160748565675, 'samples': 298560, 'steps': 6219, 'loss/train': 2.256957769393921} +07/25/2024 11:54:39 - INFO - __main__ - Step 6221: {'lr': 0.0004983154652592168, 'samples': 298608, 'steps': 6220, 'loss/train': 2.386732578277588} +07/25/2024 11:54:39 - INFO - __main__ - Step 6222: {'lr': 0.000498314855551919, 'samples': 298656, 'steps': 6221, 'loss/train': 1.652479648590088} +07/25/2024 11:54:39 - INFO - __main__ - Step 6223: {'lr': 0.0004983142457346744, 'samples': 298704, 'steps': 6222, 'loss/train': 2.295923948287964} +07/25/2024 11:54:40 - INFO - __main__ - Step 6224: {'lr': 0.0004983136358074833, 'samples': 298752, 'steps': 6223, 'loss/train': 2.391564130783081} +07/25/2024 11:54:40 - INFO - __main__ - Step 6225: {'lr': 0.0004983130257703456, 'samples': 298800, 'steps': 6224, 'loss/train': 2.0755603313446045} +07/25/2024 11:54:40 - INFO - __main__ - Step 6226: {'lr': 0.000498312415623262, 'samples': 298848, 'steps': 6225, 'loss/train': 2.021958827972412} +07/25/2024 11:54:41 - INFO - __main__ - Step 6227: {'lr': 0.0004983118053662327, 'samples': 298896, 'steps': 6226, 'loss/train': 1.9102903604507446} +07/25/2024 11:54:41 - INFO - __main__ - Step 6228: {'lr': 0.0004983111949992578, 'samples': 298944, 'steps': 6227, 'loss/train': 2.018383026123047} +07/25/2024 11:54:41 - INFO - __main__ - Step 6229: {'lr': 0.0004983105845223377, 'samples': 298992, 'steps': 6228, 'loss/train': 1.7087076902389526} +07/25/2024 11:54:41 - INFO - __main__ - Step 6230: {'lr': 0.0004983099739354726, 'samples': 299040, 'steps': 6229, 'loss/train': 2.2642552852630615} +07/25/2024 11:54:42 - INFO - __main__ - Step 6231: {'lr': 0.0004983093632386628, 'samples': 299088, 'steps': 6230, 'loss/train': 2.357893466949463} +07/25/2024 11:54:42 - INFO - __main__ - Step 6232: {'lr': 0.0004983087524319086, 'samples': 299136, 'steps': 6231, 'loss/train': 1.0827540159225464} +07/25/2024 11:54:42 - INFO - __main__ - Step 6233: {'lr': 0.0004983081415152102, 'samples': 299184, 'steps': 6232, 'loss/train': 2.5274438858032227} +07/25/2024 11:54:43 - INFO - __main__ - Step 6234: {'lr': 0.0004983075304885679, 'samples': 299232, 'steps': 6233, 'loss/train': 2.3282840251922607} +07/25/2024 11:54:43 - INFO - __main__ - Step 6235: {'lr': 0.000498306919351982, 'samples': 299280, 'steps': 6234, 'loss/train': 1.7389788627624512} +07/25/2024 11:54:43 - INFO - __main__ - Step 6236: {'lr': 0.0004983063081054528, 'samples': 299328, 'steps': 6235, 'loss/train': 1.309523582458496} +07/25/2024 11:54:43 - INFO - __main__ - Step 6237: {'lr': 0.0004983056967489806, 'samples': 299376, 'steps': 6236, 'loss/train': 2.649482250213623} +07/25/2024 11:54:44 - INFO - __main__ - Step 6238: {'lr': 0.0004983050852825655, 'samples': 299424, 'steps': 6237, 'loss/train': 1.6904296875} +07/25/2024 11:54:44 - INFO - __main__ - Step 6239: {'lr': 0.0004983044737062078, 'samples': 299472, 'steps': 6238, 'loss/train': 1.8961007595062256} +07/25/2024 11:54:44 - INFO - __main__ - Step 6240: {'lr': 0.0004983038620199079, 'samples': 299520, 'steps': 6239, 'loss/train': 2.1141231060028076} +07/25/2024 11:54:45 - INFO - __main__ - Step 6241: {'lr': 0.000498303250223666, 'samples': 299568, 'steps': 6240, 'loss/train': 1.5862102508544922} +07/25/2024 11:54:45 - INFO - __main__ - Step 6242: {'lr': 0.0004983026383174824, 'samples': 299616, 'steps': 6241, 'loss/train': 0.19409558176994324} +07/25/2024 11:54:45 - INFO - __main__ - Step 6243: {'lr': 0.0004983020263013573, 'samples': 299664, 'steps': 6242, 'loss/train': 2.69207763671875} +07/25/2024 11:54:45 - INFO - __main__ - Step 6244: {'lr': 0.0004983014141752911, 'samples': 299712, 'steps': 6243, 'loss/train': 2.5633504390716553} +07/25/2024 11:54:46 - INFO - __main__ - Step 6245: {'lr': 0.0004983008019392839, 'samples': 299760, 'steps': 6244, 'loss/train': 2.5192182064056396} +07/25/2024 11:54:46 - INFO - __main__ - Step 6246: {'lr': 0.0004983001895933362, 'samples': 299808, 'steps': 6245, 'loss/train': 2.062473773956299} +07/25/2024 11:54:46 - INFO - __main__ - Step 6247: {'lr': 0.0004982995771374479, 'samples': 299856, 'steps': 6246, 'loss/train': 2.0860657691955566} +07/25/2024 11:54:47 - INFO - __main__ - Step 6248: {'lr': 0.0004982989645716196, 'samples': 299904, 'steps': 6247, 'loss/train': 2.0392415523529053} +07/25/2024 11:54:47 - INFO - __main__ - Step 6249: {'lr': 0.0004982983518958516, 'samples': 299952, 'steps': 6248, 'loss/train': 2.3374955654144287} +07/25/2024 11:54:47 - INFO - __main__ - Step 6250: {'lr': 0.0004982977391101439, 'samples': 300000, 'steps': 6249, 'loss/train': 2.561762809753418} +07/25/2024 11:54:47 - INFO - __main__ - Step 6251: {'lr': 0.0004982971262144971, 'samples': 300048, 'steps': 6250, 'loss/train': 1.7877376079559326} +07/25/2024 11:54:48 - INFO - __main__ - Step 6252: {'lr': 0.0004982965132089111, 'samples': 300096, 'steps': 6251, 'loss/train': 1.6974996328353882} +07/25/2024 11:54:48 - INFO - __main__ - Step 6253: {'lr': 0.0004982959000933865, 'samples': 300144, 'steps': 6252, 'loss/train': 2.4011387825012207} +07/25/2024 11:54:48 - INFO - __main__ - Step 6254: {'lr': 0.0004982952868679232, 'samples': 300192, 'steps': 6253, 'loss/train': 1.9511895179748535} +07/25/2024 11:54:49 - INFO - __main__ - Step 6255: {'lr': 0.0004982946735325219, 'samples': 300240, 'steps': 6254, 'loss/train': 1.8939603567123413} +07/25/2024 11:54:49 - INFO - __main__ - Step 6256: {'lr': 0.0004982940600871827, 'samples': 300288, 'steps': 6255, 'loss/train': 2.015437602996826} +07/25/2024 11:54:49 - INFO - __main__ - Step 6257: {'lr': 0.0004982934465319058, 'samples': 300336, 'steps': 6256, 'loss/train': 2.162649631500244} +07/25/2024 11:54:49 - INFO - __main__ - Step 6258: {'lr': 0.0004982928328666914, 'samples': 300384, 'steps': 6257, 'loss/train': 1.5912035703659058} +07/25/2024 11:54:50 - INFO - __main__ - Step 6259: {'lr': 0.0004982922190915401, 'samples': 300432, 'steps': 6258, 'loss/train': 2.2014169692993164} +07/25/2024 11:54:50 - INFO - __main__ - Step 6260: {'lr': 0.0004982916052064519, 'samples': 300480, 'steps': 6259, 'loss/train': 1.3474528789520264} +07/25/2024 11:54:50 - INFO - __main__ - Step 6261: {'lr': 0.000498290991211427, 'samples': 300528, 'steps': 6260, 'loss/train': 2.2184622287750244} +07/25/2024 11:54:51 - INFO - __main__ - Step 6262: {'lr': 0.0004982903771064659, 'samples': 300576, 'steps': 6261, 'loss/train': 1.7158864736557007} +07/25/2024 11:54:51 - INFO - __main__ - Step 6263: {'lr': 0.0004982897628915689, 'samples': 300624, 'steps': 6262, 'loss/train': 1.9768643379211426} +07/25/2024 11:54:51 - INFO - __main__ - Step 6264: {'lr': 0.000498289148566736, 'samples': 300672, 'steps': 6263, 'loss/train': 1.702941656112671} +07/25/2024 11:54:51 - INFO - __main__ - Step 6265: {'lr': 0.0004982885341319677, 'samples': 300720, 'steps': 6264, 'loss/train': 2.1886074542999268} +07/25/2024 11:54:52 - INFO - __main__ - Step 6266: {'lr': 0.000498287919587264, 'samples': 300768, 'steps': 6265, 'loss/train': 0.24306641519069672} +07/25/2024 11:54:52 - INFO - __main__ - Step 6267: {'lr': 0.0004982873049326256, 'samples': 300816, 'steps': 6266, 'loss/train': 2.4099411964416504} +07/25/2024 11:54:52 - INFO - __main__ - Step 6268: {'lr': 0.0004982866901680524, 'samples': 300864, 'steps': 6267, 'loss/train': 2.651108980178833} +07/25/2024 11:54:52 - INFO - __main__ - Step 6269: {'lr': 0.000498286075293545, 'samples': 300912, 'steps': 6268, 'loss/train': 2.5843374729156494} +07/25/2024 11:54:53 - INFO - __main__ - Step 6270: {'lr': 0.0004982854603091032, 'samples': 300960, 'steps': 6269, 'loss/train': 2.127983331680298} +07/25/2024 11:54:53 - INFO - __main__ - Step 6271: {'lr': 0.0004982848452147277, 'samples': 301008, 'steps': 6270, 'loss/train': 2.0737674236297607} +07/25/2024 11:54:53 - INFO - __main__ - Step 6272: {'lr': 0.0004982842300104187, 'samples': 301056, 'steps': 6271, 'loss/train': 2.3584139347076416} +07/25/2024 11:54:54 - INFO - __main__ - Step 6273: {'lr': 0.0004982836146961763, 'samples': 301104, 'steps': 6272, 'loss/train': 2.391892671585083} +07/25/2024 11:54:54 - INFO - __main__ - Step 6274: {'lr': 0.0004982829992720009, 'samples': 301152, 'steps': 6273, 'loss/train': 2.9700844287872314} +07/25/2024 11:54:54 - INFO - __main__ - Step 6275: {'lr': 0.0004982823837378928, 'samples': 301200, 'steps': 6274, 'loss/train': 2.0021355152130127} +07/25/2024 11:54:54 - INFO - __main__ - Step 6276: {'lr': 0.0004982817680938521, 'samples': 301248, 'steps': 6275, 'loss/train': 2.826789140701294} +07/25/2024 11:54:55 - INFO - __main__ - Step 6277: {'lr': 0.0004982811523398793, 'samples': 301296, 'steps': 6276, 'loss/train': 2.3970510959625244} +07/25/2024 11:54:55 - INFO - __main__ - Step 6278: {'lr': 0.0004982805364759745, 'samples': 301344, 'steps': 6277, 'loss/train': 1.6300885677337646} +07/25/2024 11:54:55 - INFO - __main__ - Step 6279: {'lr': 0.0004982799205021381, 'samples': 301392, 'steps': 6278, 'loss/train': 2.2240078449249268} +07/25/2024 11:54:56 - INFO - __main__ - Step 6280: {'lr': 0.0004982793044183702, 'samples': 301440, 'steps': 6279, 'loss/train': 2.113370418548584} +07/25/2024 11:54:56 - INFO - __main__ - Step 6281: {'lr': 0.0004982786882246713, 'samples': 301488, 'steps': 6280, 'loss/train': 2.551588535308838} +07/25/2024 11:54:56 - INFO - __main__ - Step 6282: {'lr': 0.0004982780719210415, 'samples': 301536, 'steps': 6281, 'loss/train': 1.7068290710449219} +07/25/2024 11:54:56 - INFO - __main__ - Step 6283: {'lr': 0.000498277455507481, 'samples': 301584, 'steps': 6282, 'loss/train': 2.0771377086639404} +07/25/2024 11:54:57 - INFO - __main__ - Step 6284: {'lr': 0.0004982768389839903, 'samples': 301632, 'steps': 6283, 'loss/train': 2.2914698123931885} +07/25/2024 11:54:57 - INFO - __main__ - Step 6285: {'lr': 0.0004982762223505697, 'samples': 301680, 'steps': 6284, 'loss/train': 2.7086479663848877} +07/25/2024 11:54:57 - INFO - __main__ - Step 6286: {'lr': 0.0004982756056072192, 'samples': 301728, 'steps': 6285, 'loss/train': 2.678985834121704} +07/25/2024 11:54:58 - INFO - __main__ - Step 6287: {'lr': 0.0004982749887539392, 'samples': 301776, 'steps': 6286, 'loss/train': 1.51541006565094} +07/25/2024 11:54:58 - INFO - __main__ - Step 6288: {'lr': 0.0004982743717907301, 'samples': 301824, 'steps': 6287, 'loss/train': 1.412198781967163} +07/25/2024 11:54:58 - INFO - __main__ - Step 6289: {'lr': 0.0004982737547175921, 'samples': 301872, 'steps': 6288, 'loss/train': 2.227374315261841} +07/25/2024 11:54:58 - INFO - __main__ - Step 6290: {'lr': 0.0004982731375345253, 'samples': 301920, 'steps': 6289, 'loss/train': 2.2024433612823486} +07/25/2024 11:54:59 - INFO - __main__ - Step 6291: {'lr': 0.0004982725202415301, 'samples': 301968, 'steps': 6290, 'loss/train': 2.6667377948760986} +07/25/2024 11:54:59 - INFO - __main__ - Step 6292: {'lr': 0.0004982719028386068, 'samples': 302016, 'steps': 6291, 'loss/train': 2.2800352573394775} +07/25/2024 11:54:59 - INFO - __main__ - Step 6293: {'lr': 0.0004982712853257558, 'samples': 302064, 'steps': 6292, 'loss/train': 2.9475717544555664} +07/25/2024 11:55:00 - INFO - __main__ - Step 6294: {'lr': 0.0004982706677029771, 'samples': 302112, 'steps': 6293, 'loss/train': 2.008368968963623} +07/25/2024 11:55:00 - INFO - __main__ - Step 6295: {'lr': 0.0004982700499702712, 'samples': 302160, 'steps': 6294, 'loss/train': 2.202986478805542} +07/25/2024 11:55:00 - INFO - __main__ - Step 6296: {'lr': 0.0004982694321276381, 'samples': 302208, 'steps': 6295, 'loss/train': 2.4203531742095947} +07/25/2024 11:55:00 - INFO - __main__ - Step 6297: {'lr': 0.0004982688141750784, 'samples': 302256, 'steps': 6296, 'loss/train': 1.9400606155395508} +07/25/2024 11:55:01 - INFO - __main__ - Step 6298: {'lr': 0.0004982681961125923, 'samples': 302304, 'steps': 6297, 'loss/train': 2.230621576309204} +07/25/2024 11:55:01 - INFO - __main__ - Step 6299: {'lr': 0.0004982675779401798, 'samples': 302352, 'steps': 6298, 'loss/train': 1.7738969326019287} +07/25/2024 11:55:01 - INFO - __main__ - Step 6300: {'lr': 0.0004982669596578415, 'samples': 302400, 'steps': 6299, 'loss/train': 2.177952289581299} +07/25/2024 11:55:02 - INFO - __main__ - Step 6301: {'lr': 0.0004982663412655775, 'samples': 302448, 'steps': 6300, 'loss/train': 2.004483222961426} +07/25/2024 11:55:02 - INFO - __main__ - Step 6302: {'lr': 0.0004982657227633881, 'samples': 302496, 'steps': 6301, 'loss/train': 1.6239054203033447} +07/25/2024 11:55:02 - INFO - __main__ - Step 6303: {'lr': 0.0004982651041512737, 'samples': 302544, 'steps': 6302, 'loss/train': 2.081238269805908} +07/25/2024 11:55:02 - INFO - __main__ - Step 6304: {'lr': 0.0004982644854292344, 'samples': 302592, 'steps': 6303, 'loss/train': 2.499408721923828} +07/25/2024 11:55:03 - INFO - __main__ - Step 6305: {'lr': 0.0004982638665972705, 'samples': 302640, 'steps': 6304, 'loss/train': 2.4870612621307373} +07/25/2024 11:55:03 - INFO - __main__ - Step 6306: {'lr': 0.0004982632476553823, 'samples': 302688, 'steps': 6305, 'loss/train': 1.775083065032959} +07/25/2024 11:55:03 - INFO - __main__ - Step 6307: {'lr': 0.0004982626286035702, 'samples': 302736, 'steps': 6306, 'loss/train': 2.129509687423706} +07/25/2024 11:55:04 - INFO - __main__ - Step 6308: {'lr': 0.0004982620094418343, 'samples': 302784, 'steps': 6307, 'loss/train': 2.0504205226898193} +07/25/2024 11:55:04 - INFO - __main__ - Step 6309: {'lr': 0.0004982613901701749, 'samples': 302832, 'steps': 6308, 'loss/train': 2.1661407947540283} +07/25/2024 11:55:04 - INFO - __main__ - Step 6310: {'lr': 0.0004982607707885923, 'samples': 302880, 'steps': 6309, 'loss/train': 2.1586079597473145} +07/25/2024 11:55:04 - INFO - __main__ - Step 6311: {'lr': 0.0004982601512970869, 'samples': 302928, 'steps': 6310, 'loss/train': 2.2922871112823486} +07/25/2024 11:55:05 - INFO - __main__ - Step 6312: {'lr': 0.0004982595316956589, 'samples': 302976, 'steps': 6311, 'loss/train': 2.06062912940979} +07/25/2024 11:55:05 - INFO - __main__ - Step 6313: {'lr': 0.0004982589119843084, 'samples': 303024, 'steps': 6312, 'loss/train': 3.360185146331787} +07/25/2024 11:55:05 - INFO - __main__ - Step 6314: {'lr': 0.0004982582921630359, 'samples': 303072, 'steps': 6313, 'loss/train': 2.1811459064483643} +07/25/2024 11:55:06 - INFO - __main__ - Step 6315: {'lr': 0.0004982576722318415, 'samples': 303120, 'steps': 6314, 'loss/train': 2.0250132083892822} +07/25/2024 11:55:06 - INFO - __main__ - Step 6316: {'lr': 0.0004982570521907256, 'samples': 303168, 'steps': 6315, 'loss/train': 2.7158937454223633} +07/25/2024 11:55:06 - INFO - __main__ - Step 6317: {'lr': 0.0004982564320396884, 'samples': 303216, 'steps': 6316, 'loss/train': 3.1629815101623535} +07/25/2024 11:55:06 - INFO - __main__ - Step 6318: {'lr': 0.0004982558117787302, 'samples': 303264, 'steps': 6317, 'loss/train': 1.4814473390579224} +07/25/2024 11:55:07 - INFO - __main__ - Step 6319: {'lr': 0.0004982551914078513, 'samples': 303312, 'steps': 6318, 'loss/train': 2.2539496421813965} +07/25/2024 11:55:07 - INFO - __main__ - Step 6320: {'lr': 0.000498254570927052, 'samples': 303360, 'steps': 6319, 'loss/train': 1.0972836017608643} +07/25/2024 11:55:07 - INFO - __main__ - Step 6321: {'lr': 0.0004982539503363326, 'samples': 303408, 'steps': 6320, 'loss/train': 1.8295398950576782} +07/25/2024 11:55:08 - INFO - __main__ - Step 6322: {'lr': 0.0004982533296356931, 'samples': 303456, 'steps': 6321, 'loss/train': 2.5223731994628906} +07/25/2024 11:55:08 - INFO - __main__ - Step 6323: {'lr': 0.0004982527088251341, 'samples': 303504, 'steps': 6322, 'loss/train': 1.7927708625793457} +07/25/2024 11:55:08 - INFO - __main__ - Step 6324: {'lr': 0.0004982520879046558, 'samples': 303552, 'steps': 6323, 'loss/train': 2.1113083362579346} +07/25/2024 11:55:08 - INFO - __main__ - Step 6325: {'lr': 0.0004982514668742583, 'samples': 303600, 'steps': 6324, 'loss/train': 2.146284341812134} +07/25/2024 11:55:09 - INFO - __main__ - Step 6326: {'lr': 0.0004982508457339422, 'samples': 303648, 'steps': 6325, 'loss/train': 2.2507758140563965} +07/25/2024 11:55:09 - INFO - __main__ - Step 6327: {'lr': 0.0004982502244837074, 'samples': 303696, 'steps': 6326, 'loss/train': 4.214048385620117} +07/25/2024 11:55:09 - INFO - __main__ - Step 6328: {'lr': 0.0004982496031235544, 'samples': 303744, 'steps': 6327, 'loss/train': 2.6510508060455322} +07/25/2024 11:55:10 - INFO - __main__ - Step 6329: {'lr': 0.0004982489816534835, 'samples': 303792, 'steps': 6328, 'loss/train': 2.51064133644104} +07/25/2024 11:55:10 - INFO - __main__ - Step 6330: {'lr': 0.0004982483600734949, 'samples': 303840, 'steps': 6329, 'loss/train': 2.0608978271484375} +07/25/2024 11:55:10 - INFO - __main__ - Step 6331: {'lr': 0.0004982477383835888, 'samples': 303888, 'steps': 6330, 'loss/train': 2.431659460067749} +07/25/2024 11:55:10 - INFO - __main__ - Step 6332: {'lr': 0.0004982471165837657, 'samples': 303936, 'steps': 6331, 'loss/train': 2.594461441040039} +07/25/2024 11:55:11 - INFO - __main__ - Step 6333: {'lr': 0.0004982464946740256, 'samples': 303984, 'steps': 6332, 'loss/train': 2.0256295204162598} +07/25/2024 11:55:11 - INFO - __main__ - Step 6334: {'lr': 0.0004982458726543689, 'samples': 304032, 'steps': 6333, 'loss/train': 2.334141731262207} +07/25/2024 11:55:11 - INFO - __main__ - Step 6335: {'lr': 0.0004982452505247959, 'samples': 304080, 'steps': 6334, 'loss/train': 2.8220818042755127} +07/25/2024 11:55:12 - INFO - __main__ - Step 6336: {'lr': 0.0004982446282853069, 'samples': 304128, 'steps': 6335, 'loss/train': 2.3655166625976562} +07/25/2024 11:55:12 - INFO - __main__ - Step 6337: {'lr': 0.0004982440059359022, 'samples': 304176, 'steps': 6336, 'loss/train': 4.283157825469971} +07/25/2024 11:55:12 - INFO - __main__ - Step 6338: {'lr': 0.000498243383476582, 'samples': 304224, 'steps': 6337, 'loss/train': 2.3692502975463867} +07/25/2024 11:55:12 - INFO - __main__ - Step 6339: {'lr': 0.0004982427609073464, 'samples': 304272, 'steps': 6338, 'loss/train': 2.0411038398742676} +07/25/2024 11:55:13 - INFO - __main__ - Step 6340: {'lr': 0.000498242138228196, 'samples': 304320, 'steps': 6339, 'loss/train': 2.7628026008605957} +07/25/2024 11:55:13 - INFO - __main__ - Step 6341: {'lr': 0.0004982415154391309, 'samples': 304368, 'steps': 6340, 'loss/train': 3.843170166015625} +07/25/2024 11:55:13 - INFO - __main__ - Step 6342: {'lr': 0.0004982408925401514, 'samples': 304416, 'steps': 6341, 'loss/train': 1.759987711906433} +07/25/2024 11:55:13 - INFO - __main__ - Step 6343: {'lr': 0.0004982402695312579, 'samples': 304464, 'steps': 6342, 'loss/train': 2.236321210861206} +07/25/2024 11:55:14 - INFO - __main__ - Step 6344: {'lr': 0.0004982396464124504, 'samples': 304512, 'steps': 6343, 'loss/train': 2.0188565254211426} +07/25/2024 11:55:14 - INFO - __main__ - Step 6345: {'lr': 0.0004982390231837295, 'samples': 304560, 'steps': 6344, 'loss/train': 1.8240710496902466} +07/25/2024 11:55:14 - INFO - __main__ - Step 6346: {'lr': 0.0004982383998450952, 'samples': 304608, 'steps': 6345, 'loss/train': 2.8659098148345947} +07/25/2024 11:55:15 - INFO - __main__ - Step 6347: {'lr': 0.000498237776396548, 'samples': 304656, 'steps': 6346, 'loss/train': 2.121605157852173} +07/25/2024 11:55:15 - INFO - __main__ - Step 6348: {'lr': 0.0004982371528380879, 'samples': 304704, 'steps': 6347, 'loss/train': 2.409874677658081} +07/25/2024 11:55:15 - INFO - __main__ - Step 6349: {'lr': 0.0004982365291697155, 'samples': 304752, 'steps': 6348, 'loss/train': 2.582639217376709} +07/25/2024 11:55:15 - INFO - __main__ - Step 6350: {'lr': 0.0004982359053914309, 'samples': 304800, 'steps': 6349, 'loss/train': 2.3408889770507812} +07/25/2024 11:55:16 - INFO - __main__ - Step 6351: {'lr': 0.0004982352815032342, 'samples': 304848, 'steps': 6350, 'loss/train': 4.349552154541016} +07/25/2024 11:55:16 - INFO - __main__ - Step 6352: {'lr': 0.0004982346575051262, 'samples': 304896, 'steps': 6351, 'loss/train': 1.9474705457687378} +07/25/2024 11:55:16 - INFO - __main__ - Step 6353: {'lr': 0.0004982340333971067, 'samples': 304944, 'steps': 6352, 'loss/train': 2.377638816833496} +07/25/2024 11:55:17 - INFO - __main__ - Step 6354: {'lr': 0.0004982334091791761, 'samples': 304992, 'steps': 6353, 'loss/train': 2.2213165760040283} +07/25/2024 11:55:17 - INFO - __main__ - Step 6355: {'lr': 0.0004982327848513346, 'samples': 305040, 'steps': 6354, 'loss/train': 0.8545429110527039} +07/25/2024 11:55:17 - INFO - __main__ - Step 6356: {'lr': 0.0004982321604135827, 'samples': 305088, 'steps': 6355, 'loss/train': 3.2500197887420654} +07/25/2024 11:55:17 - INFO - __main__ - Step 6357: {'lr': 0.0004982315358659206, 'samples': 305136, 'steps': 6356, 'loss/train': 2.3448801040649414} +07/25/2024 11:55:18 - INFO - __main__ - Step 6358: {'lr': 0.0004982309112083484, 'samples': 305184, 'steps': 6357, 'loss/train': 2.546278715133667} +07/25/2024 11:55:18 - INFO - __main__ - Step 6359: {'lr': 0.0004982302864408667, 'samples': 305232, 'steps': 6358, 'loss/train': 2.1971819400787354} +07/25/2024 11:55:18 - INFO - __main__ - Step 6360: {'lr': 0.0004982296615634754, 'samples': 305280, 'steps': 6359, 'loss/train': 1.8138189315795898} +07/25/2024 11:55:19 - INFO - __main__ - Step 6361: {'lr': 0.000498229036576175, 'samples': 305328, 'steps': 6360, 'loss/train': 5.059732913970947} +07/25/2024 11:55:19 - INFO - __main__ - Step 6362: {'lr': 0.0004982284114789658, 'samples': 305376, 'steps': 6361, 'loss/train': 2.4222254753112793} +07/25/2024 11:55:19 - INFO - __main__ - Step 6363: {'lr': 0.0004982277862718479, 'samples': 305424, 'steps': 6362, 'loss/train': 1.7431912422180176} +07/25/2024 11:55:19 - INFO - __main__ - Step 6364: {'lr': 0.0004982271609548218, 'samples': 305472, 'steps': 6363, 'loss/train': 1.7599416971206665} +07/25/2024 11:55:20 - INFO - __main__ - Step 6365: {'lr': 0.0004982265355278877, 'samples': 305520, 'steps': 6364, 'loss/train': 3.120830535888672} +07/25/2024 11:55:20 - INFO - __main__ - Step 6366: {'lr': 0.0004982259099910458, 'samples': 305568, 'steps': 6365, 'loss/train': 1.7197502851486206} +07/25/2024 11:55:20 - INFO - __main__ - Step 6367: {'lr': 0.0004982252843442964, 'samples': 305616, 'steps': 6366, 'loss/train': 2.6607141494750977} +07/25/2024 11:55:21 - INFO - __main__ - Step 6368: {'lr': 0.0004982246585876397, 'samples': 305664, 'steps': 6367, 'loss/train': 2.252760410308838} +07/25/2024 11:55:21 - INFO - __main__ - Step 6369: {'lr': 0.0004982240327210763, 'samples': 305712, 'steps': 6368, 'loss/train': 2.1088430881500244} +07/25/2024 11:55:21 - INFO - __main__ - Step 6370: {'lr': 0.0004982234067446061, 'samples': 305760, 'steps': 6369, 'loss/train': 2.593764543533325} +07/25/2024 11:55:21 - INFO - __main__ - Step 6371: {'lr': 0.0004982227806582296, 'samples': 305808, 'steps': 6370, 'loss/train': 2.321139097213745} +07/25/2024 11:55:22 - INFO - __main__ - Step 6372: {'lr': 0.0004982221544619469, 'samples': 305856, 'steps': 6371, 'loss/train': 2.2059099674224854} +07/25/2024 11:55:22 - INFO - __main__ - Step 6373: {'lr': 0.0004982215281557585, 'samples': 305904, 'steps': 6372, 'loss/train': 3.116600513458252} +07/25/2024 11:55:22 - INFO - __main__ - Step 6374: {'lr': 0.0004982209017396644, 'samples': 305952, 'steps': 6373, 'loss/train': 2.51298189163208} +07/25/2024 11:55:23 - INFO - __main__ - Step 6375: {'lr': 0.0004982202752136653, 'samples': 306000, 'steps': 6374, 'loss/train': 3.7172935009002686} +07/25/2024 11:55:23 - INFO - __main__ - Step 6376: {'lr': 0.000498219648577761, 'samples': 306048, 'steps': 6375, 'loss/train': 2.2976160049438477} +07/25/2024 11:55:23 - INFO - __main__ - Step 6377: {'lr': 0.0004982190218319521, 'samples': 306096, 'steps': 6376, 'loss/train': 2.524993896484375} +07/25/2024 11:55:23 - INFO - __main__ - Step 6378: {'lr': 0.0004982183949762388, 'samples': 306144, 'steps': 6377, 'loss/train': 2.4830610752105713} +07/25/2024 11:55:24 - INFO - __main__ - Step 6379: {'lr': 0.0004982177680106213, 'samples': 306192, 'steps': 6378, 'loss/train': 2.7947452068328857} +07/25/2024 11:55:24 - INFO - __main__ - Step 6380: {'lr': 0.0004982171409350999, 'samples': 306240, 'steps': 6379, 'loss/train': 2.4031612873077393} +07/25/2024 11:55:24 - INFO - __main__ - Step 6381: {'lr': 0.000498216513749675, 'samples': 306288, 'steps': 6380, 'loss/train': 2.6511995792388916} +07/25/2024 11:55:25 - INFO - __main__ - Step 6382: {'lr': 0.0004982158864543467, 'samples': 306336, 'steps': 6381, 'loss/train': 2.713536262512207} +07/25/2024 11:55:25 - INFO - __main__ - Step 6383: {'lr': 0.0004982152590491153, 'samples': 306384, 'steps': 6382, 'loss/train': 2.4868721961975098} +07/25/2024 11:55:25 - INFO - __main__ - Step 6384: {'lr': 0.0004982146315339811, 'samples': 306432, 'steps': 6383, 'loss/train': 1.6072444915771484} +07/25/2024 11:55:25 - INFO - __main__ - Step 6385: {'lr': 0.0004982140039089446, 'samples': 306480, 'steps': 6384, 'loss/train': 2.1839303970336914} +07/25/2024 11:55:26 - INFO - __main__ - Step 6386: {'lr': 0.0004982133761740059, 'samples': 306528, 'steps': 6385, 'loss/train': 2.042424440383911} +07/25/2024 11:55:26 - INFO - __main__ - Step 6387: {'lr': 0.0004982127483291652, 'samples': 306576, 'steps': 6386, 'loss/train': 2.363541603088379} +07/25/2024 11:55:26 - INFO - __main__ - Step 6388: {'lr': 0.0004982121203744227, 'samples': 306624, 'steps': 6387, 'loss/train': 2.174159526824951} +07/25/2024 11:55:27 - INFO - __main__ - Step 6389: {'lr': 0.000498211492309779, 'samples': 306672, 'steps': 6388, 'loss/train': 2.835249185562134} +07/25/2024 11:55:27 - INFO - __main__ - Step 6390: {'lr': 0.0004982108641352342, 'samples': 306720, 'steps': 6389, 'loss/train': 2.3833205699920654} +07/25/2024 11:55:27 - INFO - __main__ - Step 6391: {'lr': 0.0004982102358507886, 'samples': 306768, 'steps': 6390, 'loss/train': 1.8574798107147217} +07/25/2024 11:55:27 - INFO - __main__ - Step 6392: {'lr': 0.0004982096074564424, 'samples': 306816, 'steps': 6391, 'loss/train': 1.8519874811172485} +07/25/2024 11:55:28 - INFO - __main__ - Step 6393: {'lr': 0.000498208978952196, 'samples': 306864, 'steps': 6392, 'loss/train': 2.0193841457366943} +07/25/2024 11:55:28 - INFO - __main__ - Step 6394: {'lr': 0.0004982083503380496, 'samples': 306912, 'steps': 6393, 'loss/train': 2.078826427459717} +07/25/2024 11:55:28 - INFO - __main__ - Step 6395: {'lr': 0.0004982077216140034, 'samples': 306960, 'steps': 6394, 'loss/train': 2.66469407081604} +07/25/2024 11:55:29 - INFO - __main__ - Step 6396: {'lr': 0.0004982070927800579, 'samples': 307008, 'steps': 6395, 'loss/train': 2.209289789199829} +07/25/2024 11:55:29 - INFO - __main__ - Step 6397: {'lr': 0.0004982064638362132, 'samples': 307056, 'steps': 6396, 'loss/train': 1.8765413761138916} +07/25/2024 11:55:29 - INFO - __main__ - Step 6398: {'lr': 0.0004982058347824696, 'samples': 307104, 'steps': 6397, 'loss/train': 1.841876745223999} +07/25/2024 11:55:29 - INFO - __main__ - Step 6399: {'lr': 0.0004982052056188275, 'samples': 307152, 'steps': 6398, 'loss/train': 3.292166233062744} +07/25/2024 11:55:30 - INFO - __main__ - Step 6400: {'lr': 0.0004982045763452871, 'samples': 307200, 'steps': 6399, 'loss/train': 2.486673593521118} +07/25/2024 11:55:30 - INFO - __main__ - Step 6401: {'lr': 0.0004982039469618485, 'samples': 307248, 'steps': 6400, 'loss/train': 1.923736333847046} +07/25/2024 11:55:30 - INFO - __main__ - Step 6402: {'lr': 0.0004982033174685123, 'samples': 307296, 'steps': 6401, 'loss/train': 3.701662540435791} +07/25/2024 11:55:31 - INFO - __main__ - Step 6403: {'lr': 0.0004982026878652785, 'samples': 307344, 'steps': 6402, 'loss/train': 2.5144236087799072} +07/25/2024 11:55:31 - INFO - __main__ - Step 6404: {'lr': 0.0004982020581521476, 'samples': 307392, 'steps': 6403, 'loss/train': 1.5728727579116821} +07/25/2024 11:55:31 - INFO - __main__ - Step 6405: {'lr': 0.0004982014283291198, 'samples': 307440, 'steps': 6404, 'loss/train': 1.8766416311264038} +07/25/2024 11:55:31 - INFO - __main__ - Step 6406: {'lr': 0.0004982007983961954, 'samples': 307488, 'steps': 6405, 'loss/train': 2.5107693672180176} +07/25/2024 11:55:32 - INFO - __main__ - Step 6407: {'lr': 0.0004982001683533745, 'samples': 307536, 'steps': 6406, 'loss/train': 2.2945189476013184} +07/25/2024 11:55:32 - INFO - __main__ - Step 6408: {'lr': 0.0004981995382006576, 'samples': 307584, 'steps': 6407, 'loss/train': 1.869504690170288} +07/25/2024 11:55:32 - INFO - __main__ - Step 6409: {'lr': 0.0004981989079380449, 'samples': 307632, 'steps': 6408, 'loss/train': 2.324235200881958} +07/25/2024 11:55:32 - INFO - __main__ - Step 6410: {'lr': 0.0004981982775655367, 'samples': 307680, 'steps': 6409, 'loss/train': 2.3133647441864014} +07/25/2024 11:55:33 - INFO - __main__ - Step 6411: {'lr': 0.0004981976470831332, 'samples': 307728, 'steps': 6410, 'loss/train': 1.926622748374939} +07/25/2024 11:55:33 - INFO - __main__ - Step 6412: {'lr': 0.0004981970164908347, 'samples': 307776, 'steps': 6411, 'loss/train': 1.7817633152008057} +07/25/2024 11:55:33 - INFO - __main__ - Step 6413: {'lr': 0.0004981963857886416, 'samples': 307824, 'steps': 6412, 'loss/train': 2.504972457885742} +07/25/2024 11:55:34 - INFO - __main__ - Step 6414: {'lr': 0.0004981957549765541, 'samples': 307872, 'steps': 6413, 'loss/train': 2.307506561279297} +07/25/2024 11:55:34 - INFO - __main__ - Step 6415: {'lr': 0.0004981951240545725, 'samples': 307920, 'steps': 6414, 'loss/train': 3.108870029449463} +07/25/2024 11:55:34 - INFO - __main__ - Step 6416: {'lr': 0.000498194493022697, 'samples': 307968, 'steps': 6415, 'loss/train': 2.147724151611328} +07/25/2024 11:55:34 - INFO - __main__ - Step 6417: {'lr': 0.0004981938618809278, 'samples': 308016, 'steps': 6416, 'loss/train': 2.496999502182007} +07/25/2024 11:55:35 - INFO - __main__ - Step 6418: {'lr': 0.0004981932306292654, 'samples': 308064, 'steps': 6417, 'loss/train': 2.1594974994659424} +07/25/2024 11:55:35 - INFO - __main__ - Step 6419: {'lr': 0.0004981925992677101, 'samples': 308112, 'steps': 6418, 'loss/train': 2.439962387084961} +07/25/2024 11:55:35 - INFO - __main__ - Step 6420: {'lr': 0.0004981919677962621, 'samples': 308160, 'steps': 6419, 'loss/train': 2.209320545196533} +07/25/2024 11:55:36 - INFO - __main__ - Step 6421: {'lr': 0.0004981913362149215, 'samples': 308208, 'steps': 6420, 'loss/train': 2.3447396755218506} +07/25/2024 11:55:36 - INFO - __main__ - Step 6422: {'lr': 0.0004981907045236888, 'samples': 308256, 'steps': 6421, 'loss/train': 2.603930950164795} +07/25/2024 11:55:36 - INFO - __main__ - Step 6423: {'lr': 0.0004981900727225642, 'samples': 308304, 'steps': 6422, 'loss/train': 3.259110450744629} +07/25/2024 11:55:36 - INFO - __main__ - Step 6424: {'lr': 0.000498189440811548, 'samples': 308352, 'steps': 6423, 'loss/train': 2.530545949935913} +07/25/2024 11:55:37 - INFO - __main__ - Step 6425: {'lr': 0.0004981888087906405, 'samples': 308400, 'steps': 6424, 'loss/train': 2.722512722015381} +07/25/2024 11:55:37 - INFO - __main__ - Step 6426: {'lr': 0.0004981881766598418, 'samples': 308448, 'steps': 6425, 'loss/train': 2.006312370300293} +07/25/2024 11:55:37 - INFO - __main__ - Step 6427: {'lr': 0.0004981875444191525, 'samples': 308496, 'steps': 6426, 'loss/train': 2.370349168777466} +07/25/2024 11:55:38 - INFO - __main__ - Step 6428: {'lr': 0.0004981869120685726, 'samples': 308544, 'steps': 6427, 'loss/train': 2.1803572177886963} +07/25/2024 11:55:38 - INFO - __main__ - Step 6429: {'lr': 0.0004981862796081026, 'samples': 308592, 'steps': 6428, 'loss/train': 1.8198336362838745} +07/25/2024 11:55:38 - INFO - __main__ - Step 6430: {'lr': 0.0004981856470377426, 'samples': 308640, 'steps': 6429, 'loss/train': 1.8409782648086548} +07/25/2024 11:55:38 - INFO - __main__ - Step 6431: {'lr': 0.000498185014357493, 'samples': 308688, 'steps': 6430, 'loss/train': 2.060370683670044} +07/25/2024 11:55:39 - INFO - __main__ - Step 6432: {'lr': 0.0004981843815673539, 'samples': 308736, 'steps': 6431, 'loss/train': 1.573608636856079} +07/25/2024 11:55:39 - INFO - __main__ - Step 6433: {'lr': 0.0004981837486673259, 'samples': 308784, 'steps': 6432, 'loss/train': 2.3086001873016357} +07/25/2024 11:55:39 - INFO - __main__ - Step 6434: {'lr': 0.0004981831156574089, 'samples': 308832, 'steps': 6433, 'loss/train': 2.1585209369659424} +07/25/2024 11:55:40 - INFO - __main__ - Step 6435: {'lr': 0.0004981824825376034, 'samples': 308880, 'steps': 6434, 'loss/train': 1.6289775371551514} +07/25/2024 11:55:40 - INFO - __main__ - Step 6436: {'lr': 0.0004981818493079098, 'samples': 308928, 'steps': 6435, 'loss/train': 2.1418724060058594} +07/25/2024 11:55:40 - INFO - __main__ - Step 6437: {'lr': 0.0004981812159683281, 'samples': 308976, 'steps': 6436, 'loss/train': 2.3384313583374023} +07/25/2024 11:55:40 - INFO - __main__ - Step 6438: {'lr': 0.0004981805825188588, 'samples': 309024, 'steps': 6437, 'loss/train': 2.050039529800415} +07/25/2024 11:55:41 - INFO - __main__ - Step 6439: {'lr': 0.000498179948959502, 'samples': 309072, 'steps': 6438, 'loss/train': 2.0259509086608887} +07/25/2024 11:55:41 - INFO - __main__ - Step 6440: {'lr': 0.0004981793152902581, 'samples': 309120, 'steps': 6439, 'loss/train': 2.1789464950561523} +07/25/2024 11:55:41 - INFO - __main__ - Step 6441: {'lr': 0.0004981786815111273, 'samples': 309168, 'steps': 6440, 'loss/train': 2.182832717895508} +07/25/2024 11:55:42 - INFO - __main__ - Step 6442: {'lr': 0.0004981780476221101, 'samples': 309216, 'steps': 6441, 'loss/train': 2.2246387004852295} +07/25/2024 11:55:42 - INFO - __main__ - Step 6443: {'lr': 0.0004981774136232065, 'samples': 309264, 'steps': 6442, 'loss/train': 1.8937305212020874} +07/25/2024 11:55:42 - INFO - __main__ - Step 6444: {'lr': 0.0004981767795144169, 'samples': 309312, 'steps': 6443, 'loss/train': 2.4603216648101807} +07/25/2024 11:55:42 - INFO - __main__ - Step 6445: {'lr': 0.0004981761452957416, 'samples': 309360, 'steps': 6444, 'loss/train': 1.829883098602295} +07/25/2024 11:55:43 - INFO - __main__ - Step 6446: {'lr': 0.0004981755109671808, 'samples': 309408, 'steps': 6445, 'loss/train': 1.7485222816467285} +07/25/2024 11:55:43 - INFO - __main__ - Step 6447: {'lr': 0.0004981748765287349, 'samples': 309456, 'steps': 6446, 'loss/train': 3.146191358566284} +07/25/2024 11:55:43 - INFO - __main__ - Step 6448: {'lr': 0.000498174241980404, 'samples': 309504, 'steps': 6447, 'loss/train': 1.3451216220855713} +07/25/2024 11:55:44 - INFO - __main__ - Step 6449: {'lr': 0.0004981736073221887, 'samples': 309552, 'steps': 6448, 'loss/train': 2.3644371032714844} +07/25/2024 11:55:44 - INFO - __main__ - Step 6450: {'lr': 0.000498172972554089, 'samples': 309600, 'steps': 6449, 'loss/train': 2.121328353881836} +07/25/2024 11:55:44 - INFO - __main__ - Step 6451: {'lr': 0.0004981723376761052, 'samples': 309648, 'steps': 6450, 'loss/train': 2.2049999237060547} +07/25/2024 11:55:44 - INFO - __main__ - Step 6452: {'lr': 0.0004981717026882377, 'samples': 309696, 'steps': 6451, 'loss/train': 1.942948579788208} +07/25/2024 11:55:45 - INFO - __main__ - Step 6453: {'lr': 0.0004981710675904866, 'samples': 309744, 'steps': 6452, 'loss/train': 2.1419825553894043} +07/25/2024 11:55:45 - INFO - __main__ - Step 6454: {'lr': 0.0004981704323828525, 'samples': 309792, 'steps': 6453, 'loss/train': 1.9184064865112305} +07/25/2024 11:55:45 - INFO - __main__ - Step 6455: {'lr': 0.0004981697970653354, 'samples': 309840, 'steps': 6454, 'loss/train': 1.3415240049362183} +07/25/2024 11:55:46 - INFO - __main__ - Step 6456: {'lr': 0.0004981691616379357, 'samples': 309888, 'steps': 6455, 'loss/train': 0.7091363668441772} +07/25/2024 11:55:46 - INFO - __main__ - Step 6457: {'lr': 0.0004981685261006535, 'samples': 309936, 'steps': 6456, 'loss/train': 2.4941024780273438} +07/25/2024 11:55:46 - INFO - __main__ - Step 6458: {'lr': 0.0004981678904534895, 'samples': 309984, 'steps': 6457, 'loss/train': 2.3184714317321777} +07/25/2024 11:55:46 - INFO - __main__ - Step 6459: {'lr': 0.0004981672546964435, 'samples': 310032, 'steps': 6458, 'loss/train': 1.9795933961868286} +07/25/2024 11:55:47 - INFO - __main__ - Step 6460: {'lr': 0.000498166618829516, 'samples': 310080, 'steps': 6459, 'loss/train': 0.6801921725273132} +07/25/2024 11:55:47 - INFO - __main__ - Step 6461: {'lr': 0.0004981659828527074, 'samples': 310128, 'steps': 6460, 'loss/train': 2.1729230880737305} +07/25/2024 11:55:47 - INFO - __main__ - Step 6462: {'lr': 0.0004981653467660178, 'samples': 310176, 'steps': 6461, 'loss/train': 1.9606618881225586} +07/25/2024 11:55:48 - INFO - __main__ - Step 6463: {'lr': 0.0004981647105694474, 'samples': 310224, 'steps': 6462, 'loss/train': 2.338533639907837} +07/25/2024 11:55:48 - INFO - __main__ - Step 6464: {'lr': 0.0004981640742629968, 'samples': 310272, 'steps': 6463, 'loss/train': 1.9817049503326416} +07/25/2024 11:55:48 - INFO - __main__ - Step 6465: {'lr': 0.0004981634378466661, 'samples': 310320, 'steps': 6464, 'loss/train': 2.2880120277404785} +07/25/2024 11:55:48 - INFO - __main__ - Step 6466: {'lr': 0.0004981628013204554, 'samples': 310368, 'steps': 6465, 'loss/train': 2.2725939750671387} +07/25/2024 11:55:49 - INFO - __main__ - Step 6467: {'lr': 0.0004981621646843653, 'samples': 310416, 'steps': 6466, 'loss/train': 1.9003815650939941} +07/25/2024 11:55:49 - INFO - __main__ - Step 6468: {'lr': 0.000498161527938396, 'samples': 310464, 'steps': 6467, 'loss/train': 1.9878343343734741} +07/25/2024 11:55:49 - INFO - __main__ - Step 6469: {'lr': 0.0004981608910825477, 'samples': 310512, 'steps': 6468, 'loss/train': 2.394500970840454} +07/25/2024 11:55:50 - INFO - __main__ - Step 6470: {'lr': 0.0004981602541168207, 'samples': 310560, 'steps': 6469, 'loss/train': 2.0276103019714355} +07/25/2024 11:55:50 - INFO - __main__ - Step 6471: {'lr': 0.0004981596170412152, 'samples': 310608, 'steps': 6470, 'loss/train': 2.9256269931793213} +07/25/2024 11:55:50 - INFO - __main__ - Step 6472: {'lr': 0.0004981589798557316, 'samples': 310656, 'steps': 6471, 'loss/train': 2.472414016723633} +07/25/2024 11:55:50 - INFO - __main__ - Step 6473: {'lr': 0.0004981583425603702, 'samples': 310704, 'steps': 6472, 'loss/train': 2.507472038269043} +07/25/2024 11:55:51 - INFO - __main__ - Step 6474: {'lr': 0.0004981577051551313, 'samples': 310752, 'steps': 6473, 'loss/train': 2.707504987716675} +07/25/2024 11:55:51 - INFO - __main__ - Step 6475: {'lr': 0.0004981570676400151, 'samples': 310800, 'steps': 6474, 'loss/train': 2.4843225479125977} +07/25/2024 11:55:51 - INFO - __main__ - Step 6476: {'lr': 0.0004981564300150218, 'samples': 310848, 'steps': 6475, 'loss/train': 2.3576273918151855} +07/25/2024 11:55:52 - INFO - __main__ - Step 6477: {'lr': 0.0004981557922801519, 'samples': 310896, 'steps': 6476, 'loss/train': 1.8046480417251587} +07/25/2024 11:55:52 - INFO - __main__ - Step 6478: {'lr': 0.0004981551544354055, 'samples': 310944, 'steps': 6477, 'loss/train': 2.090934991836548} +07/25/2024 11:55:52 - INFO - __main__ - Step 6479: {'lr': 0.000498154516480783, 'samples': 310992, 'steps': 6478, 'loss/train': 2.62919545173645} +07/25/2024 11:55:52 - INFO - __main__ - Step 6480: {'lr': 0.0004981538784162845, 'samples': 311040, 'steps': 6479, 'loss/train': 1.852919340133667} +07/25/2024 11:55:53 - INFO - __main__ - Step 6481: {'lr': 0.0004981532402419106, 'samples': 311088, 'steps': 6480, 'loss/train': 0.914466381072998} +07/25/2024 11:55:53 - INFO - __main__ - Step 6482: {'lr': 0.0004981526019576612, 'samples': 311136, 'steps': 6481, 'loss/train': 1.871437430381775} +07/25/2024 11:55:53 - INFO - __main__ - Step 6483: {'lr': 0.000498151963563537, 'samples': 311184, 'steps': 6482, 'loss/train': 1.9952055215835571} +07/25/2024 11:55:54 - INFO - __main__ - Step 6484: {'lr': 0.0004981513250595378, 'samples': 311232, 'steps': 6483, 'loss/train': 1.6996641159057617} +07/25/2024 11:55:54 - INFO - __main__ - Step 6485: {'lr': 0.0004981506864456645, 'samples': 311280, 'steps': 6484, 'loss/train': 2.2059497833251953} +07/25/2024 11:55:54 - INFO - __main__ - Step 6486: {'lr': 0.0004981500477219167, 'samples': 311328, 'steps': 6485, 'loss/train': 1.7685257196426392} +07/25/2024 11:55:54 - INFO - __main__ - Step 6487: {'lr': 0.0004981494088882952, 'samples': 311376, 'steps': 6486, 'loss/train': 2.351592779159546} +07/25/2024 11:55:55 - INFO - __main__ - Step 6488: {'lr': 0.0004981487699448, 'samples': 311424, 'steps': 6487, 'loss/train': 2.4500715732574463} +07/25/2024 11:55:55 - INFO - __main__ - Step 6489: {'lr': 0.0004981481308914315, 'samples': 311472, 'steps': 6488, 'loss/train': 2.2637481689453125} +07/25/2024 11:55:55 - INFO - __main__ - Step 6490: {'lr': 0.0004981474917281901, 'samples': 311520, 'steps': 6489, 'loss/train': 2.2099037170410156} +07/25/2024 11:55:55 - INFO - __main__ - Step 6491: {'lr': 0.0004981468524550757, 'samples': 311568, 'steps': 6490, 'loss/train': 2.487560272216797} +07/25/2024 11:55:56 - INFO - __main__ - Step 6492: {'lr': 0.0004981462130720889, 'samples': 311616, 'steps': 6491, 'loss/train': 2.1830029487609863} +07/25/2024 11:55:56 - INFO - __main__ - Step 6493: {'lr': 0.0004981455735792301, 'samples': 311664, 'steps': 6492, 'loss/train': 1.889325499534607} +07/25/2024 11:55:56 - INFO - __main__ - Step 6494: {'lr': 0.0004981449339764992, 'samples': 311712, 'steps': 6493, 'loss/train': 0.7712419033050537} +07/25/2024 11:55:57 - INFO - __main__ - Step 6495: {'lr': 0.0004981442942638967, 'samples': 311760, 'steps': 6494, 'loss/train': 3.015740394592285} +07/25/2024 11:55:57 - INFO - __main__ - Step 6496: {'lr': 0.000498143654441423, 'samples': 311808, 'steps': 6495, 'loss/train': 2.0133631229400635} +07/25/2024 11:55:57 - INFO - __main__ - Step 6497: {'lr': 0.000498143014509078, 'samples': 311856, 'steps': 6496, 'loss/train': 2.5837900638580322} +07/25/2024 11:55:57 - INFO - __main__ - Step 6498: {'lr': 0.0004981423744668624, 'samples': 311904, 'steps': 6497, 'loss/train': 2.2523152828216553} +07/25/2024 11:55:58 - INFO - __main__ - Step 6499: {'lr': 0.0004981417343147763, 'samples': 311952, 'steps': 6498, 'loss/train': 2.0236740112304688} +07/25/2024 11:55:58 - INFO - __main__ - Step 6500: {'lr': 0.0004981410940528199, 'samples': 312000, 'steps': 6499, 'loss/train': 2.457000970840454} +07/25/2024 11:55:58 - INFO - __main__ - Step 6501: {'lr': 0.0004981404536809937, 'samples': 312048, 'steps': 6500, 'loss/train': 1.2404755353927612} +07/25/2024 11:55:59 - INFO - __main__ - Step 6502: {'lr': 0.0004981398131992978, 'samples': 312096, 'steps': 6501, 'loss/train': 2.7669687271118164} +07/25/2024 11:55:59 - INFO - __main__ - Step 6503: {'lr': 0.0004981391726077326, 'samples': 312144, 'steps': 6502, 'loss/train': 2.6684210300445557} +07/25/2024 11:55:59 - INFO - __main__ - Step 6504: {'lr': 0.0004981385319062982, 'samples': 312192, 'steps': 6503, 'loss/train': 1.8397809267044067} +07/25/2024 11:55:59 - INFO - __main__ - Step 6505: {'lr': 0.000498137891094995, 'samples': 312240, 'steps': 6504, 'loss/train': 0.7032684087753296} +07/25/2024 11:56:00 - INFO - __main__ - Step 6506: {'lr': 0.0004981372501738234, 'samples': 312288, 'steps': 6505, 'loss/train': 2.0170834064483643} +07/25/2024 11:56:00 - INFO - __main__ - Step 6507: {'lr': 0.0004981366091427836, 'samples': 312336, 'steps': 6506, 'loss/train': 2.1373419761657715} +07/25/2024 11:56:00 - INFO - __main__ - Step 6508: {'lr': 0.0004981359680018757, 'samples': 312384, 'steps': 6507, 'loss/train': 2.5136897563934326} +07/25/2024 11:56:01 - INFO - __main__ - Step 6509: {'lr': 0.0004981353267511004, 'samples': 312432, 'steps': 6508, 'loss/train': 2.2271227836608887} +07/25/2024 11:56:01 - INFO - __main__ - Step 6510: {'lr': 0.0004981346853904576, 'samples': 312480, 'steps': 6509, 'loss/train': 1.7043300867080688} +07/25/2024 11:56:01 - INFO - __main__ - Step 6511: {'lr': 0.0004981340439199477, 'samples': 312528, 'steps': 6510, 'loss/train': 2.0404937267303467} +07/25/2024 11:56:01 - INFO - __main__ - Step 6512: {'lr': 0.0004981334023395709, 'samples': 312576, 'steps': 6511, 'loss/train': 2.42657732963562} +07/25/2024 11:56:02 - INFO - __main__ - Step 6513: {'lr': 0.0004981327606493278, 'samples': 312624, 'steps': 6512, 'loss/train': 2.3158068656921387} +07/25/2024 11:56:02 - INFO - __main__ - Step 6514: {'lr': 0.0004981321188492183, 'samples': 312672, 'steps': 6513, 'loss/train': 2.528531551361084} +07/25/2024 11:56:02 - INFO - __main__ - Step 6515: {'lr': 0.0004981314769392428, 'samples': 312720, 'steps': 6514, 'loss/train': 2.6855080127716064} +07/25/2024 11:56:03 - INFO - __main__ - Step 6516: {'lr': 0.0004981308349194018, 'samples': 312768, 'steps': 6515, 'loss/train': 1.7434241771697998} +07/25/2024 11:56:03 - INFO - __main__ - Step 6517: {'lr': 0.0004981301927896954, 'samples': 312816, 'steps': 6516, 'loss/train': 2.412780523300171} +07/25/2024 11:56:03 - INFO - __main__ - Step 6518: {'lr': 0.0004981295505501239, 'samples': 312864, 'steps': 6517, 'loss/train': 2.2455894947052} +07/25/2024 11:56:03 - INFO - __main__ - Step 6519: {'lr': 0.0004981289082006874, 'samples': 312912, 'steps': 6518, 'loss/train': 2.920546293258667} +07/25/2024 11:56:04 - INFO - __main__ - Step 6520: {'lr': 0.0004981282657413866, 'samples': 312960, 'steps': 6519, 'loss/train': 2.2103562355041504} +07/25/2024 11:56:04 - INFO - __main__ - Step 6521: {'lr': 0.0004981276231722215, 'samples': 313008, 'steps': 6520, 'loss/train': 2.543886184692383} +07/25/2024 11:56:04 - INFO - __main__ - Step 6522: {'lr': 0.0004981269804931924, 'samples': 313056, 'steps': 6521, 'loss/train': 2.378412961959839} +07/25/2024 11:56:05 - INFO - __main__ - Step 6523: {'lr': 0.0004981263377042996, 'samples': 313104, 'steps': 6522, 'loss/train': 1.8567814826965332} +07/25/2024 11:56:05 - INFO - __main__ - Step 6524: {'lr': 0.0004981256948055435, 'samples': 313152, 'steps': 6523, 'loss/train': 1.8970121145248413} +07/25/2024 11:56:05 - INFO - __main__ - Step 6525: {'lr': 0.0004981250517969243, 'samples': 313200, 'steps': 6524, 'loss/train': 2.459108591079712} +07/25/2024 11:56:05 - INFO - __main__ - Step 6526: {'lr': 0.0004981244086784421, 'samples': 313248, 'steps': 6525, 'loss/train': 2.0730085372924805} +07/25/2024 11:56:06 - INFO - __main__ - Step 6527: {'lr': 0.0004981237654500976, 'samples': 313296, 'steps': 6526, 'loss/train': 2.512702226638794} +07/25/2024 11:56:06 - INFO - __main__ - Step 6528: {'lr': 0.0004981231221118906, 'samples': 313344, 'steps': 6527, 'loss/train': 2.4722392559051514} +07/25/2024 11:56:06 - INFO - __main__ - Step 6529: {'lr': 0.0004981224786638218, 'samples': 313392, 'steps': 6528, 'loss/train': 1.5404926538467407} +07/25/2024 11:56:07 - INFO - __main__ - Step 6530: {'lr': 0.0004981218351058914, 'samples': 313440, 'steps': 6529, 'loss/train': 2.201627731323242} +07/25/2024 11:56:07 - INFO - __main__ - Step 6531: {'lr': 0.0004981211914380994, 'samples': 313488, 'steps': 6530, 'loss/train': 2.2329020500183105} +07/25/2024 11:56:07 - INFO - __main__ - Step 6532: {'lr': 0.0004981205476604463, 'samples': 313536, 'steps': 6531, 'loss/train': 1.0570042133331299} +07/25/2024 11:56:07 - INFO - __main__ - Step 6533: {'lr': 0.0004981199037729326, 'samples': 313584, 'steps': 6532, 'loss/train': 1.501904010772705} +07/25/2024 11:56:08 - INFO - __main__ - Step 6534: {'lr': 0.0004981192597755582, 'samples': 313632, 'steps': 6533, 'loss/train': 1.9013301134109497} +07/25/2024 11:56:08 - INFO - __main__ - Step 6535: {'lr': 0.0004981186156683235, 'samples': 313680, 'steps': 6534, 'loss/train': 1.6927398443222046} +07/25/2024 11:56:08 - INFO - __main__ - Step 6536: {'lr': 0.0004981179714512289, 'samples': 313728, 'steps': 6535, 'loss/train': 2.2141873836517334} +07/25/2024 11:56:09 - INFO - __main__ - Step 6537: {'lr': 0.0004981173271242746, 'samples': 313776, 'steps': 6536, 'loss/train': 1.9028794765472412} +07/25/2024 11:56:09 - INFO - __main__ - Step 6538: {'lr': 0.0004981166826874609, 'samples': 313824, 'steps': 6537, 'loss/train': 2.0820443630218506} +07/25/2024 11:56:09 - INFO - __main__ - Step 6539: {'lr': 0.0004981160381407881, 'samples': 313872, 'steps': 6538, 'loss/train': 2.430056571960449} +07/25/2024 11:56:09 - INFO - __main__ - Step 6540: {'lr': 0.0004981153934842564, 'samples': 313920, 'steps': 6539, 'loss/train': 1.9350401163101196} +07/25/2024 11:56:10 - INFO - __main__ - Step 6541: {'lr': 0.0004981147487178663, 'samples': 313968, 'steps': 6540, 'loss/train': 2.312253952026367} +07/25/2024 11:56:10 - INFO - __main__ - Step 6542: {'lr': 0.0004981141038416178, 'samples': 314016, 'steps': 6541, 'loss/train': 2.708615779876709} +07/25/2024 11:56:10 - INFO - __main__ - Step 6543: {'lr': 0.0004981134588555114, 'samples': 314064, 'steps': 6542, 'loss/train': 2.95539927482605} +07/25/2024 11:56:11 - INFO - __main__ - Step 6544: {'lr': 0.0004981128137595473, 'samples': 314112, 'steps': 6543, 'loss/train': 2.1440978050231934} +07/25/2024 11:56:11 - INFO - __main__ - Step 6545: {'lr': 0.0004981121685537258, 'samples': 314160, 'steps': 6544, 'loss/train': 1.860526204109192} +07/25/2024 11:56:11 - INFO - __main__ - Step 6546: {'lr': 0.0004981115232380472, 'samples': 314208, 'steps': 6545, 'loss/train': 2.377213954925537} +07/25/2024 11:56:11 - INFO - __main__ - Step 6547: {'lr': 0.0004981108778125117, 'samples': 314256, 'steps': 6546, 'loss/train': 1.817160725593567} +07/25/2024 11:56:12 - INFO - __main__ - Step 6548: {'lr': 0.0004981102322771197, 'samples': 314304, 'steps': 6547, 'loss/train': 2.093641757965088} +07/25/2024 11:56:12 - INFO - __main__ - Step 6549: {'lr': 0.0004981095866318715, 'samples': 314352, 'steps': 6548, 'loss/train': 2.3798913955688477} +07/25/2024 11:56:12 - INFO - __main__ - Step 6550: {'lr': 0.0004981089408767672, 'samples': 314400, 'steps': 6549, 'loss/train': 2.3663501739501953} +07/25/2024 11:56:13 - INFO - __main__ - Step 6551: {'lr': 0.0004981082950118072, 'samples': 314448, 'steps': 6550, 'loss/train': 2.537241220474243} +07/25/2024 11:56:13 - INFO - __main__ - Step 6552: {'lr': 0.0004981076490369919, 'samples': 314496, 'steps': 6551, 'loss/train': 2.293058156967163} +07/25/2024 11:56:13 - INFO - __main__ - Step 6553: {'lr': 0.0004981070029523215, 'samples': 314544, 'steps': 6552, 'loss/train': 2.436269760131836} +07/25/2024 11:56:13 - INFO - __main__ - Step 6554: {'lr': 0.0004981063567577963, 'samples': 314592, 'steps': 6553, 'loss/train': 1.278603434562683} +07/25/2024 11:56:14 - INFO - __main__ - Step 6555: {'lr': 0.0004981057104534165, 'samples': 314640, 'steps': 6554, 'loss/train': 2.0569632053375244} +07/25/2024 11:56:14 - INFO - __main__ - Step 6556: {'lr': 0.0004981050640391824, 'samples': 314688, 'steps': 6555, 'loss/train': 1.74604332447052} +07/25/2024 11:56:14 - INFO - __main__ - Step 6557: {'lr': 0.0004981044175150943, 'samples': 314736, 'steps': 6556, 'loss/train': 1.6022039651870728} +07/25/2024 11:56:15 - INFO - __main__ - Step 6558: {'lr': 0.0004981037708811526, 'samples': 314784, 'steps': 6557, 'loss/train': 2.121415853500366} +07/25/2024 11:56:15 - INFO - __main__ - Step 6559: {'lr': 0.0004981031241373575, 'samples': 314832, 'steps': 6558, 'loss/train': 2.3782315254211426} +07/25/2024 11:56:15 - INFO - __main__ - Step 6560: {'lr': 0.0004981024772837092, 'samples': 314880, 'steps': 6559, 'loss/train': 2.179485321044922} +07/25/2024 11:56:15 - INFO - __main__ - Step 6561: {'lr': 0.0004981018303202082, 'samples': 314928, 'steps': 6560, 'loss/train': 2.101386070251465} +07/25/2024 11:56:16 - INFO - __main__ - Step 6562: {'lr': 0.0004981011832468546, 'samples': 314976, 'steps': 6561, 'loss/train': 1.6890997886657715} +07/25/2024 11:56:16 - INFO - __main__ - Step 6563: {'lr': 0.0004981005360636488, 'samples': 315024, 'steps': 6562, 'loss/train': 1.7044962644577026} +07/25/2024 11:56:16 - INFO - __main__ - Step 6564: {'lr': 0.000498099888770591, 'samples': 315072, 'steps': 6563, 'loss/train': 1.9000555276870728} +07/25/2024 11:56:17 - INFO - __main__ - Step 6565: {'lr': 0.0004980992413676815, 'samples': 315120, 'steps': 6564, 'loss/train': 2.622692346572876} +07/25/2024 11:56:17 - INFO - __main__ - Step 6566: {'lr': 0.0004980985938549206, 'samples': 315168, 'steps': 6565, 'loss/train': 2.416208505630493} +07/25/2024 11:56:17 - INFO - __main__ - Step 6567: {'lr': 0.0004980979462323086, 'samples': 315216, 'steps': 6566, 'loss/train': 2.9522624015808105} +07/25/2024 11:56:17 - INFO - __main__ - Step 6568: {'lr': 0.0004980972984998458, 'samples': 315264, 'steps': 6567, 'loss/train': 2.213850975036621} +07/25/2024 11:56:18 - INFO - __main__ - Step 6569: {'lr': 0.0004980966506575325, 'samples': 315312, 'steps': 6568, 'loss/train': 2.132258176803589} +07/25/2024 11:56:18 - INFO - __main__ - Step 6570: {'lr': 0.0004980960027053689, 'samples': 315360, 'steps': 6569, 'loss/train': 2.43533992767334} +07/25/2024 11:56:18 - INFO - __main__ - Step 6571: {'lr': 0.0004980953546433553, 'samples': 315408, 'steps': 6570, 'loss/train': 2.212949275970459} +07/25/2024 11:56:18 - INFO - __main__ - Step 6572: {'lr': 0.0004980947064714921, 'samples': 315456, 'steps': 6571, 'loss/train': 2.3108890056610107} +07/25/2024 11:56:19 - INFO - __main__ - Step 6573: {'lr': 0.0004980940581897795, 'samples': 315504, 'steps': 6572, 'loss/train': 2.4417622089385986} +07/25/2024 11:56:19 - INFO - __main__ - Step 6574: {'lr': 0.0004980934097982178, 'samples': 315552, 'steps': 6573, 'loss/train': 1.8943947553634644} +07/25/2024 11:56:19 - INFO - __main__ - Step 6575: {'lr': 0.0004980927612968073, 'samples': 315600, 'steps': 6574, 'loss/train': 1.7979190349578857} +07/25/2024 11:56:20 - INFO - __main__ - Step 6576: {'lr': 0.0004980921126855483, 'samples': 315648, 'steps': 6575, 'loss/train': 2.1400203704833984} +07/25/2024 11:56:20 - INFO - __main__ - Step 6577: {'lr': 0.0004980914639644411, 'samples': 315696, 'steps': 6576, 'loss/train': 2.013770341873169} +07/25/2024 11:56:20 - INFO - __main__ - Step 6578: {'lr': 0.0004980908151334859, 'samples': 315744, 'steps': 6577, 'loss/train': 1.4776157140731812} +07/25/2024 11:56:20 - INFO - __main__ - Step 6579: {'lr': 0.000498090166192683, 'samples': 315792, 'steps': 6578, 'loss/train': 2.0632293224334717} +07/25/2024 11:56:21 - INFO - __main__ - Step 6580: {'lr': 0.0004980895171420327, 'samples': 315840, 'steps': 6579, 'loss/train': 2.0061497688293457} +07/25/2024 11:56:21 - INFO - __main__ - Step 6581: {'lr': 0.0004980888679815353, 'samples': 315888, 'steps': 6580, 'loss/train': 1.3675986528396606} +07/25/2024 11:56:21 - INFO - __main__ - Step 6582: {'lr': 0.0004980882187111912, 'samples': 315936, 'steps': 6581, 'loss/train': 2.334306001663208} +07/25/2024 11:56:22 - INFO - __main__ - Step 6583: {'lr': 0.0004980875693310006, 'samples': 315984, 'steps': 6582, 'loss/train': 2.460282564163208} +07/25/2024 11:56:22 - INFO - __main__ - Step 6584: {'lr': 0.0004980869198409636, 'samples': 316032, 'steps': 6583, 'loss/train': 1.721280813217163} +07/25/2024 11:56:22 - INFO - __main__ - Step 6585: {'lr': 0.0004980862702410809, 'samples': 316080, 'steps': 6584, 'loss/train': 2.3010365962982178} +07/25/2024 11:56:22 - INFO - __main__ - Step 6586: {'lr': 0.0004980856205313524, 'samples': 316128, 'steps': 6585, 'loss/train': 1.6584155559539795} +07/25/2024 11:56:23 - INFO - __main__ - Step 6587: {'lr': 0.0004980849707117786, 'samples': 316176, 'steps': 6586, 'loss/train': 2.5151960849761963} +07/25/2024 11:56:23 - INFO - __main__ - Step 6588: {'lr': 0.0004980843207823596, 'samples': 316224, 'steps': 6587, 'loss/train': 2.7458114624023438} +07/25/2024 11:56:23 - INFO - __main__ - Step 6589: {'lr': 0.000498083670743096, 'samples': 316272, 'steps': 6588, 'loss/train': 1.8187626600265503} +07/25/2024 11:56:24 - INFO - __main__ - Step 6590: {'lr': 0.0004980830205939879, 'samples': 316320, 'steps': 6589, 'loss/train': 2.356245756149292} +07/25/2024 11:56:24 - INFO - __main__ - Step 6591: {'lr': 0.0004980823703350355, 'samples': 316368, 'steps': 6590, 'loss/train': 2.609393358230591} +07/25/2024 11:56:24 - INFO - __main__ - Step 6592: {'lr': 0.0004980817199662391, 'samples': 316416, 'steps': 6591, 'loss/train': 1.5959765911102295} +07/25/2024 11:56:24 - INFO - __main__ - Step 6593: {'lr': 0.0004980810694875992, 'samples': 316464, 'steps': 6592, 'loss/train': 1.55056631565094} +07/25/2024 11:56:25 - INFO - __main__ - Step 6594: {'lr': 0.0004980804188991159, 'samples': 316512, 'steps': 6593, 'loss/train': 2.5362699031829834} +07/25/2024 11:56:25 - INFO - __main__ - Step 6595: {'lr': 0.0004980797682007896, 'samples': 316560, 'steps': 6594, 'loss/train': 2.0824949741363525} +07/25/2024 11:56:25 - INFO - __main__ - Step 6596: {'lr': 0.0004980791173926205, 'samples': 316608, 'steps': 6595, 'loss/train': 2.0075902938842773} +07/25/2024 11:56:26 - INFO - __main__ - Step 6597: {'lr': 0.0004980784664746088, 'samples': 316656, 'steps': 6596, 'loss/train': 2.5341708660125732} +07/25/2024 11:56:26 - INFO - __main__ - Step 6598: {'lr': 0.000498077815446755, 'samples': 316704, 'steps': 6597, 'loss/train': 2.4993529319763184} +07/25/2024 11:56:26 - INFO - __main__ - Step 6599: {'lr': 0.0004980771643090593, 'samples': 316752, 'steps': 6598, 'loss/train': 1.8452980518341064} +07/25/2024 11:56:26 - INFO - __main__ - Step 6600: {'lr': 0.000498076513061522, 'samples': 316800, 'steps': 6599, 'loss/train': 2.5400991439819336} +07/25/2024 11:56:27 - INFO - __main__ - Step 6601: {'lr': 0.0004980758617041433, 'samples': 316848, 'steps': 6600, 'loss/train': 2.186279773712158} +07/25/2024 11:56:27 - INFO - __main__ - Step 6602: {'lr': 0.0004980752102369236, 'samples': 316896, 'steps': 6601, 'loss/train': 2.4885404109954834} +07/25/2024 11:56:27 - INFO - __main__ - Step 6603: {'lr': 0.0004980745586598632, 'samples': 316944, 'steps': 6602, 'loss/train': 2.0732903480529785} +07/25/2024 11:56:28 - INFO - __main__ - Step 6604: {'lr': 0.0004980739069729623, 'samples': 316992, 'steps': 6603, 'loss/train': 2.9118974208831787} +07/25/2024 11:56:28 - INFO - __main__ - Step 6605: {'lr': 0.0004980732551762212, 'samples': 317040, 'steps': 6604, 'loss/train': 1.7537651062011719} +07/25/2024 11:56:28 - INFO - __main__ - Step 6606: {'lr': 0.0004980726032696403, 'samples': 317088, 'steps': 6605, 'loss/train': 2.182089328765869} +07/25/2024 11:56:28 - INFO - __main__ - Step 6607: {'lr': 0.0004980719512532197, 'samples': 317136, 'steps': 6606, 'loss/train': 2.2892799377441406} +07/25/2024 11:56:29 - INFO - __main__ - Step 6608: {'lr': 0.0004980712991269599, 'samples': 317184, 'steps': 6607, 'loss/train': 1.6338157653808594} +07/25/2024 11:56:29 - INFO - __main__ - Step 6609: {'lr': 0.000498070646890861, 'samples': 317232, 'steps': 6608, 'loss/train': 2.3479063510894775} +07/25/2024 11:56:29 - INFO - __main__ - Step 6610: {'lr': 0.0004980699945449233, 'samples': 317280, 'steps': 6609, 'loss/train': 1.6348068714141846} +07/25/2024 11:56:30 - INFO - __main__ - Step 6611: {'lr': 0.0004980693420891472, 'samples': 317328, 'steps': 6610, 'loss/train': 2.1549761295318604} +07/25/2024 11:56:30 - INFO - __main__ - Step 6612: {'lr': 0.000498068689523533, 'samples': 317376, 'steps': 6611, 'loss/train': 1.881298542022705} +07/25/2024 11:56:30 - INFO - __main__ - Step 6613: {'lr': 0.000498068036848081, 'samples': 317424, 'steps': 6612, 'loss/train': 2.1545777320861816} +07/25/2024 11:56:30 - INFO - __main__ - Step 6614: {'lr': 0.0004980673840627913, 'samples': 317472, 'steps': 6613, 'loss/train': 2.4312174320220947} +07/25/2024 11:56:31 - INFO - __main__ - Step 6615: {'lr': 0.0004980667311676643, 'samples': 317520, 'steps': 6614, 'loss/train': 2.6223347187042236} +07/25/2024 11:56:31 - INFO - __main__ - Step 6616: {'lr': 0.0004980660781627003, 'samples': 317568, 'steps': 6615, 'loss/train': 2.222205877304077} +07/25/2024 11:56:31 - INFO - __main__ - Step 6617: {'lr': 0.0004980654250478997, 'samples': 317616, 'steps': 6616, 'loss/train': 2.0741491317749023} +07/25/2024 11:56:32 - INFO - __main__ - Step 6618: {'lr': 0.0004980647718232626, 'samples': 317664, 'steps': 6617, 'loss/train': 2.364076614379883} +07/25/2024 11:56:32 - INFO - __main__ - Step 6619: {'lr': 0.0004980641184887894, 'samples': 317712, 'steps': 6618, 'loss/train': 2.2439768314361572} +07/25/2024 11:56:32 - INFO - __main__ - Step 6620: {'lr': 0.0004980634650444804, 'samples': 317760, 'steps': 6619, 'loss/train': 2.4211249351501465} +07/25/2024 11:56:32 - INFO - __main__ - Step 6621: {'lr': 0.0004980628114903358, 'samples': 317808, 'steps': 6620, 'loss/train': 2.5881388187408447} +07/25/2024 11:56:33 - INFO - __main__ - Step 6622: {'lr': 0.0004980621578263559, 'samples': 317856, 'steps': 6621, 'loss/train': 2.4946768283843994} +07/25/2024 11:56:33 - INFO - __main__ - Step 6623: {'lr': 0.0004980615040525411, 'samples': 317904, 'steps': 6622, 'loss/train': 2.418386936187744} +07/25/2024 11:56:33 - INFO - __main__ - Step 6624: {'lr': 0.0004980608501688916, 'samples': 317952, 'steps': 6623, 'loss/train': 2.1534016132354736} +07/25/2024 11:56:34 - INFO - __main__ - Step 6625: {'lr': 0.0004980601961754077, 'samples': 318000, 'steps': 6624, 'loss/train': 1.7870327234268188} +07/25/2024 11:56:34 - INFO - __main__ - Step 6626: {'lr': 0.0004980595420720896, 'samples': 318048, 'steps': 6625, 'loss/train': 2.5948328971862793} +07/25/2024 11:56:34 - INFO - __main__ - Step 6627: {'lr': 0.0004980588878589379, 'samples': 318096, 'steps': 6626, 'loss/train': 2.2526679039001465} +07/25/2024 11:56:34 - INFO - __main__ - Step 6628: {'lr': 0.0004980582335359525, 'samples': 318144, 'steps': 6627, 'loss/train': 2.0663070678710938} +07/25/2024 11:56:35 - INFO - __main__ - Step 6629: {'lr': 0.0004980575791031338, 'samples': 318192, 'steps': 6628, 'loss/train': 2.359553098678589} +07/25/2024 11:56:35 - INFO - __main__ - Step 6630: {'lr': 0.0004980569245604824, 'samples': 318240, 'steps': 6629, 'loss/train': 2.059014081954956} +07/25/2024 11:56:35 - INFO - __main__ - Step 6631: {'lr': 0.0004980562699079982, 'samples': 318288, 'steps': 6630, 'loss/train': 2.0175561904907227} +07/25/2024 11:56:36 - INFO - __main__ - Step 6632: {'lr': 0.0004980556151456815, 'samples': 318336, 'steps': 6631, 'loss/train': 1.564887285232544} +07/25/2024 11:56:36 - INFO - __main__ - Step 6633: {'lr': 0.0004980549602735329, 'samples': 318384, 'steps': 6632, 'loss/train': 2.5667715072631836} +07/25/2024 11:56:36 - INFO - __main__ - Step 6634: {'lr': 0.0004980543052915525, 'samples': 318432, 'steps': 6633, 'loss/train': 2.1012871265411377} +07/25/2024 11:56:36 - INFO - __main__ - Step 6635: {'lr': 0.0004980536501997405, 'samples': 318480, 'steps': 6634, 'loss/train': 2.0443625450134277} +07/25/2024 11:56:37 - INFO - __main__ - Step 6636: {'lr': 0.0004980529949980975, 'samples': 318528, 'steps': 6635, 'loss/train': 1.8983228206634521} +07/25/2024 11:56:37 - INFO - __main__ - Step 6637: {'lr': 0.0004980523396866235, 'samples': 318576, 'steps': 6636, 'loss/train': 1.7323294878005981} +07/25/2024 11:56:37 - INFO - __main__ - Step 6638: {'lr': 0.0004980516842653188, 'samples': 318624, 'steps': 6637, 'loss/train': 2.2578461170196533} +07/25/2024 11:56:37 - INFO - __main__ - Step 6639: {'lr': 0.0004980510287341838, 'samples': 318672, 'steps': 6638, 'loss/train': 2.363373279571533} +07/25/2024 11:56:38 - INFO - __main__ - Step 6640: {'lr': 0.0004980503730932187, 'samples': 318720, 'steps': 6639, 'loss/train': 2.2181341648101807} +07/25/2024 11:56:38 - INFO - __main__ - Step 6641: {'lr': 0.0004980497173424239, 'samples': 318768, 'steps': 6640, 'loss/train': 2.2722184658050537} +07/25/2024 11:56:38 - INFO - __main__ - Step 6642: {'lr': 0.0004980490614817997, 'samples': 318816, 'steps': 6641, 'loss/train': 2.4226136207580566} +07/25/2024 11:56:39 - INFO - __main__ - Step 6643: {'lr': 0.0004980484055113462, 'samples': 318864, 'steps': 6642, 'loss/train': 1.9604469537734985} +07/25/2024 11:56:39 - INFO - __main__ - Step 6644: {'lr': 0.0004980477494310639, 'samples': 318912, 'steps': 6643, 'loss/train': 1.8433958292007446} +07/25/2024 11:56:39 - INFO - __main__ - Step 6645: {'lr': 0.000498047093240953, 'samples': 318960, 'steps': 6644, 'loss/train': 2.0165252685546875} +07/25/2024 11:56:39 - INFO - __main__ - Step 6646: {'lr': 0.0004980464369410138, 'samples': 319008, 'steps': 6645, 'loss/train': 2.32534122467041} +07/25/2024 11:56:40 - INFO - __main__ - Step 6647: {'lr': 0.0004980457805312465, 'samples': 319056, 'steps': 6646, 'loss/train': 2.424766778945923} +07/25/2024 11:56:40 - INFO - __main__ - Step 6648: {'lr': 0.0004980451240116516, 'samples': 319104, 'steps': 6647, 'loss/train': 2.32047438621521} +07/25/2024 11:56:40 - INFO - __main__ - Step 6649: {'lr': 0.0004980444673822292, 'samples': 319152, 'steps': 6648, 'loss/train': 2.1282923221588135} +07/25/2024 11:56:41 - INFO - __main__ - Step 6650: {'lr': 0.0004980438106429797, 'samples': 319200, 'steps': 6649, 'loss/train': 2.511461019515991} +07/25/2024 11:56:41 - INFO - __main__ - Step 6651: {'lr': 0.0004980431537939033, 'samples': 319248, 'steps': 6650, 'loss/train': 2.3025524616241455} +07/25/2024 11:56:41 - INFO - __main__ - Step 6652: {'lr': 0.0004980424968350004, 'samples': 319296, 'steps': 6651, 'loss/train': 2.0207676887512207} +07/25/2024 11:56:41 - INFO - __main__ - Step 6653: {'lr': 0.0004980418397662712, 'samples': 319344, 'steps': 6652, 'loss/train': 2.042009115219116} +07/25/2024 11:56:42 - INFO - __main__ - Step 6654: {'lr': 0.0004980411825877161, 'samples': 319392, 'steps': 6653, 'loss/train': 1.6707849502563477} +07/25/2024 11:56:42 - INFO - __main__ - Step 6655: {'lr': 0.0004980405252993352, 'samples': 319440, 'steps': 6654, 'loss/train': 2.123425006866455} +07/25/2024 11:56:42 - INFO - __main__ - Step 6656: {'lr': 0.0004980398679011289, 'samples': 319488, 'steps': 6655, 'loss/train': 2.1968297958374023} +07/25/2024 11:56:43 - INFO - __main__ - Step 6657: {'lr': 0.0004980392103930975, 'samples': 319536, 'steps': 6656, 'loss/train': 1.853422999382019} +07/25/2024 11:56:43 - INFO - __main__ - Step 6658: {'lr': 0.0004980385527752414, 'samples': 319584, 'steps': 6657, 'loss/train': 1.992798089981079} +07/25/2024 11:56:43 - INFO - __main__ - Step 6659: {'lr': 0.0004980378950475607, 'samples': 319632, 'steps': 6658, 'loss/train': 2.203834056854248} +07/25/2024 11:56:43 - INFO - __main__ - Step 6660: {'lr': 0.0004980372372100557, 'samples': 319680, 'steps': 6659, 'loss/train': 1.9038984775543213} +07/25/2024 11:56:44 - INFO - __main__ - Step 6661: {'lr': 0.000498036579262727, 'samples': 319728, 'steps': 6660, 'loss/train': 1.9497334957122803} +07/25/2024 11:56:44 - INFO - __main__ - Step 6662: {'lr': 0.0004980359212055744, 'samples': 319776, 'steps': 6661, 'loss/train': 2.4039549827575684} +07/25/2024 11:56:44 - INFO - __main__ - Step 6663: {'lr': 0.0004980352630385986, 'samples': 319824, 'steps': 6662, 'loss/train': 2.407848596572876} +07/25/2024 11:56:45 - INFO - __main__ - Step 6664: {'lr': 0.0004980346047617996, 'samples': 319872, 'steps': 6663, 'loss/train': 2.100785493850708} +07/25/2024 11:56:45 - INFO - __main__ - Step 6665: {'lr': 0.0004980339463751779, 'samples': 319920, 'steps': 6664, 'loss/train': 2.546212911605835} +07/25/2024 11:56:45 - INFO - __main__ - Step 6666: {'lr': 0.0004980332878787337, 'samples': 319968, 'steps': 6665, 'loss/train': 2.2437667846679688} +07/25/2024 11:56:45 - INFO - __main__ - Step 6667: {'lr': 0.0004980326292724674, 'samples': 320016, 'steps': 6666, 'loss/train': 1.796337366104126} +07/25/2024 11:56:46 - INFO - __main__ - Step 6668: {'lr': 0.0004980319705563791, 'samples': 320064, 'steps': 6667, 'loss/train': 2.3983545303344727} +07/25/2024 11:56:46 - INFO - __main__ - Step 6669: {'lr': 0.0004980313117304693, 'samples': 320112, 'steps': 6668, 'loss/train': 2.2576887607574463} +07/25/2024 11:56:46 - INFO - __main__ - Step 6670: {'lr': 0.0004980306527947381, 'samples': 320160, 'steps': 6669, 'loss/train': 2.0760951042175293} +07/25/2024 11:56:47 - INFO - __main__ - Step 6671: {'lr': 0.0004980299937491858, 'samples': 320208, 'steps': 6670, 'loss/train': 1.8696926832199097} +07/25/2024 11:56:47 - INFO - __main__ - Step 6672: {'lr': 0.0004980293345938128, 'samples': 320256, 'steps': 6671, 'loss/train': 2.70341420173645} +07/25/2024 11:56:47 - INFO - __main__ - Step 6673: {'lr': 0.0004980286753286195, 'samples': 320304, 'steps': 6672, 'loss/train': 1.9591240882873535} +07/25/2024 11:56:47 - INFO - __main__ - Step 6674: {'lr': 0.0004980280159536059, 'samples': 320352, 'steps': 6673, 'loss/train': 2.514618158340454} +07/25/2024 11:56:48 - INFO - __main__ - Step 6675: {'lr': 0.0004980273564687725, 'samples': 320400, 'steps': 6674, 'loss/train': 1.7387406826019287} +07/25/2024 11:56:48 - INFO - __main__ - Step 6676: {'lr': 0.0004980266968741197, 'samples': 320448, 'steps': 6675, 'loss/train': 2.057314157485962} +07/25/2024 11:56:48 - INFO - __main__ - Step 6677: {'lr': 0.0004980260371696473, 'samples': 320496, 'steps': 6676, 'loss/train': 2.1879162788391113} +07/25/2024 11:56:49 - INFO - __main__ - Step 6678: {'lr': 0.0004980253773553561, 'samples': 320544, 'steps': 6677, 'loss/train': 1.741060733795166} +07/25/2024 11:56:49 - INFO - __main__ - Step 6679: {'lr': 0.0004980247174312463, 'samples': 320592, 'steps': 6678, 'loss/train': 2.452789545059204} +07/25/2024 11:56:49 - INFO - __main__ - Step 6680: {'lr': 0.0004980240573973179, 'samples': 320640, 'steps': 6679, 'loss/train': 2.2051949501037598} +07/25/2024 11:56:49 - INFO - __main__ - Step 6681: {'lr': 0.0004980233972535716, 'samples': 320688, 'steps': 6680, 'loss/train': 2.214839220046997} +07/25/2024 11:56:50 - INFO - __main__ - Step 6682: {'lr': 0.0004980227370000073, 'samples': 320736, 'steps': 6681, 'loss/train': 1.8562778234481812} +07/25/2024 11:56:50 - INFO - __main__ - Step 6683: {'lr': 0.0004980220766366257, 'samples': 320784, 'steps': 6682, 'loss/train': 2.264486074447632} +07/25/2024 11:56:50 - INFO - __main__ - Step 6684: {'lr': 0.0004980214161634267, 'samples': 320832, 'steps': 6683, 'loss/train': 2.389840841293335} +07/25/2024 11:56:51 - INFO - __main__ - Step 6685: {'lr': 0.0004980207555804108, 'samples': 320880, 'steps': 6684, 'loss/train': 2.049376964569092} +07/25/2024 11:56:51 - INFO - __main__ - Step 6686: {'lr': 0.0004980200948875782, 'samples': 320928, 'steps': 6685, 'loss/train': 2.4483418464660645} +07/25/2024 11:56:51 - INFO - __main__ - Step 6687: {'lr': 0.0004980194340849294, 'samples': 320976, 'steps': 6686, 'loss/train': 2.2969632148742676} +07/25/2024 11:56:51 - INFO - __main__ - Step 6688: {'lr': 0.0004980187731724644, 'samples': 321024, 'steps': 6687, 'loss/train': 2.0113909244537354} +07/25/2024 11:56:52 - INFO - __main__ - Step 6689: {'lr': 0.0004980181121501838, 'samples': 321072, 'steps': 6688, 'loss/train': 1.640637755393982} +07/25/2024 11:56:52 - INFO - __main__ - Step 6690: {'lr': 0.0004980174510180877, 'samples': 321120, 'steps': 6689, 'loss/train': 2.151486396789551} +07/25/2024 11:56:52 - INFO - __main__ - Step 6691: {'lr': 0.0004980167897761763, 'samples': 321168, 'steps': 6690, 'loss/train': 2.032503128051758} +07/25/2024 11:56:53 - INFO - __main__ - Step 6692: {'lr': 0.0004980161284244501, 'samples': 321216, 'steps': 6691, 'loss/train': 2.1329433917999268} +07/25/2024 11:56:53 - INFO - __main__ - Step 6693: {'lr': 0.0004980154669629093, 'samples': 321264, 'steps': 6692, 'loss/train': 2.4598002433776855} +07/25/2024 11:56:53 - INFO - __main__ - Step 6694: {'lr': 0.0004980148053915542, 'samples': 321312, 'steps': 6693, 'loss/train': 2.223186731338501} +07/25/2024 11:56:53 - INFO - __main__ - Step 6695: {'lr': 0.000498014143710385, 'samples': 321360, 'steps': 6694, 'loss/train': 1.9077646732330322} +07/25/2024 11:56:54 - INFO - __main__ - Step 6696: {'lr': 0.0004980134819194022, 'samples': 321408, 'steps': 6695, 'loss/train': 2.4011728763580322} +07/25/2024 11:56:54 - INFO - __main__ - Step 6697: {'lr': 0.0004980128200186059, 'samples': 321456, 'steps': 6696, 'loss/train': 2.0793285369873047} +07/25/2024 11:56:54 - INFO - __main__ - Step 6698: {'lr': 0.0004980121580079966, 'samples': 321504, 'steps': 6697, 'loss/train': 2.039001941680908} +07/25/2024 11:56:55 - INFO - __main__ - Step 6699: {'lr': 0.0004980114958875743, 'samples': 321552, 'steps': 6698, 'loss/train': 2.3168115615844727} +07/25/2024 11:56:55 - INFO - __main__ - Step 6700: {'lr': 0.0004980108336573396, 'samples': 321600, 'steps': 6699, 'loss/train': 1.9992798566818237} +07/25/2024 11:56:55 - INFO - __main__ - Step 6701: {'lr': 0.0004980101713172925, 'samples': 321648, 'steps': 6700, 'loss/train': 2.179696798324585} +07/25/2024 11:56:55 - INFO - __main__ - Step 6702: {'lr': 0.0004980095088674335, 'samples': 321696, 'steps': 6701, 'loss/train': 2.149935483932495} +07/25/2024 11:56:56 - INFO - __main__ - Step 6703: {'lr': 0.000498008846307763, 'samples': 321744, 'steps': 6702, 'loss/train': 2.2367167472839355} +07/25/2024 11:56:56 - INFO - __main__ - Step 6704: {'lr': 0.000498008183638281, 'samples': 321792, 'steps': 6703, 'loss/train': 1.77776038646698} +07/25/2024 11:56:56 - INFO - __main__ - Step 6705: {'lr': 0.000498007520858988, 'samples': 321840, 'steps': 6704, 'loss/train': 1.5498846769332886} +07/25/2024 11:56:56 - INFO - __main__ - Step 6706: {'lr': 0.0004980068579698841, 'samples': 321888, 'steps': 6705, 'loss/train': 1.5415451526641846} +07/25/2024 11:56:57 - INFO - __main__ - Step 6707: {'lr': 0.0004980061949709698, 'samples': 321936, 'steps': 6706, 'loss/train': 2.0985023975372314} +07/25/2024 11:56:57 - INFO - __main__ - Step 6708: {'lr': 0.0004980055318622451, 'samples': 321984, 'steps': 6707, 'loss/train': 2.0094192028045654} +07/25/2024 11:56:57 - INFO - __main__ - Step 6709: {'lr': 0.0004980048686437108, 'samples': 322032, 'steps': 6708, 'loss/train': 2.083383083343506} +07/25/2024 11:56:58 - INFO - __main__ - Step 6710: {'lr': 0.0004980042053153667, 'samples': 322080, 'steps': 6709, 'loss/train': 2.359842538833618} +07/25/2024 11:56:58 - INFO - __main__ - Step 6711: {'lr': 0.0004980035418772135, 'samples': 322128, 'steps': 6710, 'loss/train': 2.920017719268799} +07/25/2024 11:56:58 - INFO - __main__ - Step 6712: {'lr': 0.000498002878329251, 'samples': 322176, 'steps': 6711, 'loss/train': 2.1068055629730225} +07/25/2024 11:56:58 - INFO - __main__ - Step 6713: {'lr': 0.0004980022146714799, 'samples': 322224, 'steps': 6712, 'loss/train': 1.3973264694213867} +07/25/2024 11:56:59 - INFO - __main__ - Step 6714: {'lr': 0.0004980015509039004, 'samples': 322272, 'steps': 6713, 'loss/train': 2.3961074352264404} +07/25/2024 11:56:59 - INFO - __main__ - Step 6715: {'lr': 0.0004980008870265128, 'samples': 322320, 'steps': 6714, 'loss/train': 2.0287811756134033} +07/25/2024 11:56:59 - INFO - __main__ - Step 6716: {'lr': 0.0004980002230393172, 'samples': 322368, 'steps': 6715, 'loss/train': 2.2158961296081543} +07/25/2024 11:57:00 - INFO - __main__ - Step 6717: {'lr': 0.0004979995589423141, 'samples': 322416, 'steps': 6716, 'loss/train': 2.468855142593384} +07/25/2024 11:57:00 - INFO - __main__ - Step 6718: {'lr': 0.0004979988947355038, 'samples': 322464, 'steps': 6717, 'loss/train': 2.4836463928222656} +07/25/2024 11:57:00 - INFO - __main__ - Step 6719: {'lr': 0.0004979982304188866, 'samples': 322512, 'steps': 6718, 'loss/train': 1.451221227645874} +07/25/2024 11:57:00 - INFO - __main__ - Step 6720: {'lr': 0.0004979975659924627, 'samples': 322560, 'steps': 6719, 'loss/train': 2.4166314601898193} +07/25/2024 11:57:01 - INFO - __main__ - Step 6721: {'lr': 0.0004979969014562324, 'samples': 322608, 'steps': 6720, 'loss/train': 1.6949104070663452} +07/25/2024 11:57:01 - INFO - __main__ - Step 6722: {'lr': 0.0004979962368101959, 'samples': 322656, 'steps': 6721, 'loss/train': 2.6304242610931396} +07/25/2024 11:57:01 - INFO - __main__ - Step 6723: {'lr': 0.0004979955720543537, 'samples': 322704, 'steps': 6722, 'loss/train': 2.002500057220459} +07/25/2024 11:57:02 - INFO - __main__ - Step 6724: {'lr': 0.0004979949071887061, 'samples': 322752, 'steps': 6723, 'loss/train': 1.9460654258728027} +07/25/2024 11:57:02 - INFO - __main__ - Step 6725: {'lr': 0.0004979942422132532, 'samples': 322800, 'steps': 6724, 'loss/train': 1.9885200262069702} +07/25/2024 11:57:02 - INFO - __main__ - Step 6726: {'lr': 0.0004979935771279954, 'samples': 322848, 'steps': 6725, 'loss/train': 2.415024518966675} +07/25/2024 11:57:02 - INFO - __main__ - Step 6727: {'lr': 0.0004979929119329331, 'samples': 322896, 'steps': 6726, 'loss/train': 1.8682714700698853} +07/25/2024 11:57:03 - INFO - __main__ - Step 6728: {'lr': 0.0004979922466280664, 'samples': 322944, 'steps': 6727, 'loss/train': 2.8227336406707764} +07/25/2024 11:57:03 - INFO - __main__ - Step 6729: {'lr': 0.0004979915812133957, 'samples': 322992, 'steps': 6728, 'loss/train': 2.5967745780944824} +07/25/2024 11:57:03 - INFO - __main__ - Step 6730: {'lr': 0.0004979909156889212, 'samples': 323040, 'steps': 6729, 'loss/train': 1.9273455142974854} +07/25/2024 11:57:04 - INFO - __main__ - Step 6731: {'lr': 0.0004979902500546434, 'samples': 323088, 'steps': 6730, 'loss/train': 2.1610770225524902} +07/25/2024 11:57:04 - INFO - __main__ - Step 6732: {'lr': 0.0004979895843105624, 'samples': 323136, 'steps': 6731, 'loss/train': 2.13757586479187} +07/25/2024 11:57:04 - INFO - __main__ - Step 6733: {'lr': 0.0004979889184566786, 'samples': 323184, 'steps': 6732, 'loss/train': 2.348285436630249} +07/25/2024 11:57:04 - INFO - __main__ - Step 6734: {'lr': 0.0004979882524929922, 'samples': 323232, 'steps': 6733, 'loss/train': 2.2334585189819336} +07/25/2024 11:57:05 - INFO - __main__ - Step 6735: {'lr': 0.0004979875864195036, 'samples': 323280, 'steps': 6734, 'loss/train': 2.8316965103149414} +07/25/2024 11:57:05 - INFO - __main__ - Step 6736: {'lr': 0.0004979869202362129, 'samples': 323328, 'steps': 6735, 'loss/train': 1.9832885265350342} +07/25/2024 11:57:05 - INFO - __main__ - Step 6737: {'lr': 0.0004979862539431207, 'samples': 323376, 'steps': 6736, 'loss/train': 1.2895525693893433} +07/25/2024 11:57:06 - INFO - __main__ - Step 6738: {'lr': 0.0004979855875402271, 'samples': 323424, 'steps': 6737, 'loss/train': 2.571213960647583} +07/25/2024 11:57:06 - INFO - __main__ - Step 6739: {'lr': 0.0004979849210275325, 'samples': 323472, 'steps': 6738, 'loss/train': 2.2406063079833984} +07/25/2024 11:57:06 - INFO - __main__ - Step 6740: {'lr': 0.000497984254405037, 'samples': 323520, 'steps': 6739, 'loss/train': 1.0553078651428223} +07/25/2024 11:57:06 - INFO - __main__ - Step 6741: {'lr': 0.0004979835876727411, 'samples': 323568, 'steps': 6740, 'loss/train': 2.315297842025757} +07/25/2024 11:57:07 - INFO - __main__ - Step 6742: {'lr': 0.0004979829208306449, 'samples': 323616, 'steps': 6741, 'loss/train': 2.4309096336364746} +07/25/2024 11:57:07 - INFO - __main__ - Step 6743: {'lr': 0.000497982253878749, 'samples': 323664, 'steps': 6742, 'loss/train': 0.586361289024353} +07/25/2024 11:57:07 - INFO - __main__ - Step 6744: {'lr': 0.0004979815868170534, 'samples': 323712, 'steps': 6743, 'loss/train': 2.3021018505096436} +07/25/2024 11:57:08 - INFO - __main__ - Step 6745: {'lr': 0.0004979809196455585, 'samples': 323760, 'steps': 6744, 'loss/train': 1.6655324697494507} +07/25/2024 11:57:08 - INFO - __main__ - Step 6746: {'lr': 0.0004979802523642645, 'samples': 323808, 'steps': 6745, 'loss/train': 2.4103140830993652} +07/25/2024 11:57:08 - INFO - __main__ - Step 6747: {'lr': 0.000497979584973172, 'samples': 323856, 'steps': 6746, 'loss/train': 1.865107774734497} +07/25/2024 11:57:08 - INFO - __main__ - Step 6748: {'lr': 0.0004979789174722809, 'samples': 323904, 'steps': 6747, 'loss/train': 1.7670707702636719} +07/25/2024 11:57:09 - INFO - __main__ - Step 6749: {'lr': 0.0004979782498615918, 'samples': 323952, 'steps': 6748, 'loss/train': 2.1824705600738525} +07/25/2024 11:57:09 - INFO - __main__ - Step 6750: {'lr': 0.0004979775821411049, 'samples': 324000, 'steps': 6749, 'loss/train': 2.2608330249786377} +07/25/2024 11:57:09 - INFO - __main__ - Step 6751: {'lr': 0.0004979769143108203, 'samples': 324048, 'steps': 6750, 'loss/train': 2.6682803630828857} +07/25/2024 11:57:10 - INFO - __main__ - Step 6752: {'lr': 0.0004979762463707387, 'samples': 324096, 'steps': 6751, 'loss/train': 2.1860742568969727} +07/25/2024 11:57:10 - INFO - __main__ - Step 6753: {'lr': 0.0004979755783208601, 'samples': 324144, 'steps': 6752, 'loss/train': 2.713505983352661} +07/25/2024 11:57:10 - INFO - __main__ - Step 6754: {'lr': 0.0004979749101611847, 'samples': 324192, 'steps': 6753, 'loss/train': 2.1355230808258057} +07/25/2024 11:57:10 - INFO - __main__ - Step 6755: {'lr': 0.0004979742418917131, 'samples': 324240, 'steps': 6754, 'loss/train': 2.2597596645355225} +07/25/2024 11:57:11 - INFO - __main__ - Step 6756: {'lr': 0.0004979735735124454, 'samples': 324288, 'steps': 6755, 'loss/train': 1.9234040975570679} +07/25/2024 11:57:11 - INFO - __main__ - Step 6757: {'lr': 0.000497972905023382, 'samples': 324336, 'steps': 6756, 'loss/train': 2.4450056552886963} +07/25/2024 11:57:11 - INFO - __main__ - Step 6758: {'lr': 0.0004979722364245231, 'samples': 324384, 'steps': 6757, 'loss/train': 2.3580098152160645} +07/25/2024 11:57:12 - INFO - __main__ - Step 6759: {'lr': 0.0004979715677158692, 'samples': 324432, 'steps': 6758, 'loss/train': 2.8101398944854736} +07/25/2024 11:57:12 - INFO - __main__ - Step 6760: {'lr': 0.0004979708988974202, 'samples': 324480, 'steps': 6759, 'loss/train': 2.2400152683258057} +07/25/2024 11:57:12 - INFO - __main__ - Step 6761: {'lr': 0.0004979702299691768, 'samples': 324528, 'steps': 6760, 'loss/train': 1.1569206714630127} +07/25/2024 11:57:12 - INFO - __main__ - Step 6762: {'lr': 0.000497969560931139, 'samples': 324576, 'steps': 6761, 'loss/train': 1.7052624225616455} +07/25/2024 11:57:13 - INFO - __main__ - Step 6763: {'lr': 0.0004979688917833072, 'samples': 324624, 'steps': 6762, 'loss/train': 2.272569179534912} +07/25/2024 11:57:13 - INFO - __main__ - Step 6764: {'lr': 0.000497968222525682, 'samples': 324672, 'steps': 6763, 'loss/train': 1.5162349939346313} +07/25/2024 11:57:13 - INFO - __main__ - Step 6765: {'lr': 0.0004979675531582631, 'samples': 324720, 'steps': 6764, 'loss/train': 1.9677581787109375} +07/25/2024 11:57:13 - INFO - __main__ - Step 6766: {'lr': 0.0004979668836810514, 'samples': 324768, 'steps': 6765, 'loss/train': 2.1524717807769775} +07/25/2024 11:57:14 - INFO - __main__ - Step 6767: {'lr': 0.0004979662140940467, 'samples': 324816, 'steps': 6766, 'loss/train': 2.158936023712158} +07/25/2024 11:57:14 - INFO - __main__ - Step 6768: {'lr': 0.0004979655443972496, 'samples': 324864, 'steps': 6767, 'loss/train': 2.220290422439575} +07/25/2024 11:57:14 - INFO - __main__ - Step 6769: {'lr': 0.0004979648745906602, 'samples': 324912, 'steps': 6768, 'loss/train': 1.2796696424484253} +07/25/2024 11:57:15 - INFO - __main__ - Step 6770: {'lr': 0.0004979642046742791, 'samples': 324960, 'steps': 6769, 'loss/train': 2.115025043487549} +07/25/2024 11:57:15 - INFO - __main__ - Step 6771: {'lr': 0.0004979635346481062, 'samples': 325008, 'steps': 6770, 'loss/train': 2.536411762237549} +07/25/2024 11:57:15 - INFO - __main__ - Step 6772: {'lr': 0.0004979628645121421, 'samples': 325056, 'steps': 6771, 'loss/train': 2.403308153152466} +07/25/2024 11:57:15 - INFO - __main__ - Step 6773: {'lr': 0.000497962194266387, 'samples': 325104, 'steps': 6772, 'loss/train': 2.036587953567505} +07/25/2024 11:57:16 - INFO - __main__ - Step 6774: {'lr': 0.0004979615239108411, 'samples': 325152, 'steps': 6773, 'loss/train': 2.0392088890075684} +07/25/2024 11:57:16 - INFO - __main__ - Step 6775: {'lr': 0.0004979608534455049, 'samples': 325200, 'steps': 6774, 'loss/train': 2.071387767791748} +07/25/2024 11:57:16 - INFO - __main__ - Step 6776: {'lr': 0.0004979601828703784, 'samples': 325248, 'steps': 6775, 'loss/train': 2.3542118072509766} +07/25/2024 11:57:17 - INFO - __main__ - Step 6777: {'lr': 0.0004979595121854623, 'samples': 325296, 'steps': 6776, 'loss/train': 2.2034685611724854} +07/25/2024 11:57:17 - INFO - __main__ - Step 6778: {'lr': 0.0004979588413907565, 'samples': 325344, 'steps': 6777, 'loss/train': 1.4477152824401855} +07/25/2024 11:57:17 - INFO - __main__ - Step 6779: {'lr': 0.0004979581704862616, 'samples': 325392, 'steps': 6778, 'loss/train': 1.90921151638031} +07/25/2024 11:57:17 - INFO - __main__ - Step 6780: {'lr': 0.0004979574994719777, 'samples': 325440, 'steps': 6779, 'loss/train': 1.6662957668304443} +07/25/2024 11:57:18 - INFO - __main__ - Step 6781: {'lr': 0.0004979568283479052, 'samples': 325488, 'steps': 6780, 'loss/train': 2.2884209156036377} +07/25/2024 11:57:18 - INFO - __main__ - Step 6782: {'lr': 0.0004979561571140442, 'samples': 325536, 'steps': 6781, 'loss/train': 2.3317320346832275} +07/25/2024 11:57:18 - INFO - __main__ - Step 6783: {'lr': 0.0004979554857703954, 'samples': 325584, 'steps': 6782, 'loss/train': 2.799994707107544} +07/25/2024 11:57:19 - INFO - __main__ - Step 6784: {'lr': 0.0004979548143169588, 'samples': 325632, 'steps': 6783, 'loss/train': 2.671328544616699} +07/25/2024 11:57:19 - INFO - __main__ - Step 6785: {'lr': 0.0004979541427537347, 'samples': 325680, 'steps': 6784, 'loss/train': 1.604885458946228} +07/25/2024 11:57:19 - INFO - __main__ - Step 6786: {'lr': 0.0004979534710807236, 'samples': 325728, 'steps': 6785, 'loss/train': 1.987146258354187} +07/25/2024 11:57:19 - INFO - __main__ - Step 6787: {'lr': 0.0004979527992979254, 'samples': 325776, 'steps': 6786, 'loss/train': 2.1892168521881104} +07/25/2024 11:57:20 - INFO - __main__ - Step 6788: {'lr': 0.0004979521274053408, 'samples': 325824, 'steps': 6787, 'loss/train': 1.8906059265136719} +07/25/2024 11:57:20 - INFO - __main__ - Step 6789: {'lr': 0.00049795145540297, 'samples': 325872, 'steps': 6788, 'loss/train': 1.1223188638687134} +07/25/2024 11:57:20 - INFO - __main__ - Step 6790: {'lr': 0.0004979507832908131, 'samples': 325920, 'steps': 6789, 'loss/train': 1.5838133096694946} +07/25/2024 11:57:21 - INFO - __main__ - Step 6791: {'lr': 0.0004979501110688705, 'samples': 325968, 'steps': 6790, 'loss/train': 2.0360732078552246} +07/25/2024 11:57:21 - INFO - __main__ - Step 6792: {'lr': 0.0004979494387371427, 'samples': 326016, 'steps': 6791, 'loss/train': 1.9493170976638794} +07/25/2024 11:57:21 - INFO - __main__ - Step 6793: {'lr': 0.0004979487662956298, 'samples': 326064, 'steps': 6792, 'loss/train': 1.9907331466674805} +07/25/2024 11:57:21 - INFO - __main__ - Step 6794: {'lr': 0.0004979480937443322, 'samples': 326112, 'steps': 6793, 'loss/train': 2.407794713973999} +07/25/2024 11:57:22 - INFO - __main__ - Step 6795: {'lr': 0.00049794742108325, 'samples': 326160, 'steps': 6794, 'loss/train': 2.648529529571533} +07/25/2024 11:57:22 - INFO - __main__ - Step 6796: {'lr': 0.0004979467483123836, 'samples': 326208, 'steps': 6795, 'loss/train': 2.6060707569122314} +07/25/2024 11:57:22 - INFO - __main__ - Step 6797: {'lr': 0.0004979460754317334, 'samples': 326256, 'steps': 6796, 'loss/train': 2.209639310836792} +07/25/2024 11:57:23 - INFO - __main__ - Step 6798: {'lr': 0.0004979454024412996, 'samples': 326304, 'steps': 6797, 'loss/train': 1.9010320901870728} +07/25/2024 11:57:23 - INFO - __main__ - Step 6799: {'lr': 0.0004979447293410826, 'samples': 326352, 'steps': 6798, 'loss/train': 2.2836837768554688} +07/25/2024 11:57:23 - INFO - __main__ - Step 6800: {'lr': 0.0004979440561310826, 'samples': 326400, 'steps': 6799, 'loss/train': 2.5712716579437256} +07/25/2024 11:57:23 - INFO - __main__ - Step 6801: {'lr': 0.0004979433828112998, 'samples': 326448, 'steps': 6800, 'loss/train': 2.534816265106201} +07/25/2024 11:57:24 - INFO - __main__ - Step 6802: {'lr': 0.0004979427093817348, 'samples': 326496, 'steps': 6801, 'loss/train': 1.8389918804168701} +07/25/2024 11:57:24 - INFO - __main__ - Step 6803: {'lr': 0.0004979420358423876, 'samples': 326544, 'steps': 6802, 'loss/train': 2.034909248352051} +07/25/2024 11:57:24 - INFO - __main__ - Step 6804: {'lr': 0.0004979413621932587, 'samples': 326592, 'steps': 6803, 'loss/train': 1.5086363554000854} +07/25/2024 11:57:25 - INFO - __main__ - Step 6805: {'lr': 0.0004979406884343481, 'samples': 326640, 'steps': 6804, 'loss/train': 1.8933240175247192} +07/25/2024 11:57:25 - INFO - __main__ - Step 6806: {'lr': 0.0004979400145656565, 'samples': 326688, 'steps': 6805, 'loss/train': 2.2884209156036377} +07/25/2024 11:57:25 - INFO - __main__ - Step 6807: {'lr': 0.0004979393405871841, 'samples': 326736, 'steps': 6806, 'loss/train': 3.007859230041504} +07/25/2024 11:57:25 - INFO - __main__ - Step 6808: {'lr': 0.0004979386664989309, 'samples': 326784, 'steps': 6807, 'loss/train': 2.0122182369232178} +07/25/2024 11:57:26 - INFO - __main__ - Step 6809: {'lr': 0.0004979379923008974, 'samples': 326832, 'steps': 6808, 'loss/train': 2.449932098388672} +07/25/2024 11:57:26 - INFO - __main__ - Step 6810: {'lr': 0.0004979373179930841, 'samples': 326880, 'steps': 6809, 'loss/train': 2.077388286590576} +07/25/2024 11:57:26 - INFO - __main__ - Step 6811: {'lr': 0.000497936643575491, 'samples': 326928, 'steps': 6810, 'loss/train': 2.064382791519165} +07/25/2024 11:57:27 - INFO - __main__ - Step 6812: {'lr': 0.0004979359690481185, 'samples': 326976, 'steps': 6811, 'loss/train': 1.6624658107757568} +07/25/2024 11:57:27 - INFO - __main__ - Step 6813: {'lr': 0.0004979352944109668, 'samples': 327024, 'steps': 6812, 'loss/train': 1.6466255187988281} +07/25/2024 11:57:27 - INFO - __main__ - Step 6814: {'lr': 0.0004979346196640365, 'samples': 327072, 'steps': 6813, 'loss/train': 1.3631941080093384} +07/25/2024 11:57:27 - INFO - __main__ - Step 6815: {'lr': 0.0004979339448073274, 'samples': 327120, 'steps': 6814, 'loss/train': 2.015258312225342} +07/25/2024 11:57:28 - INFO - __main__ - Step 6816: {'lr': 0.0004979332698408403, 'samples': 327168, 'steps': 6815, 'loss/train': 1.9611936807632446} +07/25/2024 11:57:28 - INFO - __main__ - Step 6817: {'lr': 0.0004979325947645753, 'samples': 327216, 'steps': 6816, 'loss/train': 2.0295543670654297} +07/25/2024 11:57:28 - INFO - __main__ - Step 6818: {'lr': 0.0004979319195785326, 'samples': 327264, 'steps': 6817, 'loss/train': 2.124751091003418} +07/25/2024 11:57:29 - INFO - __main__ - Step 6819: {'lr': 0.0004979312442827127, 'samples': 327312, 'steps': 6818, 'loss/train': 2.1517493724823} +07/25/2024 11:57:29 - INFO - __main__ - Step 6820: {'lr': 0.0004979305688771158, 'samples': 327360, 'steps': 6819, 'loss/train': 1.7783150672912598} +07/25/2024 11:57:29 - INFO - __main__ - Step 6821: {'lr': 0.0004979298933617421, 'samples': 327408, 'steps': 6820, 'loss/train': 1.9381941556930542} +07/25/2024 11:57:29 - INFO - __main__ - Step 6822: {'lr': 0.0004979292177365921, 'samples': 327456, 'steps': 6821, 'loss/train': 2.464414119720459} +07/25/2024 11:57:30 - INFO - __main__ - Step 6823: {'lr': 0.0004979285420016659, 'samples': 327504, 'steps': 6822, 'loss/train': 1.6810007095336914} +07/25/2024 11:57:30 - INFO - __main__ - Step 6824: {'lr': 0.000497927866156964, 'samples': 327552, 'steps': 6823, 'loss/train': 1.9065442085266113} +07/25/2024 11:57:30 - INFO - __main__ - Step 6825: {'lr': 0.0004979271902024865, 'samples': 327600, 'steps': 6824, 'loss/train': 2.3040525913238525} +07/25/2024 11:57:31 - INFO - __main__ - Step 6826: {'lr': 0.0004979265141382337, 'samples': 327648, 'steps': 6825, 'loss/train': 1.4474776983261108} +07/25/2024 11:57:31 - INFO - __main__ - Step 6827: {'lr': 0.0004979258379642061, 'samples': 327696, 'steps': 6826, 'loss/train': 1.3870140314102173} +07/25/2024 11:57:31 - INFO - __main__ - Step 6828: {'lr': 0.0004979251616804038, 'samples': 327744, 'steps': 6827, 'loss/train': 1.5932741165161133} +07/25/2024 11:57:31 - INFO - __main__ - Step 6829: {'lr': 0.0004979244852868273, 'samples': 327792, 'steps': 6828, 'loss/train': 2.8791086673736572} +07/25/2024 11:57:32 - INFO - __main__ - Step 6830: {'lr': 0.0004979238087834768, 'samples': 327840, 'steps': 6829, 'loss/train': 2.1680257320404053} +07/25/2024 11:57:32 - INFO - __main__ - Step 6831: {'lr': 0.0004979231321703524, 'samples': 327888, 'steps': 6830, 'loss/train': 2.846409559249878} +07/25/2024 11:57:32 - INFO - __main__ - Step 6832: {'lr': 0.0004979224554474547, 'samples': 327936, 'steps': 6831, 'loss/train': 2.280302047729492} +07/25/2024 11:57:33 - INFO - __main__ - Step 6833: {'lr': 0.0004979217786147839, 'samples': 327984, 'steps': 6832, 'loss/train': 2.389209747314453} +07/25/2024 11:57:33 - INFO - __main__ - Step 6834: {'lr': 0.0004979211016723403, 'samples': 328032, 'steps': 6833, 'loss/train': 0.7162644267082214} +07/25/2024 11:57:33 - INFO - __main__ - Step 6835: {'lr': 0.0004979204246201241, 'samples': 328080, 'steps': 6834, 'loss/train': 2.1490280628204346} +07/25/2024 11:57:33 - INFO - __main__ - Step 6836: {'lr': 0.0004979197474581357, 'samples': 328128, 'steps': 6835, 'loss/train': 2.0662429332733154} +07/25/2024 11:57:34 - INFO - __main__ - Step 6837: {'lr': 0.0004979190701863755, 'samples': 328176, 'steps': 6836, 'loss/train': 2.108666181564331} +07/25/2024 11:57:34 - INFO - __main__ - Step 6838: {'lr': 0.0004979183928048435, 'samples': 328224, 'steps': 6837, 'loss/train': 1.9324275255203247} +07/25/2024 11:57:34 - INFO - __main__ - Step 6839: {'lr': 0.0004979177153135403, 'samples': 328272, 'steps': 6838, 'loss/train': 1.8745583295822144} +07/25/2024 11:57:34 - INFO - __main__ - Step 6840: {'lr': 0.0004979170377124661, 'samples': 328320, 'steps': 6839, 'loss/train': 2.415374755859375} +07/25/2024 11:57:35 - INFO - __main__ - Step 6841: {'lr': 0.0004979163600016211, 'samples': 328368, 'steps': 6840, 'loss/train': 1.9233007431030273} +07/25/2024 11:57:35 - INFO - __main__ - Step 6842: {'lr': 0.0004979156821810058, 'samples': 328416, 'steps': 6841, 'loss/train': 4.092950820922852} +07/25/2024 11:57:35 - INFO - __main__ - Step 6843: {'lr': 0.0004979150042506203, 'samples': 328464, 'steps': 6842, 'loss/train': 2.6310834884643555} +07/25/2024 11:57:36 - INFO - __main__ - Step 6844: {'lr': 0.0004979143262104649, 'samples': 328512, 'steps': 6843, 'loss/train': 0.5664240717887878} +07/25/2024 11:57:36 - INFO - __main__ - Step 6845: {'lr': 0.0004979136480605401, 'samples': 328560, 'steps': 6844, 'loss/train': 2.1111338138580322} +07/25/2024 11:57:36 - INFO - __main__ - Step 6846: {'lr': 0.0004979129698008461, 'samples': 328608, 'steps': 6845, 'loss/train': 2.28867244720459} +07/25/2024 11:57:36 - INFO - __main__ - Step 6847: {'lr': 0.0004979122914313831, 'samples': 328656, 'steps': 6846, 'loss/train': 2.1698713302612305} +07/25/2024 11:57:37 - INFO - __main__ - Step 6848: {'lr': 0.0004979116129521516, 'samples': 328704, 'steps': 6847, 'loss/train': 1.7393537759780884} +07/25/2024 11:57:37 - INFO - __main__ - Step 6849: {'lr': 0.0004979109343631517, 'samples': 328752, 'steps': 6848, 'loss/train': 2.1656088829040527} +07/25/2024 11:57:37 - INFO - __main__ - Step 6850: {'lr': 0.0004979102556643837, 'samples': 328800, 'steps': 6849, 'loss/train': 2.019238233566284} +07/25/2024 11:57:38 - INFO - __main__ - Step 6851: {'lr': 0.0004979095768558482, 'samples': 328848, 'steps': 6850, 'loss/train': 1.9363473653793335} +07/25/2024 11:57:38 - INFO - __main__ - Step 6852: {'lr': 0.0004979088979375451, 'samples': 328896, 'steps': 6851, 'loss/train': 2.4054391384124756} +07/25/2024 11:57:38 - INFO - __main__ - Step 6853: {'lr': 0.0004979082189094749, 'samples': 328944, 'steps': 6852, 'loss/train': 2.689713954925537} +07/25/2024 11:57:38 - INFO - __main__ - Step 6854: {'lr': 0.000497907539771638, 'samples': 328992, 'steps': 6853, 'loss/train': 1.769310712814331} +07/25/2024 11:57:39 - INFO - __main__ - Step 6855: {'lr': 0.0004979068605240345, 'samples': 329040, 'steps': 6854, 'loss/train': 2.765225887298584} +07/25/2024 11:57:39 - INFO - __main__ - Step 6856: {'lr': 0.0004979061811666648, 'samples': 329088, 'steps': 6855, 'loss/train': 2.4800398349761963} +07/25/2024 11:57:39 - INFO - __main__ - Step 6857: {'lr': 0.0004979055016995292, 'samples': 329136, 'steps': 6856, 'loss/train': 1.940165638923645} +07/25/2024 11:57:40 - INFO - __main__ - Step 6858: {'lr': 0.000497904822122628, 'samples': 329184, 'steps': 6857, 'loss/train': 1.9583334922790527} +07/25/2024 11:57:40 - INFO - __main__ - Step 6859: {'lr': 0.0004979041424359615, 'samples': 329232, 'steps': 6858, 'loss/train': 1.889244794845581} +07/25/2024 11:57:40 - INFO - __main__ - Step 6860: {'lr': 0.0004979034626395299, 'samples': 329280, 'steps': 6859, 'loss/train': 2.2824957370758057} +07/25/2024 11:57:40 - INFO - __main__ - Step 6861: {'lr': 0.0004979027827333337, 'samples': 329328, 'steps': 6860, 'loss/train': 1.8499807119369507} +07/25/2024 11:57:41 - INFO - __main__ - Step 6862: {'lr': 0.0004979021027173731, 'samples': 329376, 'steps': 6861, 'loss/train': 2.356520652770996} +07/25/2024 11:57:41 - INFO - __main__ - Step 6863: {'lr': 0.0004979014225916483, 'samples': 329424, 'steps': 6862, 'loss/train': 2.13728666305542} +07/25/2024 11:57:41 - INFO - __main__ - Step 6864: {'lr': 0.0004979007423561597, 'samples': 329472, 'steps': 6863, 'loss/train': 1.9680207967758179} +07/25/2024 11:57:42 - INFO - __main__ - Step 6865: {'lr': 0.0004979000620109078, 'samples': 329520, 'steps': 6864, 'loss/train': 2.295619010925293} +07/25/2024 11:57:42 - INFO - __main__ - Step 6866: {'lr': 0.0004978993815558925, 'samples': 329568, 'steps': 6865, 'loss/train': 3.977438449859619} +07/25/2024 11:57:42 - INFO - __main__ - Step 6867: {'lr': 0.0004978987009911143, 'samples': 329616, 'steps': 6866, 'loss/train': 2.5477490425109863} +07/25/2024 11:57:42 - INFO - __main__ - Step 6868: {'lr': 0.0004978980203165736, 'samples': 329664, 'steps': 6867, 'loss/train': 1.0374503135681152} +07/25/2024 11:57:43 - INFO - __main__ - Step 6869: {'lr': 0.0004978973395322705, 'samples': 329712, 'steps': 6868, 'loss/train': 2.710308790206909} +07/25/2024 11:57:43 - INFO - __main__ - Step 6870: {'lr': 0.0004978966586382054, 'samples': 329760, 'steps': 6869, 'loss/train': 1.9680095911026} +07/25/2024 11:57:43 - INFO - __main__ - Step 6871: {'lr': 0.0004978959776343788, 'samples': 329808, 'steps': 6870, 'loss/train': 2.502190589904785} +07/25/2024 11:57:44 - INFO - __main__ - Step 6872: {'lr': 0.0004978952965207906, 'samples': 329856, 'steps': 6871, 'loss/train': 2.2976596355438232} +07/25/2024 11:57:44 - INFO - __main__ - Step 6873: {'lr': 0.0004978946152974414, 'samples': 329904, 'steps': 6872, 'loss/train': 2.1457221508026123} +07/25/2024 11:57:44 - INFO - __main__ - Step 6874: {'lr': 0.0004978939339643313, 'samples': 329952, 'steps': 6873, 'loss/train': 2.2038238048553467} +07/25/2024 11:57:44 - INFO - __main__ - Step 6875: {'lr': 0.0004978932525214609, 'samples': 330000, 'steps': 6874, 'loss/train': 2.6269211769104004} +07/25/2024 11:57:45 - INFO - __main__ - Step 6876: {'lr': 0.0004978925709688302, 'samples': 330048, 'steps': 6875, 'loss/train': 1.1968932151794434} +07/25/2024 11:57:45 - INFO - __main__ - Step 6877: {'lr': 0.0004978918893064396, 'samples': 330096, 'steps': 6876, 'loss/train': 2.582860231399536} +07/25/2024 11:57:45 - INFO - __main__ - Step 6878: {'lr': 0.0004978912075342894, 'samples': 330144, 'steps': 6877, 'loss/train': 2.414492607116699} +07/25/2024 11:57:46 - INFO - __main__ - Step 6879: {'lr': 0.00049789052565238, 'samples': 330192, 'steps': 6878, 'loss/train': 2.890195369720459} +07/25/2024 11:57:46 - INFO - __main__ - Step 6880: {'lr': 0.0004978898436607116, 'samples': 330240, 'steps': 6879, 'loss/train': 2.5647032260894775} +07/25/2024 11:57:46 - INFO - __main__ - Step 6881: {'lr': 0.0004978891615592845, 'samples': 330288, 'steps': 6880, 'loss/train': 2.323742389678955} +07/25/2024 11:57:46 - INFO - __main__ - Step 6882: {'lr': 0.000497888479348099, 'samples': 330336, 'steps': 6881, 'loss/train': 2.2315902709960938} +07/25/2024 11:57:47 - INFO - __main__ - Step 6883: {'lr': 0.0004978877970271555, 'samples': 330384, 'steps': 6882, 'loss/train': 1.8119628429412842} +07/25/2024 11:57:47 - INFO - __main__ - Step 6884: {'lr': 0.0004978871145964542, 'samples': 330432, 'steps': 6883, 'loss/train': 1.6177512407302856} +07/25/2024 11:57:47 - INFO - __main__ - Step 6885: {'lr': 0.0004978864320559954, 'samples': 330480, 'steps': 6884, 'loss/train': 1.8537737131118774} +07/25/2024 11:57:48 - INFO - __main__ - Step 6886: {'lr': 0.0004978857494057795, 'samples': 330528, 'steps': 6885, 'loss/train': 2.3858249187469482} +07/25/2024 11:57:48 - INFO - __main__ - Step 6887: {'lr': 0.0004978850666458066, 'samples': 330576, 'steps': 6886, 'loss/train': 2.11712908744812} +07/25/2024 11:57:48 - INFO - __main__ - Step 6888: {'lr': 0.0004978843837760773, 'samples': 330624, 'steps': 6887, 'loss/train': 1.8598381280899048} +07/25/2024 11:57:48 - INFO - __main__ - Step 6889: {'lr': 0.0004978837007965916, 'samples': 330672, 'steps': 6888, 'loss/train': 2.5590391159057617} +07/25/2024 11:57:49 - INFO - __main__ - Step 6890: {'lr': 0.00049788301770735, 'samples': 330720, 'steps': 6889, 'loss/train': 3.8379886150360107} +07/25/2024 11:57:49 - INFO - __main__ - Step 6891: {'lr': 0.0004978823345083528, 'samples': 330768, 'steps': 6890, 'loss/train': 2.5664024353027344} +07/25/2024 11:57:49 - INFO - __main__ - Step 6892: {'lr': 0.0004978816511996003, 'samples': 330816, 'steps': 6891, 'loss/train': 2.4430010318756104} +07/25/2024 11:57:50 - INFO - __main__ - Step 6893: {'lr': 0.0004978809677810926, 'samples': 330864, 'steps': 6892, 'loss/train': 2.0624260902404785} +07/25/2024 11:57:50 - INFO - __main__ - Step 6894: {'lr': 0.0004978802842528302, 'samples': 330912, 'steps': 6893, 'loss/train': 1.9425685405731201} +07/25/2024 11:57:50 - INFO - __main__ - Step 6895: {'lr': 0.0004978796006148133, 'samples': 330960, 'steps': 6894, 'loss/train': 2.2288074493408203} +07/25/2024 11:57:50 - INFO - __main__ - Step 6896: {'lr': 0.0004978789168670424, 'samples': 331008, 'steps': 6895, 'loss/train': 1.258954644203186} +07/25/2024 11:57:51 - INFO - __main__ - Step 6897: {'lr': 0.0004978782330095175, 'samples': 331056, 'steps': 6896, 'loss/train': 1.6674373149871826} +07/25/2024 11:57:51 - INFO - __main__ - Step 6898: {'lr': 0.0004978775490422392, 'samples': 331104, 'steps': 6897, 'loss/train': 2.289271831512451} +07/25/2024 11:57:51 - INFO - __main__ - Step 6899: {'lr': 0.0004978768649652076, 'samples': 331152, 'steps': 6898, 'loss/train': 1.9056622982025146} +07/25/2024 11:57:52 - INFO - __main__ - Step 6900: {'lr': 0.0004978761807784231, 'samples': 331200, 'steps': 6899, 'loss/train': 1.1763262748718262} +07/25/2024 11:57:52 - INFO - __main__ - Step 6901: {'lr': 0.0004978754964818859, 'samples': 331248, 'steps': 6900, 'loss/train': 2.267815589904785} +07/25/2024 11:57:52 - INFO - __main__ - Step 6902: {'lr': 0.0004978748120755965, 'samples': 331296, 'steps': 6901, 'loss/train': 3.2062313556671143} +07/25/2024 11:57:52 - INFO - __main__ - Step 6903: {'lr': 0.000497874127559555, 'samples': 331344, 'steps': 6902, 'loss/train': 2.886702299118042} +07/25/2024 11:57:53 - INFO - __main__ - Step 6904: {'lr': 0.0004978734429337618, 'samples': 331392, 'steps': 6903, 'loss/train': 1.6336792707443237} +07/25/2024 11:57:53 - INFO - __main__ - Step 6905: {'lr': 0.0004978727581982171, 'samples': 331440, 'steps': 6904, 'loss/train': 2.0745575428009033} +07/25/2024 11:57:53 - INFO - __main__ - Step 6906: {'lr': 0.0004978720733529214, 'samples': 331488, 'steps': 6905, 'loss/train': 2.79382061958313} +07/25/2024 11:57:54 - INFO - __main__ - Step 6907: {'lr': 0.0004978713883978748, 'samples': 331536, 'steps': 6906, 'loss/train': 1.2229511737823486} +07/25/2024 11:57:54 - INFO - __main__ - Step 6908: {'lr': 0.0004978707033330778, 'samples': 331584, 'steps': 6907, 'loss/train': 1.9538692235946655} +07/25/2024 11:57:54 - INFO - __main__ - Step 6909: {'lr': 0.0004978700181585305, 'samples': 331632, 'steps': 6908, 'loss/train': 1.8544325828552246} +07/25/2024 11:57:54 - INFO - __main__ - Step 6910: {'lr': 0.0004978693328742335, 'samples': 331680, 'steps': 6909, 'loss/train': 2.33708119392395} +07/25/2024 11:57:55 - INFO - __main__ - Step 6911: {'lr': 0.0004978686474801867, 'samples': 331728, 'steps': 6910, 'loss/train': 0.8088752031326294} +07/25/2024 11:57:55 - INFO - __main__ - Step 6912: {'lr': 0.0004978679619763906, 'samples': 331776, 'steps': 6911, 'loss/train': 2.061478853225708} +07/25/2024 11:57:55 - INFO - __main__ - Step 6913: {'lr': 0.0004978672763628455, 'samples': 331824, 'steps': 6912, 'loss/train': 2.332885503768921} +07/25/2024 11:57:55 - INFO - __main__ - Step 6914: {'lr': 0.0004978665906395518, 'samples': 331872, 'steps': 6913, 'loss/train': 3.8169310092926025} +07/25/2024 11:57:56 - INFO - __main__ - Step 6915: {'lr': 0.0004978659048065098, 'samples': 331920, 'steps': 6914, 'loss/train': 1.542627215385437} +07/25/2024 11:57:56 - INFO - __main__ - Step 6916: {'lr': 0.0004978652188637196, 'samples': 331968, 'steps': 6915, 'loss/train': 2.3443973064422607} +07/25/2024 11:57:56 - INFO - __main__ - Step 6917: {'lr': 0.0004978645328111817, 'samples': 332016, 'steps': 6916, 'loss/train': 2.0422239303588867} +07/25/2024 11:57:57 - INFO - __main__ - Step 6918: {'lr': 0.0004978638466488963, 'samples': 332064, 'steps': 6917, 'loss/train': 1.821502685546875} +07/25/2024 11:57:57 - INFO - __main__ - Step 6919: {'lr': 0.0004978631603768637, 'samples': 332112, 'steps': 6918, 'loss/train': 1.867828369140625} +07/25/2024 11:57:57 - INFO - __main__ - Step 6920: {'lr': 0.0004978624739950842, 'samples': 332160, 'steps': 6919, 'loss/train': 1.6357969045639038} +07/25/2024 11:57:57 - INFO - __main__ - Step 6921: {'lr': 0.0004978617875035583, 'samples': 332208, 'steps': 6920, 'loss/train': 2.082016944885254} +07/25/2024 11:57:58 - INFO - __main__ - Step 6922: {'lr': 0.0004978611009022859, 'samples': 332256, 'steps': 6921, 'loss/train': 1.593661904335022} +07/25/2024 11:57:58 - INFO - __main__ - Step 6923: {'lr': 0.0004978604141912678, 'samples': 332304, 'steps': 6922, 'loss/train': 2.233912706375122} +07/25/2024 11:57:58 - INFO - __main__ - Step 6924: {'lr': 0.0004978597273705039, 'samples': 332352, 'steps': 6923, 'loss/train': 1.1685582399368286} +07/25/2024 11:57:59 - INFO - __main__ - Step 6925: {'lr': 0.0004978590404399948, 'samples': 332400, 'steps': 6924, 'loss/train': 2.20115065574646} +07/25/2024 11:57:59 - INFO - __main__ - Step 6926: {'lr': 0.0004978583533997405, 'samples': 332448, 'steps': 6925, 'loss/train': 2.2211687564849854} +07/25/2024 11:57:59 - INFO - __main__ - Step 6927: {'lr': 0.0004978576662497415, 'samples': 332496, 'steps': 6926, 'loss/train': 3.026923418045044} +07/25/2024 11:57:59 - INFO - __main__ - Step 6928: {'lr': 0.0004978569789899982, 'samples': 332544, 'steps': 6927, 'loss/train': 2.4285242557525635} +07/25/2024 11:58:00 - INFO - __main__ - Step 6929: {'lr': 0.0004978562916205106, 'samples': 332592, 'steps': 6928, 'loss/train': 2.157313823699951} +07/25/2024 11:58:00 - INFO - __main__ - Step 6930: {'lr': 0.0004978556041412793, 'samples': 332640, 'steps': 6929, 'loss/train': 0.9426770806312561} +07/25/2024 11:58:00 - INFO - __main__ - Step 6931: {'lr': 0.0004978549165523044, 'samples': 332688, 'steps': 6930, 'loss/train': 1.814819097518921} +07/25/2024 11:58:01 - INFO - __main__ - Step 6932: {'lr': 0.0004978542288535863, 'samples': 332736, 'steps': 6931, 'loss/train': 0.9682119488716125} +07/25/2024 11:58:01 - INFO - __main__ - Step 6933: {'lr': 0.0004978535410451254, 'samples': 332784, 'steps': 6932, 'loss/train': 2.1507983207702637} +07/25/2024 11:58:01 - INFO - __main__ - Step 6934: {'lr': 0.0004978528531269218, 'samples': 332832, 'steps': 6933, 'loss/train': 2.051398992538452} +07/25/2024 11:58:01 - INFO - __main__ - Step 6935: {'lr': 0.000497852165098976, 'samples': 332880, 'steps': 6934, 'loss/train': 1.427878737449646} +07/25/2024 11:58:02 - INFO - __main__ - Step 6936: {'lr': 0.0004978514769612881, 'samples': 332928, 'steps': 6935, 'loss/train': 2.0065829753875732} +07/25/2024 11:58:02 - INFO - __main__ - Step 6937: {'lr': 0.0004978507887138584, 'samples': 332976, 'steps': 6936, 'loss/train': 2.1792352199554443} +07/25/2024 11:58:02 - INFO - __main__ - Step 6938: {'lr': 0.0004978501003566875, 'samples': 333024, 'steps': 6937, 'loss/train': 3.4972829818725586} +07/25/2024 11:58:03 - INFO - __main__ - Step 6939: {'lr': 0.0004978494118897754, 'samples': 333072, 'steps': 6938, 'loss/train': 1.7347131967544556} +07/25/2024 11:58:03 - INFO - __main__ - Step 6940: {'lr': 0.0004978487233131227, 'samples': 333120, 'steps': 6939, 'loss/train': 2.1677145957946777} +07/25/2024 11:58:03 - INFO - __main__ - Step 6941: {'lr': 0.0004978480346267294, 'samples': 333168, 'steps': 6940, 'loss/train': 2.1781961917877197} +07/25/2024 11:58:03 - INFO - __main__ - Step 6942: {'lr': 0.000497847345830596, 'samples': 333216, 'steps': 6941, 'loss/train': 1.9622207880020142} +07/25/2024 11:58:04 - INFO - __main__ - Step 6943: {'lr': 0.0004978466569247227, 'samples': 333264, 'steps': 6942, 'loss/train': 2.307741165161133} +07/25/2024 11:58:04 - INFO - __main__ - Step 6944: {'lr': 0.0004978459679091098, 'samples': 333312, 'steps': 6943, 'loss/train': 2.3020503520965576} +07/25/2024 11:58:04 - INFO - __main__ - Step 6945: {'lr': 0.0004978452787837577, 'samples': 333360, 'steps': 6944, 'loss/train': 2.004831314086914} +07/25/2024 11:58:05 - INFO - __main__ - Step 6946: {'lr': 0.0004978445895486666, 'samples': 333408, 'steps': 6945, 'loss/train': 0.8869512677192688} +07/25/2024 11:58:05 - INFO - __main__ - Step 6947: {'lr': 0.0004978439002038369, 'samples': 333456, 'steps': 6946, 'loss/train': 2.3223907947540283} +07/25/2024 11:58:05 - INFO - __main__ - Step 6948: {'lr': 0.0004978432107492689, 'samples': 333504, 'steps': 6947, 'loss/train': 1.1837292909622192} +07/25/2024 11:58:05 - INFO - __main__ - Step 6949: {'lr': 0.0004978425211849629, 'samples': 333552, 'steps': 6948, 'loss/train': 1.8852052688598633} +07/25/2024 11:58:06 - INFO - __main__ - Step 6950: {'lr': 0.0004978418315109191, 'samples': 333600, 'steps': 6949, 'loss/train': 1.204803228378296} +07/25/2024 11:58:06 - INFO - __main__ - Step 6951: {'lr': 0.0004978411417271379, 'samples': 333648, 'steps': 6950, 'loss/train': 2.825190782546997} +07/25/2024 11:58:06 - INFO - __main__ - Step 6952: {'lr': 0.0004978404518336196, 'samples': 333696, 'steps': 6951, 'loss/train': 2.019463062286377} +07/25/2024 11:58:07 - INFO - __main__ - Step 6953: {'lr': 0.0004978397618303646, 'samples': 333744, 'steps': 6952, 'loss/train': 1.2139633893966675} +07/25/2024 11:58:07 - INFO - __main__ - Step 6954: {'lr': 0.0004978390717173728, 'samples': 333792, 'steps': 6953, 'loss/train': 0.8532254099845886} +07/25/2024 11:58:07 - INFO - __main__ - Step 6955: {'lr': 0.000497838381494645, 'samples': 333840, 'steps': 6954, 'loss/train': 2.29148268699646} +07/25/2024 11:58:07 - INFO - __main__ - Step 6956: {'lr': 0.0004978376911621814, 'samples': 333888, 'steps': 6955, 'loss/train': 2.045212984085083} +07/25/2024 11:58:08 - INFO - __main__ - Step 6957: {'lr': 0.000497837000719982, 'samples': 333936, 'steps': 6956, 'loss/train': 1.7160072326660156} +07/25/2024 11:58:08 - INFO - __main__ - Step 6958: {'lr': 0.0004978363101680475, 'samples': 333984, 'steps': 6957, 'loss/train': 2.665306568145752} +07/25/2024 11:58:08 - INFO - __main__ - Step 6959: {'lr': 0.0004978356195063779, 'samples': 334032, 'steps': 6958, 'loss/train': 2.227764129638672} +07/25/2024 11:58:09 - INFO - __main__ - Step 6960: {'lr': 0.0004978349287349738, 'samples': 334080, 'steps': 6959, 'loss/train': 1.9285567998886108} +07/25/2024 11:58:09 - INFO - __main__ - Step 6961: {'lr': 0.0004978342378538351, 'samples': 334128, 'steps': 6960, 'loss/train': 2.0602757930755615} +07/25/2024 11:58:09 - INFO - __main__ - Step 6962: {'lr': 0.0004978335468629625, 'samples': 334176, 'steps': 6961, 'loss/train': 3.4338479042053223} +07/25/2024 11:58:09 - INFO - __main__ - Step 6963: {'lr': 0.0004978328557623562, 'samples': 334224, 'steps': 6962, 'loss/train': 0.8783045411109924} +07/25/2024 11:58:10 - INFO - __main__ - Step 6964: {'lr': 0.0004978321645520162, 'samples': 334272, 'steps': 6963, 'loss/train': 1.6949279308319092} +07/25/2024 11:58:10 - INFO - __main__ - Step 6965: {'lr': 0.0004978314732319433, 'samples': 334320, 'steps': 6964, 'loss/train': 2.216416835784912} +07/25/2024 11:58:10 - INFO - __main__ - Step 6966: {'lr': 0.0004978307818021376, 'samples': 334368, 'steps': 6965, 'loss/train': 2.2653675079345703} +07/25/2024 11:58:11 - INFO - __main__ - Step 6967: {'lr': 0.0004978300902625991, 'samples': 334416, 'steps': 6966, 'loss/train': 2.1156840324401855} +07/25/2024 11:58:11 - INFO - __main__ - Step 6968: {'lr': 0.0004978293986133286, 'samples': 334464, 'steps': 6967, 'loss/train': 2.235786199569702} +07/25/2024 11:58:11 - INFO - __main__ - Step 6969: {'lr': 0.0004978287068543262, 'samples': 334512, 'steps': 6968, 'loss/train': 1.9661763906478882} +07/25/2024 11:58:11 - INFO - __main__ - Step 6970: {'lr': 0.0004978280149855922, 'samples': 334560, 'steps': 6969, 'loss/train': 1.2851202487945557} +07/25/2024 11:58:12 - INFO - __main__ - Step 6971: {'lr': 0.0004978273230071267, 'samples': 334608, 'steps': 6970, 'loss/train': 2.2600584030151367} +07/25/2024 11:58:12 - INFO - __main__ - Step 6972: {'lr': 0.0004978266309189304, 'samples': 334656, 'steps': 6971, 'loss/train': 1.1215986013412476} +07/25/2024 11:58:12 - INFO - __main__ - Step 6973: {'lr': 0.0004978259387210033, 'samples': 334704, 'steps': 6972, 'loss/train': 1.820479393005371} +07/25/2024 11:58:13 - INFO - __main__ - Step 6974: {'lr': 0.0004978252464133459, 'samples': 334752, 'steps': 6973, 'loss/train': 1.3397912979125977} +07/25/2024 11:58:13 - INFO - __main__ - Step 6975: {'lr': 0.0004978245539959585, 'samples': 334800, 'steps': 6974, 'loss/train': 2.7610435485839844} +07/25/2024 11:58:13 - INFO - __main__ - Step 6976: {'lr': 0.0004978238614688411, 'samples': 334848, 'steps': 6975, 'loss/train': 2.1242475509643555} +07/25/2024 11:58:13 - INFO - __main__ - Step 6977: {'lr': 0.0004978231688319944, 'samples': 334896, 'steps': 6976, 'loss/train': 2.0117084980010986} +07/25/2024 11:58:14 - INFO - __main__ - Step 6978: {'lr': 0.0004978224760854186, 'samples': 334944, 'steps': 6977, 'loss/train': 1.6060876846313477} +07/25/2024 11:58:14 - INFO - __main__ - Step 6979: {'lr': 0.0004978217832291138, 'samples': 334992, 'steps': 6978, 'loss/train': 2.164158821105957} +07/25/2024 11:58:14 - INFO - __main__ - Step 6980: {'lr': 0.0004978210902630805, 'samples': 335040, 'steps': 6979, 'loss/train': 2.270871162414551} +07/25/2024 11:58:15 - INFO - __main__ - Step 6981: {'lr': 0.000497820397187319, 'samples': 335088, 'steps': 6980, 'loss/train': 2.2097291946411133} +07/25/2024 11:58:15 - INFO - __main__ - Step 6982: {'lr': 0.0004978197040018295, 'samples': 335136, 'steps': 6981, 'loss/train': 2.436587333679199} +07/25/2024 11:58:15 - INFO - __main__ - Step 6983: {'lr': 0.0004978190107066125, 'samples': 335184, 'steps': 6982, 'loss/train': 2.562523126602173} +07/25/2024 11:58:15 - INFO - __main__ - Step 6984: {'lr': 0.0004978183173016681, 'samples': 335232, 'steps': 6983, 'loss/train': 1.9508286714553833} +07/25/2024 11:58:16 - INFO - __main__ - Step 6985: {'lr': 0.0004978176237869967, 'samples': 335280, 'steps': 6984, 'loss/train': 2.2920398712158203} +07/25/2024 11:58:16 - INFO - __main__ - Step 6986: {'lr': 0.0004978169301625986, 'samples': 335328, 'steps': 6985, 'loss/train': 3.1531150341033936} +07/25/2024 11:58:16 - INFO - __main__ - Step 6987: {'lr': 0.0004978162364284742, 'samples': 335376, 'steps': 6986, 'loss/train': 0.866560161113739} +07/25/2024 11:58:16 - INFO - __main__ - Step 6988: {'lr': 0.0004978155425846236, 'samples': 335424, 'steps': 6987, 'loss/train': 1.4102543592453003} +07/25/2024 11:58:17 - INFO - __main__ - Step 6989: {'lr': 0.0004978148486310472, 'samples': 335472, 'steps': 6988, 'loss/train': 1.865784764289856} +07/25/2024 11:58:17 - INFO - __main__ - Step 6990: {'lr': 0.0004978141545677454, 'samples': 335520, 'steps': 6989, 'loss/train': 1.5782181024551392} +07/25/2024 11:58:17 - INFO - __main__ - Step 6991: {'lr': 0.0004978134603947184, 'samples': 335568, 'steps': 6990, 'loss/train': 2.136044502258301} +07/25/2024 11:58:18 - INFO - __main__ - Step 6992: {'lr': 0.0004978127661119667, 'samples': 335616, 'steps': 6991, 'loss/train': 2.1173694133758545} +07/25/2024 11:58:18 - INFO - __main__ - Step 6993: {'lr': 0.0004978120717194902, 'samples': 335664, 'steps': 6992, 'loss/train': 2.1607820987701416} +07/25/2024 11:58:18 - INFO - __main__ - Step 6994: {'lr': 0.0004978113772172897, 'samples': 335712, 'steps': 6993, 'loss/train': 2.1712961196899414} +07/25/2024 11:58:18 - INFO - __main__ - Step 6995: {'lr': 0.000497810682605365, 'samples': 335760, 'steps': 6994, 'loss/train': 2.4081778526306152} +07/25/2024 11:58:19 - INFO - __main__ - Step 6996: {'lr': 0.0004978099878837169, 'samples': 335808, 'steps': 6995, 'loss/train': 1.1798982620239258} +07/25/2024 11:58:19 - INFO - __main__ - Step 6997: {'lr': 0.0004978092930523453, 'samples': 335856, 'steps': 6996, 'loss/train': 0.7421680092811584} +07/25/2024 11:58:19 - INFO - __main__ - Step 6998: {'lr': 0.0004978085981112508, 'samples': 335904, 'steps': 6997, 'loss/train': 1.8188506364822388} +07/25/2024 11:58:20 - INFO - __main__ - Step 6999: {'lr': 0.0004978079030604336, 'samples': 335952, 'steps': 6998, 'loss/train': 2.6599652767181396} +07/25/2024 11:58:20 - INFO - __main__ - Step 7000: {'lr': 0.000497807207899894, 'samples': 336000, 'steps': 6999, 'loss/train': 2.2672038078308105} +07/25/2024 11:58:20 - INFO - __main__ - Step 7001: {'lr': 0.0004978065126296323, 'samples': 336048, 'steps': 7000, 'loss/train': 2.324842691421509} +07/25/2024 11:58:20 - INFO - __main__ - Step 7002: {'lr': 0.0004978058172496488, 'samples': 336096, 'steps': 7001, 'loss/train': 1.726943016052246} +07/25/2024 11:58:21 - INFO - __main__ - Step 7003: {'lr': 0.0004978051217599439, 'samples': 336144, 'steps': 7002, 'loss/train': 1.7022855281829834} +07/25/2024 11:58:21 - INFO - __main__ - Step 7004: {'lr': 0.0004978044261605177, 'samples': 336192, 'steps': 7003, 'loss/train': 1.9711424112319946} +07/25/2024 11:58:21 - INFO - __main__ - Step 7005: {'lr': 0.0004978037304513707, 'samples': 336240, 'steps': 7004, 'loss/train': 2.5655910968780518} +07/25/2024 11:58:22 - INFO - __main__ - Step 7006: {'lr': 0.0004978030346325032, 'samples': 336288, 'steps': 7005, 'loss/train': 1.8571022748947144} +07/25/2024 11:58:22 - INFO - __main__ - Step 7007: {'lr': 0.0004978023387039155, 'samples': 336336, 'steps': 7006, 'loss/train': 2.7492728233337402} +07/25/2024 11:58:22 - INFO - __main__ - Step 7008: {'lr': 0.0004978016426656078, 'samples': 336384, 'steps': 7007, 'loss/train': 2.1292481422424316} +07/25/2024 11:58:22 - INFO - __main__ - Step 7009: {'lr': 0.0004978009465175804, 'samples': 336432, 'steps': 7008, 'loss/train': 2.071148157119751} +07/25/2024 11:58:23 - INFO - __main__ - Step 7010: {'lr': 0.0004978002502598338, 'samples': 336480, 'steps': 7009, 'loss/train': 2.9535396099090576} +07/25/2024 11:58:23 - INFO - __main__ - Step 7011: {'lr': 0.0004977995538923681, 'samples': 336528, 'steps': 7010, 'loss/train': 1.9420452117919922} +07/25/2024 11:58:23 - INFO - __main__ - Step 7012: {'lr': 0.0004977988574151838, 'samples': 336576, 'steps': 7011, 'loss/train': 1.4356329441070557} +07/25/2024 11:58:24 - INFO - __main__ - Step 7013: {'lr': 0.0004977981608282811, 'samples': 336624, 'steps': 7012, 'loss/train': 1.879124402999878} +07/25/2024 11:58:24 - INFO - __main__ - Step 7014: {'lr': 0.0004977974641316603, 'samples': 336672, 'steps': 7013, 'loss/train': 1.8372321128845215} +07/25/2024 11:58:24 - INFO - __main__ - Step 7015: {'lr': 0.0004977967673253217, 'samples': 336720, 'steps': 7014, 'loss/train': 2.271183729171753} +07/25/2024 11:58:24 - INFO - __main__ - Step 7016: {'lr': 0.0004977960704092656, 'samples': 336768, 'steps': 7015, 'loss/train': 1.943554162979126} +07/25/2024 11:58:25 - INFO - __main__ - Step 7017: {'lr': 0.0004977953733834923, 'samples': 336816, 'steps': 7016, 'loss/train': 1.7321572303771973} +07/25/2024 11:58:25 - INFO - __main__ - Step 7018: {'lr': 0.0004977946762480023, 'samples': 336864, 'steps': 7017, 'loss/train': 1.7610958814620972} +07/25/2024 11:58:25 - INFO - __main__ - Step 7019: {'lr': 0.0004977939790027957, 'samples': 336912, 'steps': 7018, 'loss/train': 2.6307549476623535} +07/25/2024 11:58:26 - INFO - __main__ - Step 7020: {'lr': 0.0004977932816478728, 'samples': 336960, 'steps': 7019, 'loss/train': 1.1431655883789062} +07/25/2024 11:58:26 - INFO - __main__ - Step 7021: {'lr': 0.0004977925841832341, 'samples': 337008, 'steps': 7020, 'loss/train': 0.8690206408500671} +07/25/2024 11:58:26 - INFO - __main__ - Step 7022: {'lr': 0.0004977918866088798, 'samples': 337056, 'steps': 7021, 'loss/train': 1.6625192165374756} +07/25/2024 11:58:26 - INFO - __main__ - Step 7023: {'lr': 0.00049779118892481, 'samples': 337104, 'steps': 7022, 'loss/train': 2.652949333190918} +07/25/2024 11:58:27 - INFO - __main__ - Step 7024: {'lr': 0.0004977904911310254, 'samples': 337152, 'steps': 7023, 'loss/train': 2.469343662261963} +07/25/2024 11:58:27 - INFO - __main__ - Step 7025: {'lr': 0.0004977897932275261, 'samples': 337200, 'steps': 7024, 'loss/train': 2.0520999431610107} +07/25/2024 11:58:27 - INFO - __main__ - Step 7026: {'lr': 0.0004977890952143122, 'samples': 337248, 'steps': 7025, 'loss/train': 2.1314682960510254} +07/25/2024 11:58:28 - INFO - __main__ - Step 7027: {'lr': 0.0004977883970913845, 'samples': 337296, 'steps': 7026, 'loss/train': 2.3746728897094727} +07/25/2024 11:58:28 - INFO - __main__ - Step 7028: {'lr': 0.0004977876988587429, 'samples': 337344, 'steps': 7027, 'loss/train': 1.914371371269226} +07/25/2024 11:58:28 - INFO - __main__ - Step 7029: {'lr': 0.0004977870005163879, 'samples': 337392, 'steps': 7028, 'loss/train': 2.0476796627044678} +07/25/2024 11:58:28 - INFO - __main__ - Step 7030: {'lr': 0.0004977863020643197, 'samples': 337440, 'steps': 7029, 'loss/train': 2.303997755050659} +07/25/2024 11:58:29 - INFO - __main__ - Step 7031: {'lr': 0.0004977856035025388, 'samples': 337488, 'steps': 7030, 'loss/train': 2.86661434173584} +07/25/2024 11:58:29 - INFO - __main__ - Step 7032: {'lr': 0.0004977849048310453, 'samples': 337536, 'steps': 7031, 'loss/train': 1.7723145484924316} +07/25/2024 11:58:29 - INFO - __main__ - Step 7033: {'lr': 0.0004977842060498395, 'samples': 337584, 'steps': 7032, 'loss/train': 2.0793421268463135} +07/25/2024 11:58:30 - INFO - __main__ - Step 7034: {'lr': 0.000497783507158922, 'samples': 337632, 'steps': 7033, 'loss/train': 2.3820040225982666} +07/25/2024 11:58:30 - INFO - __main__ - Step 7035: {'lr': 0.0004977828081582927, 'samples': 337680, 'steps': 7034, 'loss/train': 1.4188873767852783} +07/25/2024 11:58:30 - INFO - __main__ - Step 7036: {'lr': 0.0004977821090479522, 'samples': 337728, 'steps': 7035, 'loss/train': 2.226135492324829} +07/25/2024 11:58:30 - INFO - __main__ - Step 7037: {'lr': 0.0004977814098279008, 'samples': 337776, 'steps': 7036, 'loss/train': 1.936822533607483} +07/25/2024 11:58:31 - INFO - __main__ - Step 7038: {'lr': 0.0004977807104981386, 'samples': 337824, 'steps': 7037, 'loss/train': 2.252814769744873} +07/25/2024 11:58:31 - INFO - __main__ - Step 7039: {'lr': 0.0004977800110586661, 'samples': 337872, 'steps': 7038, 'loss/train': 1.849129557609558} +07/25/2024 11:58:31 - INFO - __main__ - Step 7040: {'lr': 0.0004977793115094835, 'samples': 337920, 'steps': 7039, 'loss/train': 2.3439693450927734} +07/25/2024 11:58:32 - INFO - __main__ - Step 7041: {'lr': 0.0004977786118505913, 'samples': 337968, 'steps': 7040, 'loss/train': 1.5066039562225342} +07/25/2024 11:58:32 - INFO - __main__ - Step 7042: {'lr': 0.0004977779120819895, 'samples': 338016, 'steps': 7041, 'loss/train': 2.27665638923645} +07/25/2024 11:58:32 - INFO - __main__ - Step 7043: {'lr': 0.0004977772122036787, 'samples': 338064, 'steps': 7042, 'loss/train': 2.4677162170410156} +07/25/2024 11:58:32 - INFO - __main__ - Step 7044: {'lr': 0.0004977765122156591, 'samples': 338112, 'steps': 7043, 'loss/train': 1.382359266281128} +07/25/2024 11:58:33 - INFO - __main__ - Step 7045: {'lr': 0.000497775812117931, 'samples': 338160, 'steps': 7044, 'loss/train': 0.6785849928855896} +07/25/2024 11:58:33 - INFO - __main__ - Step 7046: {'lr': 0.0004977751119104946, 'samples': 338208, 'steps': 7045, 'loss/train': 1.5667006969451904} +07/25/2024 11:58:33 - INFO - __main__ - Step 7047: {'lr': 0.0004977744115933504, 'samples': 338256, 'steps': 7046, 'loss/train': 2.3839776515960693} +07/25/2024 11:58:34 - INFO - __main__ - Step 7048: {'lr': 0.0004977737111664987, 'samples': 338304, 'steps': 7047, 'loss/train': 3.1344330310821533} +07/25/2024 11:58:34 - INFO - __main__ - Step 7049: {'lr': 0.0004977730106299397, 'samples': 338352, 'steps': 7048, 'loss/train': 1.785314917564392} +07/25/2024 11:58:34 - INFO - __main__ - Step 7050: {'lr': 0.0004977723099836738, 'samples': 338400, 'steps': 7049, 'loss/train': 1.972821593284607} +07/25/2024 11:58:34 - INFO - __main__ - Step 7051: {'lr': 0.0004977716092277011, 'samples': 338448, 'steps': 7050, 'loss/train': 1.3725675344467163} +07/25/2024 11:58:35 - INFO - __main__ - Step 7052: {'lr': 0.0004977709083620222, 'samples': 338496, 'steps': 7051, 'loss/train': 1.9367239475250244} +07/25/2024 11:58:35 - INFO - __main__ - Step 7053: {'lr': 0.0004977702073866373, 'samples': 338544, 'steps': 7052, 'loss/train': 2.235955238342285} +07/25/2024 11:58:35 - INFO - __main__ - Step 7054: {'lr': 0.0004977695063015466, 'samples': 338592, 'steps': 7053, 'loss/train': 1.7746084928512573} +07/25/2024 11:58:36 - INFO - __main__ - Step 7055: {'lr': 0.0004977688051067506, 'samples': 338640, 'steps': 7054, 'loss/train': 2.595621109008789} +07/25/2024 11:58:36 - INFO - __main__ - Step 7056: {'lr': 0.0004977681038022495, 'samples': 338688, 'steps': 7055, 'loss/train': 2.1029529571533203} +07/25/2024 11:58:36 - INFO - __main__ - Step 7057: {'lr': 0.0004977674023880435, 'samples': 338736, 'steps': 7056, 'loss/train': 1.9431239366531372} +07/25/2024 11:58:36 - INFO - __main__ - Step 7058: {'lr': 0.0004977667008641332, 'samples': 338784, 'steps': 7057, 'loss/train': 2.322545289993286} +07/25/2024 11:58:37 - INFO - __main__ - Step 7059: {'lr': 0.0004977659992305187, 'samples': 338832, 'steps': 7058, 'loss/train': 2.5936028957366943} +07/25/2024 11:58:37 - INFO - __main__ - Step 7060: {'lr': 0.0004977652974872003, 'samples': 338880, 'steps': 7059, 'loss/train': 2.1578750610351562} +07/25/2024 11:58:37 - INFO - __main__ - Step 7061: {'lr': 0.0004977645956341784, 'samples': 338928, 'steps': 7060, 'loss/train': 1.6768628358840942} +07/25/2024 11:58:37 - INFO - __main__ - Step 7062: {'lr': 0.0004977638936714533, 'samples': 338976, 'steps': 7061, 'loss/train': 2.3337924480438232} +07/25/2024 11:58:38 - INFO - __main__ - Step 7063: {'lr': 0.0004977631915990253, 'samples': 339024, 'steps': 7062, 'loss/train': 1.8965386152267456} +07/25/2024 11:58:38 - INFO - __main__ - Step 7064: {'lr': 0.0004977624894168947, 'samples': 339072, 'steps': 7063, 'loss/train': 2.2482593059539795} +07/25/2024 11:58:38 - INFO - __main__ - Step 7065: {'lr': 0.0004977617871250617, 'samples': 339120, 'steps': 7064, 'loss/train': 1.355419397354126} +07/25/2024 11:58:39 - INFO - __main__ - Step 7066: {'lr': 0.0004977610847235269, 'samples': 339168, 'steps': 7065, 'loss/train': 1.5933235883712769} +07/25/2024 11:58:39 - INFO - __main__ - Step 7067: {'lr': 0.0004977603822122903, 'samples': 339216, 'steps': 7066, 'loss/train': 2.2308175563812256} +07/25/2024 11:58:39 - INFO - __main__ - Step 7068: {'lr': 0.0004977596795913523, 'samples': 339264, 'steps': 7067, 'loss/train': 2.247422933578491} +07/25/2024 11:58:39 - INFO - __main__ - Step 7069: {'lr': 0.0004977589768607135, 'samples': 339312, 'steps': 7068, 'loss/train': 0.721264660358429} +07/25/2024 11:58:40 - INFO - __main__ - Step 7070: {'lr': 0.0004977582740203737, 'samples': 339360, 'steps': 7069, 'loss/train': 2.0679588317871094} +07/25/2024 11:58:40 - INFO - __main__ - Step 7071: {'lr': 0.0004977575710703337, 'samples': 339408, 'steps': 7070, 'loss/train': 2.3596689701080322} +07/25/2024 11:58:40 - INFO - __main__ - Step 7072: {'lr': 0.0004977568680105934, 'samples': 339456, 'steps': 7071, 'loss/train': 3.2498393058776855} +07/25/2024 11:58:41 - INFO - __main__ - Step 7073: {'lr': 0.0004977561648411534, 'samples': 339504, 'steps': 7072, 'loss/train': 1.2623745203018188} +07/25/2024 11:58:41 - INFO - __main__ - Step 7074: {'lr': 0.0004977554615620139, 'samples': 339552, 'steps': 7073, 'loss/train': 2.0883359909057617} +07/25/2024 11:58:41 - INFO - __main__ - Step 7075: {'lr': 0.0004977547581731753, 'samples': 339600, 'steps': 7074, 'loss/train': 2.3644790649414062} +07/25/2024 11:58:41 - INFO - __main__ - Step 7076: {'lr': 0.0004977540546746377, 'samples': 339648, 'steps': 7075, 'loss/train': 1.6939589977264404} +07/25/2024 11:58:42 - INFO - __main__ - Step 7077: {'lr': 0.0004977533510664016, 'samples': 339696, 'steps': 7076, 'loss/train': 1.4679127931594849} +07/25/2024 11:58:42 - INFO - __main__ - Step 7078: {'lr': 0.0004977526473484672, 'samples': 339744, 'steps': 7077, 'loss/train': 2.4689414501190186} +07/25/2024 11:58:42 - INFO - __main__ - Step 7079: {'lr': 0.000497751943520835, 'samples': 339792, 'steps': 7078, 'loss/train': 1.9188847541809082} +07/25/2024 11:58:43 - INFO - __main__ - Step 7080: {'lr': 0.0004977512395835051, 'samples': 339840, 'steps': 7079, 'loss/train': 2.1861209869384766} +07/25/2024 11:58:43 - INFO - __main__ - Step 7081: {'lr': 0.0004977505355364779, 'samples': 339888, 'steps': 7080, 'loss/train': 2.167046070098877} +07/25/2024 11:58:43 - INFO - __main__ - Step 7082: {'lr': 0.0004977498313797537, 'samples': 339936, 'steps': 7081, 'loss/train': 1.8094040155410767} +07/25/2024 11:58:43 - INFO - __main__ - Step 7083: {'lr': 0.0004977491271133329, 'samples': 339984, 'steps': 7082, 'loss/train': 1.8223878145217896} +07/25/2024 11:58:44 - INFO - __main__ - Step 7084: {'lr': 0.0004977484227372157, 'samples': 340032, 'steps': 7083, 'loss/train': 2.0593302249908447} +07/25/2024 11:58:44 - INFO - __main__ - Step 7085: {'lr': 0.0004977477182514024, 'samples': 340080, 'steps': 7084, 'loss/train': 1.8645820617675781} +07/25/2024 11:58:44 - INFO - __main__ - Step 7086: {'lr': 0.0004977470136558932, 'samples': 340128, 'steps': 7085, 'loss/train': 1.9894487857818604} +07/25/2024 11:58:45 - INFO - __main__ - Step 7087: {'lr': 0.0004977463089506888, 'samples': 340176, 'steps': 7086, 'loss/train': 0.7061074376106262} +07/25/2024 11:58:45 - INFO - __main__ - Step 7088: {'lr': 0.0004977456041357892, 'samples': 340224, 'steps': 7087, 'loss/train': 2.452089309692383} +07/25/2024 11:58:45 - INFO - __main__ - Step 7089: {'lr': 0.0004977448992111947, 'samples': 340272, 'steps': 7088, 'loss/train': 2.577801465988159} +07/25/2024 11:58:45 - INFO - __main__ - Step 7090: {'lr': 0.0004977441941769058, 'samples': 340320, 'steps': 7089, 'loss/train': 1.9748752117156982} +07/25/2024 11:58:46 - INFO - __main__ - Step 7091: {'lr': 0.0004977434890329227, 'samples': 340368, 'steps': 7090, 'loss/train': 2.099581480026245} +07/25/2024 11:58:46 - INFO - __main__ - Step 7092: {'lr': 0.0004977427837792457, 'samples': 340416, 'steps': 7091, 'loss/train': 2.072906732559204} +07/25/2024 11:58:46 - INFO - __main__ - Step 7093: {'lr': 0.0004977420784158752, 'samples': 340464, 'steps': 7092, 'loss/train': 0.5836149454116821} +07/25/2024 11:58:47 - INFO - __main__ - Step 7094: {'lr': 0.0004977413729428114, 'samples': 340512, 'steps': 7093, 'loss/train': 2.5159482955932617} +07/25/2024 11:58:47 - INFO - __main__ - Step 7095: {'lr': 0.0004977406673600547, 'samples': 340560, 'steps': 7094, 'loss/train': 2.361894130706787} +07/25/2024 11:58:47 - INFO - __main__ - Step 7096: {'lr': 0.0004977399616676052, 'samples': 340608, 'steps': 7095, 'loss/train': 3.369385242462158} +07/25/2024 11:58:47 - INFO - __main__ - Step 7097: {'lr': 0.0004977392558654636, 'samples': 340656, 'steps': 7096, 'loss/train': 1.8656110763549805} +07/25/2024 11:58:48 - INFO - __main__ - Step 7098: {'lr': 0.0004977385499536299, 'samples': 340704, 'steps': 7097, 'loss/train': 0.43691185116767883} +07/25/2024 11:58:48 - INFO - __main__ - Step 7099: {'lr': 0.0004977378439321045, 'samples': 340752, 'steps': 7098, 'loss/train': 2.0828890800476074} +07/25/2024 11:58:48 - INFO - __main__ - Step 7100: {'lr': 0.0004977371378008878, 'samples': 340800, 'steps': 7099, 'loss/train': 2.1556098461151123} +07/25/2024 11:58:49 - INFO - __main__ - Step 7101: {'lr': 0.00049773643155998, 'samples': 340848, 'steps': 7100, 'loss/train': 1.6245107650756836} +07/25/2024 11:58:49 - INFO - __main__ - Step 7102: {'lr': 0.0004977357252093814, 'samples': 340896, 'steps': 7101, 'loss/train': 2.0508761405944824} +07/25/2024 11:58:49 - INFO - __main__ - Step 7103: {'lr': 0.0004977350187490925, 'samples': 340944, 'steps': 7102, 'loss/train': 2.5136256217956543} +07/25/2024 11:58:49 - INFO - __main__ - Step 7104: {'lr': 0.0004977343121791134, 'samples': 340992, 'steps': 7103, 'loss/train': 2.300117015838623} +07/25/2024 11:58:50 - INFO - __main__ - Step 7105: {'lr': 0.0004977336054994444, 'samples': 341040, 'steps': 7104, 'loss/train': 2.157843828201294} +07/25/2024 11:58:50 - INFO - __main__ - Step 7106: {'lr': 0.000497732898710086, 'samples': 341088, 'steps': 7105, 'loss/train': 2.02323842048645} +07/25/2024 11:58:50 - INFO - __main__ - Step 7107: {'lr': 0.0004977321918110384, 'samples': 341136, 'steps': 7106, 'loss/train': 2.0595390796661377} +07/25/2024 11:58:51 - INFO - __main__ - Step 7108: {'lr': 0.0004977314848023019, 'samples': 341184, 'steps': 7107, 'loss/train': 1.971150517463684} +07/25/2024 11:58:51 - INFO - __main__ - Step 7109: {'lr': 0.0004977307776838768, 'samples': 341232, 'steps': 7108, 'loss/train': 2.289017677307129} +07/25/2024 11:58:51 - INFO - __main__ - Step 7110: {'lr': 0.0004977300704557636, 'samples': 341280, 'steps': 7109, 'loss/train': 2.117478847503662} +07/25/2024 11:58:51 - INFO - __main__ - Step 7111: {'lr': 0.0004977293631179623, 'samples': 341328, 'steps': 7110, 'loss/train': 2.459813117980957} +07/25/2024 11:58:52 - INFO - __main__ - Step 7112: {'lr': 0.0004977286556704734, 'samples': 341376, 'steps': 7111, 'loss/train': 2.4469192028045654} +07/25/2024 11:58:52 - INFO - __main__ - Step 7113: {'lr': 0.0004977279481132973, 'samples': 341424, 'steps': 7112, 'loss/train': 2.2321784496307373} +07/25/2024 11:58:52 - INFO - __main__ - Step 7114: {'lr': 0.0004977272404464341, 'samples': 341472, 'steps': 7113, 'loss/train': 2.170132637023926} +07/25/2024 11:58:53 - INFO - __main__ - Step 7115: {'lr': 0.0004977265326698843, 'samples': 341520, 'steps': 7114, 'loss/train': 2.296046257019043} +07/25/2024 11:58:53 - INFO - __main__ - Step 7116: {'lr': 0.000497725824783648, 'samples': 341568, 'steps': 7115, 'loss/train': 2.045941114425659} +07/25/2024 11:58:53 - INFO - __main__ - Step 7117: {'lr': 0.0004977251167877258, 'samples': 341616, 'steps': 7116, 'loss/train': 0.5843177437782288} +07/25/2024 11:58:53 - INFO - __main__ - Step 7118: {'lr': 0.0004977244086821178, 'samples': 341664, 'steps': 7117, 'loss/train': 2.1488876342773438} +07/25/2024 11:58:54 - INFO - __main__ - Step 7119: {'lr': 0.0004977237004668245, 'samples': 341712, 'steps': 7118, 'loss/train': 2.5365121364593506} +07/25/2024 11:58:54 - INFO - __main__ - Step 7120: {'lr': 0.0004977229921418459, 'samples': 341760, 'steps': 7119, 'loss/train': 3.424165725708008} +07/25/2024 11:58:54 - INFO - __main__ - Step 7121: {'lr': 0.0004977222837071826, 'samples': 341808, 'steps': 7120, 'loss/train': 2.973377227783203} +07/25/2024 11:58:55 - INFO - __main__ - Step 7122: {'lr': 0.0004977215751628347, 'samples': 341856, 'steps': 7121, 'loss/train': 0.3787662386894226} +07/25/2024 11:58:55 - INFO - __main__ - Step 7123: {'lr': 0.0004977208665088028, 'samples': 341904, 'steps': 7122, 'loss/train': 1.3422532081604004} +07/25/2024 11:58:55 - INFO - __main__ - Step 7124: {'lr': 0.0004977201577450868, 'samples': 341952, 'steps': 7123, 'loss/train': 2.516744375228882} +07/25/2024 11:58:55 - INFO - __main__ - Step 7125: {'lr': 0.0004977194488716875, 'samples': 342000, 'steps': 7124, 'loss/train': 2.3349926471710205} +07/25/2024 11:58:56 - INFO - __main__ - Step 7126: {'lr': 0.0004977187398886049, 'samples': 342048, 'steps': 7125, 'loss/train': 1.0173407793045044} +07/25/2024 11:58:56 - INFO - __main__ - Step 7127: {'lr': 0.0004977180307958394, 'samples': 342096, 'steps': 7126, 'loss/train': 2.5922539234161377} +07/25/2024 11:58:56 - INFO - __main__ - Step 7128: {'lr': 0.0004977173215933911, 'samples': 342144, 'steps': 7127, 'loss/train': 2.256559371948242} +07/25/2024 11:58:56 - INFO - __main__ - Step 7129: {'lr': 0.0004977166122812608, 'samples': 342192, 'steps': 7128, 'loss/train': 3.6708452701568604} +07/25/2024 11:58:57 - INFO - __main__ - Step 7130: {'lr': 0.0004977159028594484, 'samples': 342240, 'steps': 7129, 'loss/train': 2.546417713165283} +07/25/2024 11:58:57 - INFO - __main__ - Step 7131: {'lr': 0.0004977151933279543, 'samples': 342288, 'steps': 7130, 'loss/train': 1.9704649448394775} +07/25/2024 11:58:57 - INFO - __main__ - Step 7132: {'lr': 0.0004977144836867788, 'samples': 342336, 'steps': 7131, 'loss/train': 1.9215972423553467} +07/25/2024 11:58:58 - INFO - __main__ - Step 7133: {'lr': 0.0004977137739359223, 'samples': 342384, 'steps': 7132, 'loss/train': 2.148597478866577} +07/25/2024 11:58:58 - INFO - __main__ - Step 7134: {'lr': 0.0004977130640753852, 'samples': 342432, 'steps': 7133, 'loss/train': 2.309457540512085} +07/25/2024 11:58:58 - INFO - __main__ - Step 7135: {'lr': 0.0004977123541051676, 'samples': 342480, 'steps': 7134, 'loss/train': 1.707999348640442} +07/25/2024 11:58:58 - INFO - __main__ - Step 7136: {'lr': 0.0004977116440252699, 'samples': 342528, 'steps': 7135, 'loss/train': 2.3222734928131104} +07/25/2024 11:58:59 - INFO - __main__ - Step 7137: {'lr': 0.0004977109338356925, 'samples': 342576, 'steps': 7136, 'loss/train': 2.8900513648986816} +07/25/2024 11:58:59 - INFO - __main__ - Step 7138: {'lr': 0.0004977102235364356, 'samples': 342624, 'steps': 7137, 'loss/train': 2.116649866104126} +07/25/2024 11:58:59 - INFO - __main__ - Step 7139: {'lr': 0.0004977095131274995, 'samples': 342672, 'steps': 7138, 'loss/train': 1.4880156517028809} +07/25/2024 11:59:00 - INFO - __main__ - Step 7140: {'lr': 0.0004977088026088847, 'samples': 342720, 'steps': 7139, 'loss/train': 1.8601579666137695} +07/25/2024 11:59:00 - INFO - __main__ - Step 7141: {'lr': 0.0004977080919805914, 'samples': 342768, 'steps': 7140, 'loss/train': 1.460324764251709} +07/25/2024 11:59:00 - INFO - __main__ - Step 7142: {'lr': 0.0004977073812426197, 'samples': 342816, 'steps': 7141, 'loss/train': 2.3428494930267334} +07/25/2024 11:59:00 - INFO - __main__ - Step 7143: {'lr': 0.0004977066703949702, 'samples': 342864, 'steps': 7142, 'loss/train': 2.5457372665405273} +07/25/2024 11:59:01 - INFO - __main__ - Step 7144: {'lr': 0.0004977059594376431, 'samples': 342912, 'steps': 7143, 'loss/train': 3.072821617126465} +07/25/2024 11:59:01 - INFO - __main__ - Step 7145: {'lr': 0.0004977052483706388, 'samples': 342960, 'steps': 7144, 'loss/train': 2.5574419498443604} +07/25/2024 11:59:01 - INFO - __main__ - Step 7146: {'lr': 0.0004977045371939576, 'samples': 343008, 'steps': 7145, 'loss/train': 0.2206895649433136} +07/25/2024 11:59:02 - INFO - __main__ - Step 7147: {'lr': 0.0004977038259075996, 'samples': 343056, 'steps': 7146, 'loss/train': 1.882585883140564} +07/25/2024 11:59:02 - INFO - __main__ - Step 7148: {'lr': 0.0004977031145115654, 'samples': 343104, 'steps': 7147, 'loss/train': 2.022953510284424} +07/25/2024 11:59:02 - INFO - __main__ - Step 7149: {'lr': 0.0004977024030058552, 'samples': 343152, 'steps': 7148, 'loss/train': 2.048521041870117} +07/25/2024 11:59:02 - INFO - __main__ - Step 7150: {'lr': 0.0004977016913904694, 'samples': 343200, 'steps': 7149, 'loss/train': 1.7802084684371948} +07/25/2024 11:59:03 - INFO - __main__ - Step 7151: {'lr': 0.0004977009796654082, 'samples': 343248, 'steps': 7150, 'loss/train': 2.5202112197875977} +07/25/2024 11:59:03 - INFO - __main__ - Step 7152: {'lr': 0.0004977002678306718, 'samples': 343296, 'steps': 7151, 'loss/train': 2.462700128555298} +07/25/2024 11:59:03 - INFO - __main__ - Step 7153: {'lr': 0.0004976995558862608, 'samples': 343344, 'steps': 7152, 'loss/train': 2.2974071502685547} +07/25/2024 11:59:04 - INFO - __main__ - Step 7154: {'lr': 0.0004976988438321753, 'samples': 343392, 'steps': 7153, 'loss/train': 2.5837910175323486} +07/25/2024 11:59:04 - INFO - __main__ - Step 7155: {'lr': 0.0004976981316684157, 'samples': 343440, 'steps': 7154, 'loss/train': 1.7799350023269653} +07/25/2024 11:59:04 - INFO - __main__ - Step 7156: {'lr': 0.0004976974193949824, 'samples': 343488, 'steps': 7155, 'loss/train': 2.397562026977539} +07/25/2024 11:59:04 - INFO - __main__ - Step 7157: {'lr': 0.0004976967070118755, 'samples': 343536, 'steps': 7156, 'loss/train': 2.083571434020996} +07/25/2024 11:59:05 - INFO - __main__ - Step 7158: {'lr': 0.0004976959945190955, 'samples': 343584, 'steps': 7157, 'loss/train': 2.11670184135437} +07/25/2024 11:59:05 - INFO - __main__ - Step 7159: {'lr': 0.0004976952819166427, 'samples': 343632, 'steps': 7158, 'loss/train': 2.1829445362091064} +07/25/2024 11:59:05 - INFO - __main__ - Step 7160: {'lr': 0.0004976945692045173, 'samples': 343680, 'steps': 7159, 'loss/train': 2.15486741065979} +07/25/2024 11:59:06 - INFO - __main__ - Step 7161: {'lr': 0.0004976938563827197, 'samples': 343728, 'steps': 7160, 'loss/train': 2.6083052158355713} +07/25/2024 11:59:06 - INFO - __main__ - Step 7162: {'lr': 0.0004976931434512501, 'samples': 343776, 'steps': 7161, 'loss/train': 1.8634260892868042} +07/25/2024 11:59:06 - INFO - __main__ - Step 7163: {'lr': 0.0004976924304101091, 'samples': 343824, 'steps': 7162, 'loss/train': 1.428043007850647} +07/25/2024 11:59:06 - INFO - __main__ - Step 7164: {'lr': 0.0004976917172592967, 'samples': 343872, 'steps': 7163, 'loss/train': 2.440260648727417} +07/25/2024 11:59:07 - INFO - __main__ - Step 7165: {'lr': 0.0004976910039988134, 'samples': 343920, 'steps': 7164, 'loss/train': 2.0464069843292236} +07/25/2024 11:59:07 - INFO - __main__ - Step 7166: {'lr': 0.0004976902906286596, 'samples': 343968, 'steps': 7165, 'loss/train': 2.11161208152771} +07/25/2024 11:59:07 - INFO - __main__ - Step 7167: {'lr': 0.0004976895771488353, 'samples': 344016, 'steps': 7166, 'loss/train': 2.492567300796509} +07/25/2024 11:59:08 - INFO - __main__ - Step 7168: {'lr': 0.000497688863559341, 'samples': 344064, 'steps': 7167, 'loss/train': 3.152615547180176} +07/25/2024 11:59:08 - INFO - __main__ - Step 7169: {'lr': 0.000497688149860177, 'samples': 344112, 'steps': 7168, 'loss/train': 2.405320167541504} +07/25/2024 11:59:08 - INFO - __main__ - Step 7170: {'lr': 0.0004976874360513437, 'samples': 344160, 'steps': 7169, 'loss/train': 0.30890339612960815} +07/25/2024 11:59:08 - INFO - __main__ - Step 7171: {'lr': 0.0004976867221328414, 'samples': 344208, 'steps': 7170, 'loss/train': 2.1153438091278076} +07/25/2024 11:59:09 - INFO - __main__ - Step 7172: {'lr': 0.0004976860081046703, 'samples': 344256, 'steps': 7171, 'loss/train': 1.6832486391067505} +07/25/2024 11:59:09 - INFO - __main__ - Step 7173: {'lr': 0.0004976852939668307, 'samples': 344304, 'steps': 7172, 'loss/train': 2.046682596206665} +07/25/2024 11:59:09 - INFO - __main__ - Step 7174: {'lr': 0.000497684579719323, 'samples': 344352, 'steps': 7173, 'loss/train': 1.7503166198730469} +07/25/2024 11:59:10 - INFO - __main__ - Step 7175: {'lr': 0.0004976838653621476, 'samples': 344400, 'steps': 7174, 'loss/train': 2.5842864513397217} +07/25/2024 11:59:10 - INFO - __main__ - Step 7176: {'lr': 0.0004976831508953047, 'samples': 344448, 'steps': 7175, 'loss/train': 2.0365850925445557} +07/25/2024 11:59:10 - INFO - __main__ - Step 7177: {'lr': 0.0004976824363187946, 'samples': 344496, 'steps': 7176, 'loss/train': 1.9129767417907715} +07/25/2024 11:59:10 - INFO - __main__ - Step 7178: {'lr': 0.0004976817216326177, 'samples': 344544, 'steps': 7177, 'loss/train': 2.4894533157348633} +07/25/2024 11:59:11 - INFO - __main__ - Step 7179: {'lr': 0.0004976810068367742, 'samples': 344592, 'steps': 7178, 'loss/train': 2.68800950050354} +07/25/2024 11:59:11 - INFO - __main__ - Step 7180: {'lr': 0.0004976802919312646, 'samples': 344640, 'steps': 7179, 'loss/train': 0.9566047191619873} +07/25/2024 11:59:11 - INFO - __main__ - Step 7181: {'lr': 0.000497679576916089, 'samples': 344688, 'steps': 7180, 'loss/train': 2.1218032836914062} +07/25/2024 11:59:12 - INFO - __main__ - Step 7182: {'lr': 0.0004976788617912478, 'samples': 344736, 'steps': 7181, 'loss/train': 2.099698781967163} +07/25/2024 11:59:12 - INFO - __main__ - Step 7183: {'lr': 0.0004976781465567414, 'samples': 344784, 'steps': 7182, 'loss/train': 2.4216318130493164} +07/25/2024 11:59:12 - INFO - __main__ - Step 7184: {'lr': 0.0004976774312125702, 'samples': 344832, 'steps': 7183, 'loss/train': 1.9756771326065063} +07/25/2024 11:59:12 - INFO - __main__ - Step 7185: {'lr': 0.0004976767157587341, 'samples': 344880, 'steps': 7184, 'loss/train': 2.9068734645843506} +07/25/2024 11:59:13 - INFO - __main__ - Step 7186: {'lr': 0.000497676000195234, 'samples': 344928, 'steps': 7185, 'loss/train': 1.867326259613037} +07/25/2024 11:59:13 - INFO - __main__ - Step 7187: {'lr': 0.0004976752845220697, 'samples': 344976, 'steps': 7186, 'loss/train': 1.2433463335037231} +07/25/2024 11:59:13 - INFO - __main__ - Step 7188: {'lr': 0.0004976745687392418, 'samples': 345024, 'steps': 7187, 'loss/train': 2.0122296810150146} +07/25/2024 11:59:14 - INFO - __main__ - Step 7189: {'lr': 0.0004976738528467504, 'samples': 345072, 'steps': 7188, 'loss/train': 1.9712857007980347} +07/25/2024 11:59:14 - INFO - __main__ - Step 7190: {'lr': 0.0004976731368445961, 'samples': 345120, 'steps': 7189, 'loss/train': 2.573197841644287} +07/25/2024 11:59:14 - INFO - __main__ - Step 7191: {'lr': 0.000497672420732779, 'samples': 345168, 'steps': 7190, 'loss/train': 2.585952043533325} +07/25/2024 11:59:14 - INFO - __main__ - Step 7192: {'lr': 0.0004976717045112995, 'samples': 345216, 'steps': 7191, 'loss/train': 3.029906749725342} +07/25/2024 11:59:15 - INFO - __main__ - Step 7193: {'lr': 0.000497670988180158, 'samples': 345264, 'steps': 7192, 'loss/train': 2.256535768508911} +07/25/2024 11:59:15 - INFO - __main__ - Step 7194: {'lr': 0.0004976702717393546, 'samples': 345312, 'steps': 7193, 'loss/train': 0.3196803629398346} +07/25/2024 11:59:15 - INFO - __main__ - Step 7195: {'lr': 0.0004976695551888899, 'samples': 345360, 'steps': 7194, 'loss/train': 2.1869020462036133} +07/25/2024 11:59:16 - INFO - __main__ - Step 7196: {'lr': 0.0004976688385287639, 'samples': 345408, 'steps': 7195, 'loss/train': 2.0592782497406006} +07/25/2024 11:59:16 - INFO - __main__ - Step 7197: {'lr': 0.0004976681217589772, 'samples': 345456, 'steps': 7196, 'loss/train': 2.044752836227417} +07/25/2024 11:59:16 - INFO - __main__ - Step 7198: {'lr': 0.0004976674048795299, 'samples': 345504, 'steps': 7197, 'loss/train': 2.001380443572998} +07/25/2024 11:59:16 - INFO - __main__ - Step 7199: {'lr': 0.0004976666878904225, 'samples': 345552, 'steps': 7198, 'loss/train': 2.470842123031616} +07/25/2024 11:59:17 - INFO - __main__ - Step 7200: {'lr': 0.0004976659707916552, 'samples': 345600, 'steps': 7199, 'loss/train': 2.333390712738037} +07/25/2024 11:59:17 - INFO - __main__ - Step 7201: {'lr': 0.0004976652535832283, 'samples': 345648, 'steps': 7200, 'loss/train': 1.9025486707687378} +07/25/2024 11:59:17 - INFO - __main__ - Step 7202: {'lr': 0.0004976645362651422, 'samples': 345696, 'steps': 7201, 'loss/train': 1.9667222499847412} +07/25/2024 11:59:18 - INFO - __main__ - Step 7203: {'lr': 0.0004976638188373972, 'samples': 345744, 'steps': 7202, 'loss/train': 2.2643990516662598} +07/25/2024 11:59:18 - INFO - __main__ - Step 7204: {'lr': 0.0004976631012999935, 'samples': 345792, 'steps': 7203, 'loss/train': 2.0505692958831787} +07/25/2024 11:59:18 - INFO - __main__ - Step 7205: {'lr': 0.0004976623836529316, 'samples': 345840, 'steps': 7204, 'loss/train': 1.4856683015823364} +07/25/2024 11:59:18 - INFO - __main__ - Step 7206: {'lr': 0.0004976616658962118, 'samples': 345888, 'steps': 7205, 'loss/train': 1.0877829790115356} +07/25/2024 11:59:19 - INFO - __main__ - Step 7207: {'lr': 0.0004976609480298342, 'samples': 345936, 'steps': 7206, 'loss/train': 1.774837851524353} +07/25/2024 11:59:19 - INFO - __main__ - Step 7208: {'lr': 0.0004976602300537994, 'samples': 345984, 'steps': 7207, 'loss/train': 1.925068974494934} +07/25/2024 11:59:19 - INFO - __main__ - Step 7209: {'lr': 0.0004976595119681076, 'samples': 346032, 'steps': 7208, 'loss/train': 2.151128053665161} +07/25/2024 11:59:20 - INFO - __main__ - Step 7210: {'lr': 0.000497658793772759, 'samples': 346080, 'steps': 7209, 'loss/train': 1.9885365962982178} +07/25/2024 11:59:20 - INFO - __main__ - Step 7211: {'lr': 0.0004976580754677541, 'samples': 346128, 'steps': 7210, 'loss/train': 1.2983403205871582} +07/25/2024 11:59:20 - INFO - __main__ - Step 7212: {'lr': 0.0004976573570530931, 'samples': 346176, 'steps': 7211, 'loss/train': 2.100029945373535} +07/25/2024 11:59:20 - INFO - __main__ - Step 7213: {'lr': 0.0004976566385287764, 'samples': 346224, 'steps': 7212, 'loss/train': 2.243044137954712} +07/25/2024 11:59:21 - INFO - __main__ - Step 7214: {'lr': 0.0004976559198948043, 'samples': 346272, 'steps': 7213, 'loss/train': 2.43719220161438} +07/25/2024 11:59:21 - INFO - __main__ - Step 7215: {'lr': 0.000497655201151177, 'samples': 346320, 'steps': 7214, 'loss/train': 2.6018130779266357} +07/25/2024 11:59:21 - INFO - __main__ - Step 7216: {'lr': 0.000497654482297895, 'samples': 346368, 'steps': 7215, 'loss/train': 2.0977625846862793} +07/25/2024 11:59:21 - INFO - __main__ - Step 7217: {'lr': 0.0004976537633349585, 'samples': 346416, 'steps': 7216, 'loss/train': 2.103278636932373} +07/25/2024 11:59:22 - INFO - __main__ - Step 7218: {'lr': 0.0004976530442623679, 'samples': 346464, 'steps': 7217, 'loss/train': 0.32339322566986084} +07/25/2024 11:59:22 - INFO - __main__ - Step 7219: {'lr': 0.0004976523250801234, 'samples': 346512, 'steps': 7218, 'loss/train': 2.3982667922973633} +07/25/2024 11:59:22 - INFO - __main__ - Step 7220: {'lr': 0.0004976516057882255, 'samples': 346560, 'steps': 7219, 'loss/train': 2.91843581199646} +07/25/2024 11:59:23 - INFO - __main__ - Step 7221: {'lr': 0.0004976508863866743, 'samples': 346608, 'steps': 7220, 'loss/train': 2.4327030181884766} +07/25/2024 11:59:23 - INFO - __main__ - Step 7222: {'lr': 0.0004976501668754702, 'samples': 346656, 'steps': 7221, 'loss/train': 1.9197460412979126} +07/25/2024 11:59:23 - INFO - __main__ - Step 7223: {'lr': 0.0004976494472546136, 'samples': 346704, 'steps': 7222, 'loss/train': 2.359584093093872} +07/25/2024 11:59:23 - INFO - __main__ - Step 7224: {'lr': 0.0004976487275241048, 'samples': 346752, 'steps': 7223, 'loss/train': 2.7923946380615234} +07/25/2024 11:59:24 - INFO - __main__ - Step 7225: {'lr': 0.000497648007683944, 'samples': 346800, 'steps': 7224, 'loss/train': 1.9937225580215454} +07/25/2024 11:59:24 - INFO - __main__ - Step 7226: {'lr': 0.0004976472877341317, 'samples': 346848, 'steps': 7225, 'loss/train': 2.0811803340911865} +07/25/2024 11:59:24 - INFO - __main__ - Step 7227: {'lr': 0.000497646567674668, 'samples': 346896, 'steps': 7226, 'loss/train': 2.4019227027893066} +07/25/2024 11:59:25 - INFO - __main__ - Step 7228: {'lr': 0.0004976458475055535, 'samples': 346944, 'steps': 7227, 'loss/train': 2.3514325618743896} +07/25/2024 11:59:25 - INFO - __main__ - Step 7229: {'lr': 0.0004976451272267883, 'samples': 346992, 'steps': 7228, 'loss/train': 2.04838228225708} +07/25/2024 11:59:25 - INFO - __main__ - Step 7230: {'lr': 0.0004976444068383728, 'samples': 347040, 'steps': 7229, 'loss/train': 2.338761806488037} +07/25/2024 11:59:25 - INFO - __main__ - Step 7231: {'lr': 0.0004976436863403072, 'samples': 347088, 'steps': 7230, 'loss/train': 2.3044004440307617} +07/25/2024 11:59:26 - INFO - __main__ - Step 7232: {'lr': 0.000497642965732592, 'samples': 347136, 'steps': 7231, 'loss/train': 2.024397134780884} +07/25/2024 11:59:26 - INFO - __main__ - Step 7233: {'lr': 0.0004976422450152274, 'samples': 347184, 'steps': 7232, 'loss/train': 2.6834006309509277} +07/25/2024 11:59:26 - INFO - __main__ - Step 7234: {'lr': 0.0004976415241882136, 'samples': 347232, 'steps': 7233, 'loss/train': 2.523869037628174} +07/25/2024 11:59:27 - INFO - __main__ - Step 7235: {'lr': 0.0004976408032515513, 'samples': 347280, 'steps': 7234, 'loss/train': 1.6060696840286255} +07/25/2024 11:59:27 - INFO - __main__ - Step 7236: {'lr': 0.0004976400822052405, 'samples': 347328, 'steps': 7235, 'loss/train': 2.3695499897003174} +07/25/2024 11:59:27 - INFO - __main__ - Step 7237: {'lr': 0.0004976393610492816, 'samples': 347376, 'steps': 7236, 'loss/train': 1.9666956663131714} +07/25/2024 11:59:27 - INFO - __main__ - Step 7238: {'lr': 0.0004976386397836749, 'samples': 347424, 'steps': 7237, 'loss/train': 2.4493539333343506} +07/25/2024 11:59:28 - INFO - __main__ - Step 7239: {'lr': 0.0004976379184084208, 'samples': 347472, 'steps': 7238, 'loss/train': 2.3278005123138428} +07/25/2024 11:59:28 - INFO - __main__ - Step 7240: {'lr': 0.0004976371969235196, 'samples': 347520, 'steps': 7239, 'loss/train': 0.8886082172393799} +07/25/2024 11:59:28 - INFO - __main__ - Step 7241: {'lr': 0.0004976364753289716, 'samples': 347568, 'steps': 7240, 'loss/train': 1.9391084909439087} +07/25/2024 11:59:29 - INFO - __main__ - Step 7242: {'lr': 0.000497635753624777, 'samples': 347616, 'steps': 7241, 'loss/train': 0.2841717600822449} +07/25/2024 11:59:29 - INFO - __main__ - Step 7243: {'lr': 0.0004976350318109362, 'samples': 347664, 'steps': 7242, 'loss/train': 1.9578981399536133} +07/25/2024 11:59:29 - INFO - __main__ - Step 7244: {'lr': 0.0004976343098874497, 'samples': 347712, 'steps': 7243, 'loss/train': 1.9929721355438232} +07/25/2024 11:59:29 - INFO - __main__ - Step 7245: {'lr': 0.0004976335878543176, 'samples': 347760, 'steps': 7244, 'loss/train': 1.9169764518737793} +07/25/2024 11:59:30 - INFO - __main__ - Step 7246: {'lr': 0.0004976328657115403, 'samples': 347808, 'steps': 7245, 'loss/train': 1.7013885974884033} +07/25/2024 11:59:30 - INFO - __main__ - Step 7247: {'lr': 0.0004976321434591181, 'samples': 347856, 'steps': 7246, 'loss/train': 2.1106114387512207} +07/25/2024 11:59:30 - INFO - __main__ - Step 7248: {'lr': 0.0004976314210970513, 'samples': 347904, 'steps': 7247, 'loss/train': 2.315579652786255} +07/25/2024 11:59:31 - INFO - __main__ - Step 7249: {'lr': 0.0004976306986253403, 'samples': 347952, 'steps': 7248, 'loss/train': 2.369248867034912} +07/25/2024 11:59:31 - INFO - __main__ - Step 7250: {'lr': 0.0004976299760439853, 'samples': 348000, 'steps': 7249, 'loss/train': 1.293647289276123} +07/25/2024 11:59:31 - INFO - __main__ - Step 7251: {'lr': 0.0004976292533529867, 'samples': 348048, 'steps': 7250, 'loss/train': 1.3532342910766602} +07/25/2024 11:59:31 - INFO - __main__ - Step 7252: {'lr': 0.0004976285305523449, 'samples': 348096, 'steps': 7251, 'loss/train': 2.241488456726074} +07/25/2024 11:59:32 - INFO - __main__ - Step 7253: {'lr': 0.0004976278076420599, 'samples': 348144, 'steps': 7252, 'loss/train': 2.036855697631836} +07/25/2024 11:59:32 - INFO - __main__ - Step 7254: {'lr': 0.0004976270846221325, 'samples': 348192, 'steps': 7253, 'loss/train': 2.8008298873901367} +07/25/2024 11:59:32 - INFO - __main__ - Step 7255: {'lr': 0.0004976263614925626, 'samples': 348240, 'steps': 7254, 'loss/train': 1.861539363861084} +07/25/2024 11:59:33 - INFO - __main__ - Step 7256: {'lr': 0.0004976256382533507, 'samples': 348288, 'steps': 7255, 'loss/train': 2.0004241466522217} +07/25/2024 11:59:33 - INFO - __main__ - Step 7257: {'lr': 0.0004976249149044972, 'samples': 348336, 'steps': 7256, 'loss/train': 2.3296051025390625} +07/25/2024 11:59:33 - INFO - __main__ - Step 7258: {'lr': 0.0004976241914460023, 'samples': 348384, 'steps': 7257, 'loss/train': 2.0461232662200928} +07/25/2024 11:59:33 - INFO - __main__ - Step 7259: {'lr': 0.0004976234678778663, 'samples': 348432, 'steps': 7258, 'loss/train': 2.2093706130981445} +07/25/2024 11:59:34 - INFO - __main__ - Step 7260: {'lr': 0.0004976227442000895, 'samples': 348480, 'steps': 7259, 'loss/train': 2.129856824874878} +07/25/2024 11:59:34 - INFO - __main__ - Step 7261: {'lr': 0.0004976220204126725, 'samples': 348528, 'steps': 7260, 'loss/train': 1.3989888429641724} +07/25/2024 11:59:34 - INFO - __main__ - Step 7262: {'lr': 0.0004976212965156152, 'samples': 348576, 'steps': 7261, 'loss/train': 2.2607758045196533} +07/25/2024 11:59:35 - INFO - __main__ - Step 7263: {'lr': 0.0004976205725089183, 'samples': 348624, 'steps': 7262, 'loss/train': 1.9730690717697144} +07/25/2024 11:59:35 - INFO - __main__ - Step 7264: {'lr': 0.0004976198483925818, 'samples': 348672, 'steps': 7263, 'loss/train': 2.2147445678710938} +07/25/2024 11:59:35 - INFO - __main__ - Step 7265: {'lr': 0.0004976191241666062, 'samples': 348720, 'steps': 7264, 'loss/train': 2.2290918827056885} +07/25/2024 11:59:35 - INFO - __main__ - Step 7266: {'lr': 0.0004976183998309918, 'samples': 348768, 'steps': 7265, 'loss/train': 0.22732284665107727} +07/25/2024 11:59:36 - INFO - __main__ - Step 7267: {'lr': 0.000497617675385739, 'samples': 348816, 'steps': 7266, 'loss/train': 2.31050705909729} +07/25/2024 11:59:36 - INFO - __main__ - Step 7268: {'lr': 0.0004976169508308479, 'samples': 348864, 'steps': 7267, 'loss/train': 2.1334457397460938} +07/25/2024 11:59:36 - INFO - __main__ - Step 7269: {'lr': 0.000497616226166319, 'samples': 348912, 'steps': 7268, 'loss/train': 1.5555132627487183} +07/25/2024 11:59:37 - INFO - __main__ - Step 7270: {'lr': 0.0004976155013921526, 'samples': 348960, 'steps': 7269, 'loss/train': 1.5952309370040894} +07/25/2024 11:59:37 - INFO - __main__ - Step 7271: {'lr': 0.000497614776508349, 'samples': 349008, 'steps': 7270, 'loss/train': 1.9736746549606323} +07/25/2024 11:59:37 - INFO - __main__ - Step 7272: {'lr': 0.0004976140515149085, 'samples': 349056, 'steps': 7271, 'loss/train': 2.2580299377441406} +07/25/2024 11:59:37 - INFO - __main__ - Step 7273: {'lr': 0.0004976133264118314, 'samples': 349104, 'steps': 7272, 'loss/train': 2.785630702972412} +07/25/2024 11:59:38 - INFO - __main__ - Step 7274: {'lr': 0.0004976126011991181, 'samples': 349152, 'steps': 7273, 'loss/train': 2.2293801307678223} +07/25/2024 11:59:38 - INFO - __main__ - Step 7275: {'lr': 0.0004976118758767688, 'samples': 349200, 'steps': 7274, 'loss/train': 1.7765815258026123} +07/25/2024 11:59:38 - INFO - __main__ - Step 7276: {'lr': 0.000497611150444784, 'samples': 349248, 'steps': 7275, 'loss/train': 2.654327392578125} +07/25/2024 11:59:39 - INFO - __main__ - Step 7277: {'lr': 0.0004976104249031639, 'samples': 349296, 'steps': 7276, 'loss/train': 2.457304000854492} +07/25/2024 11:59:39 - INFO - __main__ - Step 7278: {'lr': 0.0004976096992519087, 'samples': 349344, 'steps': 7277, 'loss/train': 2.0809874534606934} +07/25/2024 11:59:39 - INFO - __main__ - Step 7279: {'lr': 0.0004976089734910191, 'samples': 349392, 'steps': 7278, 'loss/train': 1.9292948246002197} +07/25/2024 11:59:39 - INFO - __main__ - Step 7280: {'lr': 0.000497608247620495, 'samples': 349440, 'steps': 7279, 'loss/train': 1.7324495315551758} +07/25/2024 11:59:40 - INFO - __main__ - Step 7281: {'lr': 0.000497607521640337, 'samples': 349488, 'steps': 7280, 'loss/train': 2.1259515285491943} +07/25/2024 11:59:40 - INFO - __main__ - Step 7282: {'lr': 0.0004976067955505453, 'samples': 349536, 'steps': 7281, 'loss/train': 2.152068614959717} +07/25/2024 11:59:40 - INFO - __main__ - Step 7283: {'lr': 0.0004976060693511203, 'samples': 349584, 'steps': 7282, 'loss/train': 1.9863899946212769} +07/25/2024 11:59:40 - INFO - __main__ - Step 7284: {'lr': 0.0004976053430420621, 'samples': 349632, 'steps': 7283, 'loss/train': 2.4908509254455566} +07/25/2024 11:59:41 - INFO - __main__ - Step 7285: {'lr': 0.0004976046166233714, 'samples': 349680, 'steps': 7284, 'loss/train': 1.822869896888733} +07/25/2024 11:59:41 - INFO - __main__ - Step 7286: {'lr': 0.0004976038900950482, 'samples': 349728, 'steps': 7285, 'loss/train': 2.6448450088500977} +07/25/2024 11:59:41 - INFO - __main__ - Step 7287: {'lr': 0.000497603163457093, 'samples': 349776, 'steps': 7286, 'loss/train': 1.7878692150115967} +07/25/2024 11:59:42 - INFO - __main__ - Step 7288: {'lr': 0.000497602436709506, 'samples': 349824, 'steps': 7287, 'loss/train': 2.1354918479919434} +07/25/2024 11:59:42 - INFO - __main__ - Step 7289: {'lr': 0.0004976017098522876, 'samples': 349872, 'steps': 7288, 'loss/train': 1.9056695699691772} +07/25/2024 11:59:42 - INFO - __main__ - Step 7290: {'lr': 0.0004976009828854381, 'samples': 349920, 'steps': 7289, 'loss/train': 0.18824179470539093} +07/25/2024 11:59:42 - INFO - __main__ - Step 7291: {'lr': 0.0004976002558089578, 'samples': 349968, 'steps': 7290, 'loss/train': 2.4838805198669434} +07/25/2024 11:59:43 - INFO - __main__ - Step 7292: {'lr': 0.000497599528622847, 'samples': 350016, 'steps': 7291, 'loss/train': 2.0869381427764893} +07/25/2024 11:59:43 - INFO - __main__ - Step 7293: {'lr': 0.0004975988013271063, 'samples': 350064, 'steps': 7292, 'loss/train': 1.7365862131118774} +07/25/2024 11:59:43 - INFO - __main__ - Step 7294: {'lr': 0.0004975980739217355, 'samples': 350112, 'steps': 7293, 'loss/train': 1.6015956401824951} +07/25/2024 11:59:44 - INFO - __main__ - Step 7295: {'lr': 0.0004975973464067353, 'samples': 350160, 'steps': 7294, 'loss/train': 2.268510341644287} +07/25/2024 11:59:44 - INFO - __main__ - Step 7296: {'lr': 0.0004975966187821061, 'samples': 350208, 'steps': 7295, 'loss/train': 2.157630681991577} +07/25/2024 11:59:44 - INFO - __main__ - Step 7297: {'lr': 0.0004975958910478479, 'samples': 350256, 'steps': 7296, 'loss/train': 2.485011339187622} +07/25/2024 11:59:44 - INFO - __main__ - Step 7298: {'lr': 0.0004975951632039612, 'samples': 350304, 'steps': 7297, 'loss/train': 2.3443892002105713} +07/25/2024 11:59:45 - INFO - __main__ - Step 7299: {'lr': 0.0004975944352504463, 'samples': 350352, 'steps': 7298, 'loss/train': 1.7730058431625366} +07/25/2024 11:59:45 - INFO - __main__ - Step 7300: {'lr': 0.0004975937071873035, 'samples': 350400, 'steps': 7299, 'loss/train': 2.196159839630127} +07/25/2024 11:59:45 - INFO - __main__ - Step 7301: {'lr': 0.0004975929790145331, 'samples': 350448, 'steps': 7300, 'loss/train': 1.9144325256347656} +07/25/2024 11:59:46 - INFO - __main__ - Step 7302: {'lr': 0.0004975922507321356, 'samples': 350496, 'steps': 7301, 'loss/train': 1.7247589826583862} +07/25/2024 11:59:46 - INFO - __main__ - Step 7303: {'lr': 0.0004975915223401111, 'samples': 350544, 'steps': 7302, 'loss/train': 2.0737664699554443} +07/25/2024 11:59:46 - INFO - __main__ - Step 7304: {'lr': 0.00049759079383846, 'samples': 350592, 'steps': 7303, 'loss/train': 2.5403106212615967} +07/25/2024 11:59:46 - INFO - __main__ - Step 7305: {'lr': 0.0004975900652271827, 'samples': 350640, 'steps': 7304, 'loss/train': 1.9156403541564941} +07/25/2024 11:59:47 - INFO - __main__ - Step 7306: {'lr': 0.0004975893365062794, 'samples': 350688, 'steps': 7305, 'loss/train': 1.8939944505691528} +07/25/2024 11:59:47 - INFO - __main__ - Step 7307: {'lr': 0.0004975886076757505, 'samples': 350736, 'steps': 7306, 'loss/train': 2.1526436805725098} +07/25/2024 11:59:47 - INFO - __main__ - Step 7308: {'lr': 0.0004975878787355963, 'samples': 350784, 'steps': 7307, 'loss/train': 2.304624557495117} +07/25/2024 11:59:48 - INFO - __main__ - Step 7309: {'lr': 0.0004975871496858171, 'samples': 350832, 'steps': 7308, 'loss/train': 2.403041124343872} +07/25/2024 11:59:48 - INFO - __main__ - Step 7310: {'lr': 0.0004975864205264133, 'samples': 350880, 'steps': 7309, 'loss/train': 2.781903028488159} +07/25/2024 11:59:48 - INFO - __main__ - Step 7311: {'lr': 0.0004975856912573851, 'samples': 350928, 'steps': 7310, 'loss/train': 2.4378445148468018} +07/25/2024 11:59:48 - INFO - __main__ - Step 7312: {'lr': 0.0004975849618787329, 'samples': 350976, 'steps': 7311, 'loss/train': 2.1102633476257324} +07/25/2024 11:59:49 - INFO - __main__ - Step 7313: {'lr': 0.0004975842323904572, 'samples': 351024, 'steps': 7312, 'loss/train': 1.8374603986740112} +07/25/2024 11:59:49 - INFO - __main__ - Step 7314: {'lr': 0.0004975835027925579, 'samples': 351072, 'steps': 7313, 'loss/train': 0.20655658841133118} +07/25/2024 11:59:49 - INFO - __main__ - Step 7315: {'lr': 0.0004975827730850357, 'samples': 351120, 'steps': 7314, 'loss/train': 2.694413661956787} +07/25/2024 11:59:50 - INFO - __main__ - Step 7316: {'lr': 0.0004975820432678907, 'samples': 351168, 'steps': 7315, 'loss/train': 1.1380643844604492} +07/25/2024 11:59:50 - INFO - __main__ - Step 7317: {'lr': 0.0004975813133411233, 'samples': 351216, 'steps': 7316, 'loss/train': 2.7500553131103516} +07/25/2024 11:59:50 - INFO - __main__ - Step 7318: {'lr': 0.0004975805833047339, 'samples': 351264, 'steps': 7317, 'loss/train': 2.0962672233581543} +07/25/2024 11:59:50 - INFO - __main__ - Step 7319: {'lr': 0.0004975798531587228, 'samples': 351312, 'steps': 7318, 'loss/train': 2.161458730697632} +07/25/2024 11:59:51 - INFO - __main__ - Step 7320: {'lr': 0.0004975791229030901, 'samples': 351360, 'steps': 7319, 'loss/train': 2.143786907196045} +07/25/2024 11:59:51 - INFO - __main__ - Step 7321: {'lr': 0.0004975783925378365, 'samples': 351408, 'steps': 7320, 'loss/train': 2.1284632682800293} +07/25/2024 11:59:51 - INFO - __main__ - Step 7322: {'lr': 0.0004975776620629621, 'samples': 351456, 'steps': 7321, 'loss/train': 2.048647880554199} +07/25/2024 11:59:52 - INFO - __main__ - Step 7323: {'lr': 0.0004975769314784671, 'samples': 351504, 'steps': 7322, 'loss/train': 1.5781129598617554} +07/25/2024 11:59:52 - INFO - __main__ - Step 7324: {'lr': 0.000497576200784352, 'samples': 351552, 'steps': 7323, 'loss/train': 1.471721887588501} +07/25/2024 11:59:52 - INFO - __main__ - Step 7325: {'lr': 0.0004975754699806172, 'samples': 351600, 'steps': 7324, 'loss/train': 2.105266809463501} +07/25/2024 11:59:52 - INFO - __main__ - Step 7326: {'lr': 0.0004975747390672629, 'samples': 351648, 'steps': 7325, 'loss/train': 1.5748835802078247} +07/25/2024 11:59:53 - INFO - __main__ - Step 7327: {'lr': 0.0004975740080442894, 'samples': 351696, 'steps': 7326, 'loss/train': 2.0536792278289795} +07/25/2024 11:59:53 - INFO - __main__ - Step 7328: {'lr': 0.0004975732769116971, 'samples': 351744, 'steps': 7327, 'loss/train': 2.193009614944458} +07/25/2024 11:59:53 - INFO - __main__ - Step 7329: {'lr': 0.0004975725456694863, 'samples': 351792, 'steps': 7328, 'loss/train': 2.1682233810424805} +07/25/2024 11:59:54 - INFO - __main__ - Step 7330: {'lr': 0.0004975718143176573, 'samples': 351840, 'steps': 7329, 'loss/train': 2.1696598529815674} +07/25/2024 11:59:54 - INFO - __main__ - Step 7331: {'lr': 0.0004975710828562103, 'samples': 351888, 'steps': 7330, 'loss/train': 2.503769636154175} +07/25/2024 11:59:54 - INFO - __main__ - Step 7332: {'lr': 0.000497570351285146, 'samples': 351936, 'steps': 7331, 'loss/train': 1.614776611328125} +07/25/2024 11:59:54 - INFO - __main__ - Step 7333: {'lr': 0.0004975696196044643, 'samples': 351984, 'steps': 7332, 'loss/train': 2.7488458156585693} +07/25/2024 11:59:55 - INFO - __main__ - Step 7334: {'lr': 0.0004975688878141657, 'samples': 352032, 'steps': 7333, 'loss/train': 2.208348512649536} +07/25/2024 11:59:55 - INFO - __main__ - Step 7335: {'lr': 0.0004975681559142507, 'samples': 352080, 'steps': 7334, 'loss/train': 2.842597484588623} +07/25/2024 11:59:55 - INFO - __main__ - Step 7336: {'lr': 0.0004975674239047194, 'samples': 352128, 'steps': 7335, 'loss/train': 1.7465016841888428} +07/25/2024 11:59:56 - INFO - __main__ - Step 7337: {'lr': 0.0004975666917855721, 'samples': 352176, 'steps': 7336, 'loss/train': 2.2679457664489746} +07/25/2024 11:59:56 - INFO - __main__ - Step 7338: {'lr': 0.0004975659595568093, 'samples': 352224, 'steps': 7337, 'loss/train': 1.6344071626663208} +07/25/2024 11:59:56 - INFO - __main__ - Step 7339: {'lr': 0.0004975652272184311, 'samples': 352272, 'steps': 7338, 'loss/train': 2.02538800239563} +07/25/2024 11:59:56 - INFO - __main__ - Step 7340: {'lr': 0.0004975644947704381, 'samples': 352320, 'steps': 7339, 'loss/train': 1.9441192150115967} +07/25/2024 11:59:57 - INFO - __main__ - Step 7341: {'lr': 0.0004975637622128304, 'samples': 352368, 'steps': 7340, 'loss/train': 2.371446132659912} +07/25/2024 11:59:57 - INFO - __main__ - Step 7342: {'lr': 0.0004975630295456084, 'samples': 352416, 'steps': 7341, 'loss/train': 2.468688488006592} +07/25/2024 11:59:57 - INFO - __main__ - Step 7343: {'lr': 0.0004975622967687724, 'samples': 352464, 'steps': 7342, 'loss/train': 2.1455624103546143} +07/25/2024 11:59:58 - INFO - __main__ - Step 7344: {'lr': 0.0004975615638823229, 'samples': 352512, 'steps': 7343, 'loss/train': 1.8925423622131348} +07/25/2024 11:59:58 - INFO - __main__ - Step 7345: {'lr': 0.0004975608308862599, 'samples': 352560, 'steps': 7344, 'loss/train': 2.8515408039093018} +07/25/2024 11:59:58 - INFO - __main__ - Step 7346: {'lr': 0.0004975600977805841, 'samples': 352608, 'steps': 7345, 'loss/train': 2.3111300468444824} +07/25/2024 11:59:58 - INFO - __main__ - Step 7347: {'lr': 0.0004975593645652953, 'samples': 352656, 'steps': 7346, 'loss/train': 2.128526449203491} +07/25/2024 11:59:59 - INFO - __main__ - Step 7348: {'lr': 0.0004975586312403944, 'samples': 352704, 'steps': 7347, 'loss/train': 1.8413348197937012} +07/25/2024 11:59:59 - INFO - __main__ - Step 7349: {'lr': 0.0004975578978058814, 'samples': 352752, 'steps': 7348, 'loss/train': 1.800339698791504} +07/25/2024 11:59:59 - INFO - __main__ - Step 7350: {'lr': 0.0004975571642617568, 'samples': 352800, 'steps': 7349, 'loss/train': 1.672377109527588} +07/25/2024 11:59:59 - INFO - __main__ - Step 7351: {'lr': 0.0004975564306080208, 'samples': 352848, 'steps': 7350, 'loss/train': 2.099606513977051} +07/25/2024 12:00:00 - INFO - __main__ - Step 7352: {'lr': 0.0004975556968446737, 'samples': 352896, 'steps': 7351, 'loss/train': 2.360290050506592} +07/25/2024 12:00:00 - INFO - __main__ - Step 7353: {'lr': 0.0004975549629717159, 'samples': 352944, 'steps': 7352, 'loss/train': 1.641664743423462} +07/25/2024 12:00:00 - INFO - __main__ - Step 7354: {'lr': 0.0004975542289891476, 'samples': 352992, 'steps': 7353, 'loss/train': 3.085294008255005} +07/25/2024 12:00:01 - INFO - __main__ - Step 7355: {'lr': 0.0004975534948969693, 'samples': 353040, 'steps': 7354, 'loss/train': 2.511667251586914} +07/25/2024 12:00:01 - INFO - __main__ - Step 7356: {'lr': 0.0004975527606951813, 'samples': 353088, 'steps': 7355, 'loss/train': 2.2518937587738037} +07/25/2024 12:00:01 - INFO - __main__ - Step 7357: {'lr': 0.0004975520263837837, 'samples': 353136, 'steps': 7356, 'loss/train': 2.1794512271881104} +07/25/2024 12:00:01 - INFO - __main__ - Step 7358: {'lr': 0.0004975512919627771, 'samples': 353184, 'steps': 7357, 'loss/train': 0.9087277054786682} +07/25/2024 12:00:02 - INFO - __main__ - Step 7359: {'lr': 0.0004975505574321618, 'samples': 353232, 'steps': 7358, 'loss/train': 1.858242154121399} +07/25/2024 12:00:02 - INFO - __main__ - Step 7360: {'lr': 0.000497549822791938, 'samples': 353280, 'steps': 7359, 'loss/train': 1.645272135734558} +07/25/2024 12:00:02 - INFO - __main__ - Step 7361: {'lr': 0.0004975490880421061, 'samples': 353328, 'steps': 7360, 'loss/train': 2.388742685317993} +07/25/2024 12:00:03 - INFO - __main__ - Step 7362: {'lr': 0.0004975483531826664, 'samples': 353376, 'steps': 7361, 'loss/train': 2.2119088172912598} +07/25/2024 12:00:03 - INFO - __main__ - Step 7363: {'lr': 0.0004975476182136191, 'samples': 353424, 'steps': 7362, 'loss/train': 2.084070920944214} +07/25/2024 12:00:03 - INFO - __main__ - Step 7364: {'lr': 0.0004975468831349648, 'samples': 353472, 'steps': 7363, 'loss/train': 2.0672876834869385} +07/25/2024 12:00:03 - INFO - __main__ - Step 7365: {'lr': 0.0004975461479467036, 'samples': 353520, 'steps': 7364, 'loss/train': 2.2759385108947754} +07/25/2024 12:00:04 - INFO - __main__ - Step 7366: {'lr': 0.000497545412648836, 'samples': 353568, 'steps': 7365, 'loss/train': 2.9252028465270996} +07/25/2024 12:00:04 - INFO - __main__ - Step 7367: {'lr': 0.000497544677241362, 'samples': 353616, 'steps': 7366, 'loss/train': 2.03674578666687} +07/25/2024 12:00:04 - INFO - __main__ - Step 7368: {'lr': 0.0004975439417242824, 'samples': 353664, 'steps': 7367, 'loss/train': 1.96981680393219} +07/25/2024 12:00:05 - INFO - __main__ - Step 7369: {'lr': 0.000497543206097597, 'samples': 353712, 'steps': 7368, 'loss/train': 2.4298369884490967} +07/25/2024 12:00:05 - INFO - __main__ - Step 7370: {'lr': 0.0004975424703613068, 'samples': 353760, 'steps': 7369, 'loss/train': 1.7803386449813843} +07/25/2024 12:00:05 - INFO - __main__ - Step 7371: {'lr': 0.0004975417345154114, 'samples': 353808, 'steps': 7370, 'loss/train': 2.307863712310791} +07/25/2024 12:00:05 - INFO - __main__ - Step 7372: {'lr': 0.0004975409985599115, 'samples': 353856, 'steps': 7371, 'loss/train': 2.2357521057128906} +07/25/2024 12:00:06 - INFO - __main__ - Step 7373: {'lr': 0.0004975402624948076, 'samples': 353904, 'steps': 7372, 'loss/train': 2.3762733936309814} +07/25/2024 12:00:06 - INFO - __main__ - Step 7374: {'lr': 0.0004975395263200996, 'samples': 353952, 'steps': 7373, 'loss/train': 2.1259925365448} +07/25/2024 12:00:06 - INFO - __main__ - Step 7375: {'lr': 0.000497538790035788, 'samples': 354000, 'steps': 7374, 'loss/train': 2.5779147148132324} +07/25/2024 12:00:07 - INFO - __main__ - Step 7376: {'lr': 0.0004975380536418733, 'samples': 354048, 'steps': 7375, 'loss/train': 2.043574571609497} +07/25/2024 12:00:07 - INFO - __main__ - Step 7377: {'lr': 0.0004975373171383557, 'samples': 354096, 'steps': 7376, 'loss/train': 1.8133344650268555} +07/25/2024 12:00:07 - INFO - __main__ - Step 7378: {'lr': 0.0004975365805252354, 'samples': 354144, 'steps': 7377, 'loss/train': 2.319772243499756} +07/25/2024 12:00:07 - INFO - __main__ - Step 7379: {'lr': 0.0004975358438025129, 'samples': 354192, 'steps': 7378, 'loss/train': 1.6550968885421753} +07/25/2024 12:00:08 - INFO - __main__ - Step 7380: {'lr': 0.0004975351069701885, 'samples': 354240, 'steps': 7379, 'loss/train': 1.745753288269043} +07/25/2024 12:00:08 - INFO - __main__ - Step 7381: {'lr': 0.0004975343700282624, 'samples': 354288, 'steps': 7380, 'loss/train': 2.003892660140991} +07/25/2024 12:00:08 - INFO - __main__ - Step 7382: {'lr': 0.000497533632976735, 'samples': 354336, 'steps': 7381, 'loss/train': 1.6947877407073975} +07/25/2024 12:00:09 - INFO - __main__ - Step 7383: {'lr': 0.0004975328958156068, 'samples': 354384, 'steps': 7382, 'loss/train': 2.1707561016082764} +07/25/2024 12:00:09 - INFO - __main__ - Step 7384: {'lr': 0.0004975321585448778, 'samples': 354432, 'steps': 7383, 'loss/train': 2.0350840091705322} +07/25/2024 12:00:09 - INFO - __main__ - Step 7385: {'lr': 0.0004975314211645486, 'samples': 354480, 'steps': 7384, 'loss/train': 2.3784592151641846} +07/25/2024 12:00:09 - INFO - __main__ - Step 7386: {'lr': 0.0004975306836746193, 'samples': 354528, 'steps': 7385, 'loss/train': 1.4594045877456665} +07/25/2024 12:00:10 - INFO - __main__ - Step 7387: {'lr': 0.0004975299460750905, 'samples': 354576, 'steps': 7386, 'loss/train': 1.8437752723693848} +07/25/2024 12:00:10 - INFO - __main__ - Step 7388: {'lr': 0.0004975292083659623, 'samples': 354624, 'steps': 7387, 'loss/train': 1.6463508605957031} +07/25/2024 12:00:10 - INFO - __main__ - Step 7389: {'lr': 0.0004975284705472352, 'samples': 354672, 'steps': 7388, 'loss/train': 1.8806813955307007} +07/25/2024 12:00:11 - INFO - __main__ - Step 7390: {'lr': 0.0004975277326189092, 'samples': 354720, 'steps': 7389, 'loss/train': 2.5896859169006348} +07/25/2024 12:00:11 - INFO - __main__ - Step 7391: {'lr': 0.000497526994580985, 'samples': 354768, 'steps': 7390, 'loss/train': 1.735308289527893} +07/25/2024 12:00:11 - INFO - __main__ - Step 7392: {'lr': 0.0004975262564334629, 'samples': 354816, 'steps': 7391, 'loss/train': 1.788886547088623} +07/25/2024 12:00:11 - INFO - __main__ - Step 7393: {'lr': 0.0004975255181763429, 'samples': 354864, 'steps': 7392, 'loss/train': 2.053177833557129} +07/25/2024 12:00:12 - INFO - __main__ - Step 7394: {'lr': 0.0004975247798096257, 'samples': 354912, 'steps': 7393, 'loss/train': 2.0451879501342773} +07/25/2024 12:00:12 - INFO - __main__ - Step 7395: {'lr': 0.0004975240413333114, 'samples': 354960, 'steps': 7394, 'loss/train': 1.8071388006210327} +07/25/2024 12:00:12 - INFO - __main__ - Step 7396: {'lr': 0.0004975233027474003, 'samples': 355008, 'steps': 7395, 'loss/train': 3.611309766769409} +07/25/2024 12:00:13 - INFO - __main__ - Step 7397: {'lr': 0.0004975225640518929, 'samples': 355056, 'steps': 7396, 'loss/train': 2.086387872695923} +07/25/2024 12:00:13 - INFO - __main__ - Step 7398: {'lr': 0.0004975218252467894, 'samples': 355104, 'steps': 7397, 'loss/train': 2.3575263023376465} +07/25/2024 12:00:13 - INFO - __main__ - Step 7399: {'lr': 0.0004975210863320904, 'samples': 355152, 'steps': 7398, 'loss/train': 1.0216705799102783} +07/25/2024 12:00:13 - INFO - __main__ - Step 7400: {'lr': 0.0004975203473077957, 'samples': 355200, 'steps': 7399, 'loss/train': 2.540294647216797} +07/25/2024 12:00:14 - INFO - __main__ - Step 7401: {'lr': 0.0004975196081739061, 'samples': 355248, 'steps': 7400, 'loss/train': 2.067852258682251} +07/25/2024 12:00:14 - INFO - __main__ - Step 7402: {'lr': 0.0004975188689304216, 'samples': 355296, 'steps': 7401, 'loss/train': 2.0886757373809814} +07/25/2024 12:00:14 - INFO - __main__ - Step 7403: {'lr': 0.0004975181295773428, 'samples': 355344, 'steps': 7402, 'loss/train': 2.3596978187561035} +07/25/2024 12:00:15 - INFO - __main__ - Step 7404: {'lr': 0.0004975173901146699, 'samples': 355392, 'steps': 7403, 'loss/train': 1.830951452255249} +07/25/2024 12:00:15 - INFO - __main__ - Step 7405: {'lr': 0.0004975166505424033, 'samples': 355440, 'steps': 7404, 'loss/train': 2.327284574508667} +07/25/2024 12:00:15 - INFO - __main__ - Step 7406: {'lr': 0.000497515910860543, 'samples': 355488, 'steps': 7405, 'loss/train': 1.9388012886047363} +07/25/2024 12:00:15 - INFO - __main__ - Step 7407: {'lr': 0.0004975151710690898, 'samples': 355536, 'steps': 7406, 'loss/train': 2.1559317111968994} +07/25/2024 12:00:16 - INFO - __main__ - Step 7408: {'lr': 0.0004975144311680439, 'samples': 355584, 'steps': 7407, 'loss/train': 2.083746910095215} +07/25/2024 12:00:16 - INFO - __main__ - Step 7409: {'lr': 0.0004975136911574054, 'samples': 355632, 'steps': 7408, 'loss/train': 2.5044448375701904} +07/25/2024 12:00:16 - INFO - __main__ - Step 7410: {'lr': 0.0004975129510371748, 'samples': 355680, 'steps': 7409, 'loss/train': 2.518415689468384} +07/25/2024 12:00:17 - INFO - __main__ - Step 7411: {'lr': 0.0004975122108073523, 'samples': 355728, 'steps': 7410, 'loss/train': 2.380887269973755} +07/25/2024 12:00:17 - INFO - __main__ - Step 7412: {'lr': 0.0004975114704679384, 'samples': 355776, 'steps': 7411, 'loss/train': 2.3372929096221924} +07/25/2024 12:00:17 - INFO - __main__ - Step 7413: {'lr': 0.0004975107300189335, 'samples': 355824, 'steps': 7412, 'loss/train': 2.25634765625} +07/25/2024 12:00:17 - INFO - __main__ - Step 7414: {'lr': 0.0004975099894603377, 'samples': 355872, 'steps': 7413, 'loss/train': 2.2193775177001953} +07/25/2024 12:00:18 - INFO - __main__ - Step 7415: {'lr': 0.0004975092487921513, 'samples': 355920, 'steps': 7414, 'loss/train': 2.182204484939575} +07/25/2024 12:00:18 - INFO - __main__ - Step 7416: {'lr': 0.0004975085080143748, 'samples': 355968, 'steps': 7415, 'loss/train': 2.3352251052856445} +07/25/2024 12:00:18 - INFO - __main__ - Step 7417: {'lr': 0.0004975077671270085, 'samples': 356016, 'steps': 7416, 'loss/train': 2.0047526359558105} +07/25/2024 12:00:19 - INFO - __main__ - Step 7418: {'lr': 0.0004975070261300528, 'samples': 356064, 'steps': 7417, 'loss/train': 2.0478527545928955} +07/25/2024 12:00:19 - INFO - __main__ - Step 7419: {'lr': 0.0004975062850235077, 'samples': 356112, 'steps': 7418, 'loss/train': 2.133099317550659} +07/25/2024 12:00:19 - INFO - __main__ - Step 7420: {'lr': 0.0004975055438073739, 'samples': 356160, 'steps': 7419, 'loss/train': 2.065000057220459} +07/25/2024 12:00:19 - INFO - __main__ - Step 7421: {'lr': 0.0004975048024816515, 'samples': 356208, 'steps': 7420, 'loss/train': 3.0099446773529053} +07/25/2024 12:00:20 - INFO - __main__ - Step 7422: {'lr': 0.000497504061046341, 'samples': 356256, 'steps': 7421, 'loss/train': 2.1626341342926025} +07/25/2024 12:00:20 - INFO - __main__ - Step 7423: {'lr': 0.0004975033195014426, 'samples': 356304, 'steps': 7422, 'loss/train': 2.0894906520843506} +07/25/2024 12:00:20 - INFO - __main__ - Step 7424: {'lr': 0.0004975025778469566, 'samples': 356352, 'steps': 7423, 'loss/train': 2.5593512058258057} +07/25/2024 12:00:20 - INFO - __main__ - Step 7425: {'lr': 0.0004975018360828835, 'samples': 356400, 'steps': 7424, 'loss/train': 1.9379054307937622} +07/25/2024 12:00:21 - INFO - __main__ - Step 7426: {'lr': 0.0004975010942092234, 'samples': 356448, 'steps': 7425, 'loss/train': 1.9834047555923462} +07/25/2024 12:00:21 - INFO - __main__ - Step 7427: {'lr': 0.0004975003522259769, 'samples': 356496, 'steps': 7426, 'loss/train': 1.976242184638977} +07/25/2024 12:00:21 - INFO - __main__ - Step 7428: {'lr': 0.0004974996101331441, 'samples': 356544, 'steps': 7427, 'loss/train': 1.794973373413086} +07/25/2024 12:00:22 - INFO - __main__ - Step 7429: {'lr': 0.0004974988679307253, 'samples': 356592, 'steps': 7428, 'loss/train': 1.9795286655426025} +07/25/2024 12:00:22 - INFO - __main__ - Step 7430: {'lr': 0.0004974981256187211, 'samples': 356640, 'steps': 7429, 'loss/train': 1.4586139917373657} +07/25/2024 12:00:22 - INFO - __main__ - Step 7431: {'lr': 0.0004974973831971315, 'samples': 356688, 'steps': 7430, 'loss/train': 2.3064160346984863} +07/25/2024 12:00:22 - INFO - __main__ - Step 7432: {'lr': 0.0004974966406659572, 'samples': 356736, 'steps': 7431, 'loss/train': 1.6221120357513428} +07/25/2024 12:00:23 - INFO - __main__ - Step 7433: {'lr': 0.0004974958980251982, 'samples': 356784, 'steps': 7432, 'loss/train': 1.7324888706207275} +07/25/2024 12:00:23 - INFO - __main__ - Step 7434: {'lr': 0.000497495155274855, 'samples': 356832, 'steps': 7433, 'loss/train': 2.3679518699645996} +07/25/2024 12:00:23 - INFO - __main__ - Step 7435: {'lr': 0.0004974944124149279, 'samples': 356880, 'steps': 7434, 'loss/train': 1.954719066619873} +07/25/2024 12:00:24 - INFO - __main__ - Step 7436: {'lr': 0.0004974936694454171, 'samples': 356928, 'steps': 7435, 'loss/train': 1.957721471786499} +07/25/2024 12:00:24 - INFO - __main__ - Step 7437: {'lr': 0.000497492926366323, 'samples': 356976, 'steps': 7436, 'loss/train': 2.2205750942230225} +07/25/2024 12:00:24 - INFO - __main__ - Step 7438: {'lr': 0.0004974921831776462, 'samples': 357024, 'steps': 7437, 'loss/train': 2.619152784347534} +07/25/2024 12:00:24 - INFO - __main__ - Step 7439: {'lr': 0.0004974914398793866, 'samples': 357072, 'steps': 7438, 'loss/train': 1.8065835237503052} +07/25/2024 12:00:25 - INFO - __main__ - Step 7440: {'lr': 0.0004974906964715448, 'samples': 357120, 'steps': 7439, 'loss/train': 1.8168047666549683} +07/25/2024 12:00:25 - INFO - __main__ - Step 7441: {'lr': 0.000497489952954121, 'samples': 357168, 'steps': 7440, 'loss/train': 1.8821048736572266} +07/25/2024 12:00:25 - INFO - __main__ - Step 7442: {'lr': 0.0004974892093271157, 'samples': 357216, 'steps': 7441, 'loss/train': 1.9852452278137207} +07/25/2024 12:00:26 - INFO - __main__ - Step 7443: {'lr': 0.000497488465590529, 'samples': 357264, 'steps': 7442, 'loss/train': 2.2353968620300293} +07/25/2024 12:00:26 - INFO - __main__ - Step 7444: {'lr': 0.0004974877217443614, 'samples': 357312, 'steps': 7443, 'loss/train': 1.7959725856781006} +07/25/2024 12:00:26 - INFO - __main__ - Step 7445: {'lr': 0.0004974869777886131, 'samples': 357360, 'steps': 7444, 'loss/train': 2.5967297554016113} +07/25/2024 12:00:26 - INFO - __main__ - Step 7446: {'lr': 0.0004974862337232846, 'samples': 357408, 'steps': 7445, 'loss/train': 2.317558526992798} +07/25/2024 12:00:27 - INFO - __main__ - Step 7447: {'lr': 0.000497485489548376, 'samples': 357456, 'steps': 7446, 'loss/train': 2.2921173572540283} +07/25/2024 12:00:27 - INFO - __main__ - Step 7448: {'lr': 0.0004974847452638879, 'samples': 357504, 'steps': 7447, 'loss/train': 2.3977768421173096} +07/25/2024 12:00:27 - INFO - __main__ - Step 7449: {'lr': 0.0004974840008698205, 'samples': 357552, 'steps': 7448, 'loss/train': 2.0660407543182373} +07/25/2024 12:00:28 - INFO - __main__ - Step 7450: {'lr': 0.000497483256366174, 'samples': 357600, 'steps': 7449, 'loss/train': 1.7883390188217163} +07/25/2024 12:00:28 - INFO - __main__ - Step 7451: {'lr': 0.0004974825117529489, 'samples': 357648, 'steps': 7450, 'loss/train': 2.1866140365600586} +07/25/2024 12:00:28 - INFO - __main__ - Step 7452: {'lr': 0.0004974817670301455, 'samples': 357696, 'steps': 7451, 'loss/train': 2.1276652812957764} +07/25/2024 12:00:28 - INFO - __main__ - Step 7453: {'lr': 0.0004974810221977641, 'samples': 357744, 'steps': 7452, 'loss/train': 2.292081356048584} +07/25/2024 12:00:29 - INFO - __main__ - Step 7454: {'lr': 0.0004974802772558049, 'samples': 357792, 'steps': 7453, 'loss/train': 1.7898380756378174} +07/25/2024 12:00:29 - INFO - __main__ - Step 7455: {'lr': 0.0004974795322042686, 'samples': 357840, 'steps': 7454, 'loss/train': 2.0330543518066406} +07/25/2024 12:00:29 - INFO - __main__ - Step 7456: {'lr': 0.0004974787870431551, 'samples': 357888, 'steps': 7455, 'loss/train': 1.9546090364456177} +07/25/2024 12:00:30 - INFO - __main__ - Step 7457: {'lr': 0.000497478041772465, 'samples': 357936, 'steps': 7456, 'loss/train': 2.860015869140625} +07/25/2024 12:00:30 - INFO - __main__ - Step 7458: {'lr': 0.0004974772963921986, 'samples': 357984, 'steps': 7457, 'loss/train': 1.783592939376831} +07/25/2024 12:00:30 - INFO - __main__ - Step 7459: {'lr': 0.0004974765509023561, 'samples': 358032, 'steps': 7458, 'loss/train': 2.0893537998199463} +07/25/2024 12:00:30 - INFO - __main__ - Step 7460: {'lr': 0.000497475805302938, 'samples': 358080, 'steps': 7459, 'loss/train': 1.9406346082687378} +07/25/2024 12:00:31 - INFO - __main__ - Step 7461: {'lr': 0.0004974750595939445, 'samples': 358128, 'steps': 7460, 'loss/train': 2.0493149757385254} +07/25/2024 12:00:31 - INFO - __main__ - Step 7462: {'lr': 0.0004974743137753762, 'samples': 358176, 'steps': 7461, 'loss/train': 1.910670518875122} +07/25/2024 12:00:31 - INFO - __main__ - Step 7463: {'lr': 0.0004974735678472329, 'samples': 358224, 'steps': 7462, 'loss/train': 2.02184796333313} +07/25/2024 12:00:32 - INFO - __main__ - Step 7464: {'lr': 0.0004974728218095154, 'samples': 358272, 'steps': 7463, 'loss/train': 0.6543272137641907} +07/25/2024 12:00:32 - INFO - __main__ - Step 7465: {'lr': 0.0004974720756622238, 'samples': 358320, 'steps': 7464, 'loss/train': 2.1857786178588867} +07/25/2024 12:00:32 - INFO - __main__ - Step 7466: {'lr': 0.0004974713294053584, 'samples': 358368, 'steps': 7465, 'loss/train': 1.7601587772369385} +07/25/2024 12:00:32 - INFO - __main__ - Step 7467: {'lr': 0.0004974705830389197, 'samples': 358416, 'steps': 7466, 'loss/train': 1.9301396608352661} +07/25/2024 12:00:33 - INFO - __main__ - Step 7468: {'lr': 0.0004974698365629081, 'samples': 358464, 'steps': 7467, 'loss/train': 0.4896204471588135} +07/25/2024 12:00:33 - INFO - __main__ - Step 7469: {'lr': 0.0004974690899773236, 'samples': 358512, 'steps': 7468, 'loss/train': 2.4688022136688232} +07/25/2024 12:00:33 - INFO - __main__ - Step 7470: {'lr': 0.000497468343282167, 'samples': 358560, 'steps': 7469, 'loss/train': 1.3466185331344604} +07/25/2024 12:00:34 - INFO - __main__ - Step 7471: {'lr': 0.0004974675964774381, 'samples': 358608, 'steps': 7470, 'loss/train': 1.9883838891983032} +07/25/2024 12:00:34 - INFO - __main__ - Step 7472: {'lr': 0.0004974668495631375, 'samples': 358656, 'steps': 7471, 'loss/train': 2.344762086868286} +07/25/2024 12:00:34 - INFO - __main__ - Step 7473: {'lr': 0.0004974661025392656, 'samples': 358704, 'steps': 7472, 'loss/train': 2.182661533355713} +07/25/2024 12:00:34 - INFO - __main__ - Step 7474: {'lr': 0.0004974653554058226, 'samples': 358752, 'steps': 7473, 'loss/train': 2.249436140060425} +07/25/2024 12:00:35 - INFO - __main__ - Step 7475: {'lr': 0.0004974646081628088, 'samples': 358800, 'steps': 7474, 'loss/train': 2.0632224082946777} +07/25/2024 12:00:35 - INFO - __main__ - Step 7476: {'lr': 0.0004974638608102247, 'samples': 358848, 'steps': 7475, 'loss/train': 2.0237748622894287} +07/25/2024 12:00:35 - INFO - __main__ - Step 7477: {'lr': 0.0004974631133480706, 'samples': 358896, 'steps': 7476, 'loss/train': 2.5589990615844727} +07/25/2024 12:00:36 - INFO - __main__ - Step 7478: {'lr': 0.0004974623657763467, 'samples': 358944, 'steps': 7477, 'loss/train': 1.8047754764556885} +07/25/2024 12:00:36 - INFO - __main__ - Step 7479: {'lr': 0.0004974616180950534, 'samples': 358992, 'steps': 7478, 'loss/train': 1.9599967002868652} +07/25/2024 12:00:36 - INFO - __main__ - Step 7480: {'lr': 0.0004974608703041909, 'samples': 359040, 'steps': 7479, 'loss/train': 2.346050977706909} +07/25/2024 12:00:36 - INFO - __main__ - Step 7481: {'lr': 0.0004974601224037598, 'samples': 359088, 'steps': 7480, 'loss/train': 2.7618701457977295} +07/25/2024 12:00:37 - INFO - __main__ - Step 7482: {'lr': 0.0004974593743937604, 'samples': 359136, 'steps': 7481, 'loss/train': 2.292534828186035} +07/25/2024 12:00:37 - INFO - __main__ - Step 7483: {'lr': 0.0004974586262741927, 'samples': 359184, 'steps': 7482, 'loss/train': 2.083501100540161} +07/25/2024 12:00:37 - INFO - __main__ - Step 7484: {'lr': 0.0004974578780450574, 'samples': 359232, 'steps': 7483, 'loss/train': 2.3684134483337402} +07/25/2024 12:00:37 - INFO - __main__ - Step 7485: {'lr': 0.0004974571297063546, 'samples': 359280, 'steps': 7484, 'loss/train': 2.5173654556274414} +07/25/2024 12:00:38 - INFO - __main__ - Step 7486: {'lr': 0.0004974563812580846, 'samples': 359328, 'steps': 7485, 'loss/train': 2.496281862258911} +07/25/2024 12:00:38 - INFO - __main__ - Step 7487: {'lr': 0.000497455632700248, 'samples': 359376, 'steps': 7486, 'loss/train': 2.2660205364227295} +07/25/2024 12:00:38 - INFO - __main__ - Step 7488: {'lr': 0.0004974548840328451, 'samples': 359424, 'steps': 7487, 'loss/train': 2.126884937286377} +07/25/2024 12:00:39 - INFO - __main__ - Step 7489: {'lr': 0.000497454135255876, 'samples': 359472, 'steps': 7488, 'loss/train': 2.153578042984009} +07/25/2024 12:00:39 - INFO - __main__ - Step 7490: {'lr': 0.0004974533863693412, 'samples': 359520, 'steps': 7489, 'loss/train': 1.773816466331482} +07/25/2024 12:00:39 - INFO - __main__ - Step 7491: {'lr': 0.0004974526373732408, 'samples': 359568, 'steps': 7490, 'loss/train': 2.109530448913574} +07/25/2024 12:00:39 - INFO - __main__ - Step 7492: {'lr': 0.0004974518882675754, 'samples': 359616, 'steps': 7491, 'loss/train': 1.807002067565918} +07/25/2024 12:00:40 - INFO - __main__ - Step 7493: {'lr': 0.0004974511390523453, 'samples': 359664, 'steps': 7492, 'loss/train': 2.5696218013763428} +07/25/2024 12:00:40 - INFO - __main__ - Step 7494: {'lr': 0.0004974503897275507, 'samples': 359712, 'steps': 7493, 'loss/train': 2.032634735107422} +07/25/2024 12:00:40 - INFO - __main__ - Step 7495: {'lr': 0.0004974496402931919, 'samples': 359760, 'steps': 7494, 'loss/train': 2.0750622749328613} +07/25/2024 12:00:41 - INFO - __main__ - Step 7496: {'lr': 0.0004974488907492695, 'samples': 359808, 'steps': 7495, 'loss/train': 1.7108975648880005} +07/25/2024 12:00:41 - INFO - __main__ - Step 7497: {'lr': 0.0004974481410957837, 'samples': 359856, 'steps': 7496, 'loss/train': 1.735080599784851} +07/25/2024 12:00:41 - INFO - __main__ - Step 7498: {'lr': 0.0004974473913327346, 'samples': 359904, 'steps': 7497, 'loss/train': 2.327422618865967} +07/25/2024 12:00:41 - INFO - __main__ - Step 7499: {'lr': 0.0004974466414601229, 'samples': 359952, 'steps': 7498, 'loss/train': 2.069739818572998} +07/25/2024 12:00:42 - INFO - __main__ - Step 7500: {'lr': 0.0004974458914779488, 'samples': 360000, 'steps': 7499, 'loss/train': 1.5058765411376953} +07/25/2024 12:00:42 - INFO - __main__ - Step 7501: {'lr': 0.0004974451413862124, 'samples': 360048, 'steps': 7500, 'loss/train': 2.537777900695801} +07/25/2024 12:00:42 - INFO - __main__ - Step 7502: {'lr': 0.0004974443911849143, 'samples': 360096, 'steps': 7501, 'loss/train': 1.5610674619674683} +07/25/2024 12:00:43 - INFO - __main__ - Step 7503: {'lr': 0.0004974436408740548, 'samples': 360144, 'steps': 7502, 'loss/train': 2.1270546913146973} +07/25/2024 12:00:43 - INFO - __main__ - Step 7504: {'lr': 0.0004974428904536341, 'samples': 360192, 'steps': 7503, 'loss/train': 2.1345295906066895} +07/25/2024 12:00:43 - INFO - __main__ - Step 7505: {'lr': 0.0004974421399236527, 'samples': 360240, 'steps': 7504, 'loss/train': 1.7636584043502808} +07/25/2024 12:00:43 - INFO - __main__ - Step 7506: {'lr': 0.0004974413892841109, 'samples': 360288, 'steps': 7505, 'loss/train': 1.6790008544921875} +07/25/2024 12:00:44 - INFO - __main__ - Step 7507: {'lr': 0.0004974406385350089, 'samples': 360336, 'steps': 7506, 'loss/train': 2.68168044090271} +07/25/2024 12:00:44 - INFO - __main__ - Step 7508: {'lr': 0.000497439887676347, 'samples': 360384, 'steps': 7507, 'loss/train': 2.3393478393554688} +07/25/2024 12:00:44 - INFO - __main__ - Step 7509: {'lr': 0.000497439136708126, 'samples': 360432, 'steps': 7508, 'loss/train': 2.0581493377685547} +07/25/2024 12:00:45 - INFO - __main__ - Step 7510: {'lr': 0.0004974383856303456, 'samples': 360480, 'steps': 7509, 'loss/train': 1.7817473411560059} +07/25/2024 12:00:45 - INFO - __main__ - Step 7511: {'lr': 0.0004974376344430065, 'samples': 360528, 'steps': 7510, 'loss/train': 2.772034168243408} +07/25/2024 12:00:45 - INFO - __main__ - Step 7512: {'lr': 0.000497436883146109, 'samples': 360576, 'steps': 7511, 'loss/train': 2.2015533447265625} +07/25/2024 12:00:45 - INFO - __main__ - Step 7513: {'lr': 0.0004974361317396532, 'samples': 360624, 'steps': 7512, 'loss/train': 2.2437479496002197} +07/25/2024 12:00:46 - INFO - __main__ - Step 7514: {'lr': 0.0004974353802236397, 'samples': 360672, 'steps': 7513, 'loss/train': 1.992369532585144} +07/25/2024 12:00:46 - INFO - __main__ - Step 7515: {'lr': 0.0004974346285980688, 'samples': 360720, 'steps': 7514, 'loss/train': 2.2410128116607666} +07/25/2024 12:00:46 - INFO - __main__ - Step 7516: {'lr': 0.0004974338768629408, 'samples': 360768, 'steps': 7515, 'loss/train': 2.208986520767212} +07/25/2024 12:00:47 - INFO - __main__ - Step 7517: {'lr': 0.000497433125018256, 'samples': 360816, 'steps': 7516, 'loss/train': 1.5331388711929321} +07/25/2024 12:00:47 - INFO - __main__ - Step 7518: {'lr': 0.0004974323730640147, 'samples': 360864, 'steps': 7517, 'loss/train': 2.045835018157959} +07/25/2024 12:00:47 - INFO - __main__ - Step 7519: {'lr': 0.0004974316210002173, 'samples': 360912, 'steps': 7518, 'loss/train': 1.8056416511535645} +07/25/2024 12:00:47 - INFO - __main__ - Step 7520: {'lr': 0.0004974308688268641, 'samples': 360960, 'steps': 7519, 'loss/train': 1.7996790409088135} +07/25/2024 12:00:48 - INFO - __main__ - Step 7521: {'lr': 0.0004974301165439555, 'samples': 361008, 'steps': 7520, 'loss/train': 1.7870694398880005} +07/25/2024 12:00:48 - INFO - __main__ - Step 7522: {'lr': 0.0004974293641514917, 'samples': 361056, 'steps': 7521, 'loss/train': 2.141388177871704} +07/25/2024 12:00:48 - INFO - __main__ - Step 7523: {'lr': 0.0004974286116494731, 'samples': 361104, 'steps': 7522, 'loss/train': 1.750646948814392} +07/25/2024 12:00:49 - INFO - __main__ - Step 7524: {'lr': 0.0004974278590379001, 'samples': 361152, 'steps': 7523, 'loss/train': 1.3814343214035034} +07/25/2024 12:00:49 - INFO - __main__ - Step 7525: {'lr': 0.0004974271063167729, 'samples': 361200, 'steps': 7524, 'loss/train': 2.3137426376342773} +07/25/2024 12:00:49 - INFO - __main__ - Step 7526: {'lr': 0.000497426353486092, 'samples': 361248, 'steps': 7525, 'loss/train': 1.2878590822219849} +07/25/2024 12:00:49 - INFO - __main__ - Step 7527: {'lr': 0.0004974256005458576, 'samples': 361296, 'steps': 7526, 'loss/train': 2.251997947692871} +07/25/2024 12:00:50 - INFO - __main__ - Step 7528: {'lr': 0.0004974248474960701, 'samples': 361344, 'steps': 7527, 'loss/train': 2.6011483669281006} +07/25/2024 12:00:50 - INFO - __main__ - Step 7529: {'lr': 0.0004974240943367298, 'samples': 361392, 'steps': 7528, 'loss/train': 1.98617684841156} +07/25/2024 12:00:50 - INFO - __main__ - Step 7530: {'lr': 0.000497423341067837, 'samples': 361440, 'steps': 7529, 'loss/train': 2.1108906269073486} +07/25/2024 12:00:51 - INFO - __main__ - Step 7531: {'lr': 0.0004974225876893922, 'samples': 361488, 'steps': 7530, 'loss/train': 3.130383253097534} +07/25/2024 12:00:51 - INFO - __main__ - Step 7532: {'lr': 0.0004974218342013955, 'samples': 361536, 'steps': 7531, 'loss/train': 2.3303210735321045} +07/25/2024 12:00:51 - INFO - __main__ - Step 7533: {'lr': 0.0004974210806038473, 'samples': 361584, 'steps': 7532, 'loss/train': 2.2307324409484863} +07/25/2024 12:00:51 - INFO - __main__ - Step 7534: {'lr': 0.0004974203268967481, 'samples': 361632, 'steps': 7533, 'loss/train': 1.7293691635131836} +07/25/2024 12:00:52 - INFO - __main__ - Step 7535: {'lr': 0.0004974195730800982, 'samples': 361680, 'steps': 7534, 'loss/train': 1.2235603332519531} +07/25/2024 12:00:52 - INFO - __main__ - Step 7536: {'lr': 0.0004974188191538976, 'samples': 361728, 'steps': 7535, 'loss/train': 2.31330943107605} +07/25/2024 12:00:52 - INFO - __main__ - Step 7537: {'lr': 0.000497418065118147, 'samples': 361776, 'steps': 7536, 'loss/train': 2.1998660564422607} +07/25/2024 12:00:53 - INFO - __main__ - Step 7538: {'lr': 0.0004974173109728466, 'samples': 361824, 'steps': 7537, 'loss/train': 1.7911595106124878} +07/25/2024 12:00:53 - INFO - __main__ - Step 7539: {'lr': 0.0004974165567179968, 'samples': 361872, 'steps': 7538, 'loss/train': 1.7384681701660156} +07/25/2024 12:00:53 - INFO - __main__ - Step 7540: {'lr': 0.0004974158023535978, 'samples': 361920, 'steps': 7539, 'loss/train': 2.1023244857788086} +07/25/2024 12:00:53 - INFO - __main__ - Step 7541: {'lr': 0.0004974150478796501, 'samples': 361968, 'steps': 7540, 'loss/train': 1.196591854095459} +07/25/2024 12:00:54 - INFO - __main__ - Step 7542: {'lr': 0.0004974142932961538, 'samples': 362016, 'steps': 7541, 'loss/train': 1.8137180805206299} +07/25/2024 12:00:54 - INFO - __main__ - Step 7543: {'lr': 0.0004974135386031095, 'samples': 362064, 'steps': 7542, 'loss/train': 2.767256498336792} +07/25/2024 12:00:54 - INFO - __main__ - Step 7544: {'lr': 0.0004974127838005175, 'samples': 362112, 'steps': 7543, 'loss/train': 1.3764088153839111} +07/25/2024 12:00:55 - INFO - __main__ - Step 7545: {'lr': 0.0004974120288883779, 'samples': 362160, 'steps': 7544, 'loss/train': 2.1061465740203857} +07/25/2024 12:00:55 - INFO - __main__ - Step 7546: {'lr': 0.0004974112738666913, 'samples': 362208, 'steps': 7545, 'loss/train': 1.5804483890533447} +07/25/2024 12:00:55 - INFO - __main__ - Step 7547: {'lr': 0.0004974105187354578, 'samples': 362256, 'steps': 7546, 'loss/train': 1.9000203609466553} +07/25/2024 12:00:55 - INFO - __main__ - Step 7548: {'lr': 0.0004974097634946779, 'samples': 362304, 'steps': 7547, 'loss/train': 2.0294506549835205} +07/25/2024 12:00:56 - INFO - __main__ - Step 7549: {'lr': 0.0004974090081443519, 'samples': 362352, 'steps': 7548, 'loss/train': 1.9470793008804321} +07/25/2024 12:00:56 - INFO - __main__ - Step 7550: {'lr': 0.0004974082526844801, 'samples': 362400, 'steps': 7549, 'loss/train': 1.2441614866256714} +07/25/2024 12:00:56 - INFO - __main__ - Step 7551: {'lr': 0.0004974074971150629, 'samples': 362448, 'steps': 7550, 'loss/train': 1.631978988647461} +07/25/2024 12:00:56 - INFO - __main__ - Step 7552: {'lr': 0.0004974067414361006, 'samples': 362496, 'steps': 7551, 'loss/train': 2.2394540309906006} +07/25/2024 12:00:57 - INFO - __main__ - Step 7553: {'lr': 0.0004974059856475935, 'samples': 362544, 'steps': 7552, 'loss/train': 1.9084299802780151} +07/25/2024 12:00:57 - INFO - __main__ - Step 7554: {'lr': 0.0004974052297495419, 'samples': 362592, 'steps': 7553, 'loss/train': 2.1613142490386963} +07/25/2024 12:00:57 - INFO - __main__ - Step 7555: {'lr': 0.0004974044737419463, 'samples': 362640, 'steps': 7554, 'loss/train': 3.2274203300476074} +07/25/2024 12:00:58 - INFO - __main__ - Step 7556: {'lr': 0.0004974037176248069, 'samples': 362688, 'steps': 7555, 'loss/train': 2.514436721801758} +07/25/2024 12:00:58 - INFO - __main__ - Step 7557: {'lr': 0.0004974029613981241, 'samples': 362736, 'steps': 7556, 'loss/train': 1.9554386138916016} +07/25/2024 12:00:58 - INFO - __main__ - Step 7558: {'lr': 0.0004974022050618981, 'samples': 362784, 'steps': 7557, 'loss/train': 2.1484932899475098} +07/25/2024 12:00:58 - INFO - __main__ - Step 7559: {'lr': 0.0004974014486161295, 'samples': 362832, 'steps': 7558, 'loss/train': 1.7870184183120728} +07/25/2024 12:00:59 - INFO - __main__ - Step 7560: {'lr': 0.0004974006920608183, 'samples': 362880, 'steps': 7559, 'loss/train': 2.1862597465515137} +07/25/2024 12:00:59 - INFO - __main__ - Step 7561: {'lr': 0.000497399935395965, 'samples': 362928, 'steps': 7560, 'loss/train': 2.0810813903808594} +07/25/2024 12:00:59 - INFO - __main__ - Step 7562: {'lr': 0.00049739917862157, 'samples': 362976, 'steps': 7561, 'loss/train': 1.9498326778411865} +07/25/2024 12:01:00 - INFO - __main__ - Step 7563: {'lr': 0.0004973984217376337, 'samples': 363024, 'steps': 7562, 'loss/train': 1.6229488849639893} +07/25/2024 12:01:00 - INFO - __main__ - Step 7564: {'lr': 0.0004973976647441561, 'samples': 363072, 'steps': 7563, 'loss/train': 1.9204615354537964} +07/25/2024 12:01:00 - INFO - __main__ - Step 7565: {'lr': 0.0004973969076411379, 'samples': 363120, 'steps': 7564, 'loss/train': 1.9788844585418701} +07/25/2024 12:01:00 - INFO - __main__ - Step 7566: {'lr': 0.0004973961504285792, 'samples': 363168, 'steps': 7565, 'loss/train': 2.0018794536590576} +07/25/2024 12:01:01 - INFO - __main__ - Step 7567: {'lr': 0.0004973953931064805, 'samples': 363216, 'steps': 7566, 'loss/train': 2.138824224472046} +07/25/2024 12:01:01 - INFO - __main__ - Step 7568: {'lr': 0.0004973946356748419, 'samples': 363264, 'steps': 7567, 'loss/train': 1.2129098176956177} +07/25/2024 12:01:01 - INFO - __main__ - Step 7569: {'lr': 0.000497393878133664, 'samples': 363312, 'steps': 7568, 'loss/train': 1.5087963342666626} +07/25/2024 12:01:02 - INFO - __main__ - Step 7570: {'lr': 0.000497393120482947, 'samples': 363360, 'steps': 7569, 'loss/train': 1.9327560663223267} +07/25/2024 12:01:02 - INFO - __main__ - Step 7571: {'lr': 0.0004973923627226913, 'samples': 363408, 'steps': 7570, 'loss/train': 1.4195102453231812} +07/25/2024 12:01:02 - INFO - __main__ - Step 7572: {'lr': 0.0004973916048528972, 'samples': 363456, 'steps': 7571, 'loss/train': 2.0256266593933105} +07/25/2024 12:01:02 - INFO - __main__ - Step 7573: {'lr': 0.000497390846873565, 'samples': 363504, 'steps': 7572, 'loss/train': 1.8809993267059326} +07/25/2024 12:01:03 - INFO - __main__ - Step 7574: {'lr': 0.000497390088784695, 'samples': 363552, 'steps': 7573, 'loss/train': 1.0424211025238037} +07/25/2024 12:01:03 - INFO - __main__ - Step 7575: {'lr': 0.0004973893305862876, 'samples': 363600, 'steps': 7574, 'loss/train': 1.2733328342437744} +07/25/2024 12:01:03 - INFO - __main__ - Step 7576: {'lr': 0.0004973885722783432, 'samples': 363648, 'steps': 7575, 'loss/train': 1.7903860807418823} +07/25/2024 12:01:04 - INFO - __main__ - Step 7577: {'lr': 0.000497387813860862, 'samples': 363696, 'steps': 7576, 'loss/train': 2.0995030403137207} +07/25/2024 12:01:04 - INFO - __main__ - Step 7578: {'lr': 0.0004973870553338446, 'samples': 363744, 'steps': 7577, 'loss/train': 1.8554677963256836} +07/25/2024 12:01:04 - INFO - __main__ - Step 7579: {'lr': 0.0004973862966972909, 'samples': 363792, 'steps': 7578, 'loss/train': 2.662992238998413} +07/25/2024 12:01:04 - INFO - __main__ - Step 7580: {'lr': 0.0004973855379512016, 'samples': 363840, 'steps': 7579, 'loss/train': 2.0281951427459717} +07/25/2024 12:01:05 - INFO - __main__ - Step 7581: {'lr': 0.0004973847790955769, 'samples': 363888, 'steps': 7580, 'loss/train': 1.5887598991394043} +07/25/2024 12:01:05 - INFO - __main__ - Step 7582: {'lr': 0.0004973840201304172, 'samples': 363936, 'steps': 7581, 'loss/train': 1.8691823482513428} +07/25/2024 12:01:05 - INFO - __main__ - Step 7583: {'lr': 0.0004973832610557227, 'samples': 363984, 'steps': 7582, 'loss/train': 2.453737735748291} +07/25/2024 12:01:06 - INFO - __main__ - Step 7584: {'lr': 0.0004973825018714939, 'samples': 364032, 'steps': 7583, 'loss/train': 2.8630104064941406} +07/25/2024 12:01:06 - INFO - __main__ - Step 7585: {'lr': 0.000497381742577731, 'samples': 364080, 'steps': 7584, 'loss/train': 2.048081874847412} +07/25/2024 12:01:06 - INFO - __main__ - Step 7586: {'lr': 0.0004973809831744345, 'samples': 364128, 'steps': 7585, 'loss/train': 1.9972004890441895} +07/25/2024 12:01:06 - INFO - __main__ - Step 7587: {'lr': 0.0004973802236616045, 'samples': 364176, 'steps': 7586, 'loss/train': 2.204789400100708} +07/25/2024 12:01:07 - INFO - __main__ - Step 7588: {'lr': 0.0004973794640392415, 'samples': 364224, 'steps': 7587, 'loss/train': 2.1202168464660645} +07/25/2024 12:01:07 - INFO - __main__ - Step 7589: {'lr': 0.0004973787043073459, 'samples': 364272, 'steps': 7588, 'loss/train': 2.19248628616333} +07/25/2024 12:01:07 - INFO - __main__ - Step 7590: {'lr': 0.0004973779444659178, 'samples': 364320, 'steps': 7589, 'loss/train': 2.210496187210083} +07/25/2024 12:01:08 - INFO - __main__ - Step 7591: {'lr': 0.0004973771845149578, 'samples': 364368, 'steps': 7590, 'loss/train': 2.181157112121582} +07/25/2024 12:01:08 - INFO - __main__ - Step 7592: {'lr': 0.000497376424454466, 'samples': 364416, 'steps': 7591, 'loss/train': 1.250518560409546} +07/25/2024 12:01:08 - INFO - __main__ - Step 7593: {'lr': 0.0004973756642844429, 'samples': 364464, 'steps': 7592, 'loss/train': 2.1151561737060547} +07/25/2024 12:01:08 - INFO - __main__ - Step 7594: {'lr': 0.0004973749040048888, 'samples': 364512, 'steps': 7593, 'loss/train': 1.647731065750122} +07/25/2024 12:01:09 - INFO - __main__ - Step 7595: {'lr': 0.0004973741436158041, 'samples': 364560, 'steps': 7594, 'loss/train': 1.8662519454956055} +07/25/2024 12:01:09 - INFO - __main__ - Step 7596: {'lr': 0.000497373383117189, 'samples': 364608, 'steps': 7595, 'loss/train': 2.347627639770508} +07/25/2024 12:01:09 - INFO - __main__ - Step 7597: {'lr': 0.0004973726225090438, 'samples': 364656, 'steps': 7596, 'loss/train': 1.5452308654785156} +07/25/2024 12:01:10 - INFO - __main__ - Step 7598: {'lr': 0.0004973718617913691, 'samples': 364704, 'steps': 7597, 'loss/train': 1.0979820489883423} +07/25/2024 12:01:10 - INFO - __main__ - Step 7599: {'lr': 0.0004973711009641649, 'samples': 364752, 'steps': 7598, 'loss/train': 2.2160427570343018} +07/25/2024 12:01:10 - INFO - __main__ - Step 7600: {'lr': 0.0004973703400274319, 'samples': 364800, 'steps': 7599, 'loss/train': 2.0730350017547607} +07/25/2024 12:01:10 - INFO - __main__ - Step 7601: {'lr': 0.0004973695789811701, 'samples': 364848, 'steps': 7600, 'loss/train': 1.3479301929473877} +07/25/2024 12:01:11 - INFO - __main__ - Step 7602: {'lr': 0.00049736881782538, 'samples': 364896, 'steps': 7601, 'loss/train': 2.2023255825042725} +07/25/2024 12:01:11 - INFO - __main__ - Step 7603: {'lr': 0.0004973680565600618, 'samples': 364944, 'steps': 7602, 'loss/train': 2.625422954559326} +07/25/2024 12:01:11 - INFO - __main__ - Step 7604: {'lr': 0.0004973672951852162, 'samples': 364992, 'steps': 7603, 'loss/train': 2.2062318325042725} +07/25/2024 12:01:12 - INFO - __main__ - Step 7605: {'lr': 0.0004973665337008431, 'samples': 365040, 'steps': 7604, 'loss/train': 2.1542394161224365} +07/25/2024 12:01:12 - INFO - __main__ - Step 7606: {'lr': 0.0004973657721069432, 'samples': 365088, 'steps': 7605, 'loss/train': 2.167145013809204} +07/25/2024 12:01:12 - INFO - __main__ - Step 7607: {'lr': 0.0004973650104035165, 'samples': 365136, 'steps': 7606, 'loss/train': 2.1070616245269775} +07/25/2024 12:01:12 - INFO - __main__ - Step 7608: {'lr': 0.0004973642485905637, 'samples': 365184, 'steps': 7607, 'loss/train': 2.248077392578125} +07/25/2024 12:01:13 - INFO - __main__ - Step 7609: {'lr': 0.0004973634866680849, 'samples': 365232, 'steps': 7608, 'loss/train': 2.0916850566864014} +07/25/2024 12:01:13 - INFO - __main__ - Step 7610: {'lr': 0.0004973627246360803, 'samples': 365280, 'steps': 7609, 'loss/train': 2.0857274532318115} +07/25/2024 12:01:13 - INFO - __main__ - Step 7611: {'lr': 0.0004973619624945505, 'samples': 365328, 'steps': 7610, 'loss/train': 2.2106308937072754} +07/25/2024 12:01:13 - INFO - __main__ - Step 7612: {'lr': 0.0004973612002434958, 'samples': 365376, 'steps': 7611, 'loss/train': 2.452357292175293} +07/25/2024 12:01:14 - INFO - __main__ - Step 7613: {'lr': 0.0004973604378829165, 'samples': 365424, 'steps': 7612, 'loss/train': 0.588740348815918} +07/25/2024 12:01:14 - INFO - __main__ - Step 7614: {'lr': 0.0004973596754128129, 'samples': 365472, 'steps': 7613, 'loss/train': 2.5331273078918457} +07/25/2024 12:01:14 - INFO - __main__ - Step 7615: {'lr': 0.0004973589128331854, 'samples': 365520, 'steps': 7614, 'loss/train': 2.1163508892059326} +07/25/2024 12:01:15 - INFO - __main__ - Step 7616: {'lr': 0.0004973581501440342, 'samples': 365568, 'steps': 7615, 'loss/train': 1.1439682245254517} +07/25/2024 12:01:15 - INFO - __main__ - Step 7617: {'lr': 0.0004973573873453598, 'samples': 365616, 'steps': 7616, 'loss/train': 1.9105503559112549} +07/25/2024 12:01:15 - INFO - __main__ - Step 7618: {'lr': 0.0004973566244371626, 'samples': 365664, 'steps': 7617, 'loss/train': 1.8098067045211792} +07/25/2024 12:01:15 - INFO - __main__ - Step 7619: {'lr': 0.0004973558614194427, 'samples': 365712, 'steps': 7618, 'loss/train': 2.063772201538086} +07/25/2024 12:01:16 - INFO - __main__ - Step 7620: {'lr': 0.0004973550982922004, 'samples': 365760, 'steps': 7619, 'loss/train': 1.7926183938980103} +07/25/2024 12:01:16 - INFO - __main__ - Step 7621: {'lr': 0.0004973543350554365, 'samples': 365808, 'steps': 7620, 'loss/train': 1.8525091409683228} +07/25/2024 12:01:16 - INFO - __main__ - Step 7622: {'lr': 0.0004973535717091509, 'samples': 365856, 'steps': 7621, 'loss/train': 1.4522814750671387} +07/25/2024 12:01:17 - INFO - __main__ - Step 7623: {'lr': 0.0004973528082533441, 'samples': 365904, 'steps': 7622, 'loss/train': 2.202219009399414} +07/25/2024 12:01:17 - INFO - __main__ - Step 7624: {'lr': 0.0004973520446880164, 'samples': 365952, 'steps': 7623, 'loss/train': 1.9040002822875977} +07/25/2024 12:01:17 - INFO - __main__ - Step 7625: {'lr': 0.000497351281013168, 'samples': 366000, 'steps': 7624, 'loss/train': 2.1950483322143555} +07/25/2024 12:01:17 - INFO - __main__ - Step 7626: {'lr': 0.0004973505172287995, 'samples': 366048, 'steps': 7625, 'loss/train': 1.938924789428711} +07/25/2024 12:01:18 - INFO - __main__ - Step 7627: {'lr': 0.0004973497533349111, 'samples': 366096, 'steps': 7626, 'loss/train': 2.253023624420166} +07/25/2024 12:01:18 - INFO - __main__ - Step 7628: {'lr': 0.0004973489893315032, 'samples': 366144, 'steps': 7627, 'loss/train': 1.9264990091323853} +07/25/2024 12:01:18 - INFO - __main__ - Step 7629: {'lr': 0.000497348225218576, 'samples': 366192, 'steps': 7628, 'loss/train': 2.1689369678497314} +07/25/2024 12:01:19 - INFO - __main__ - Step 7630: {'lr': 0.00049734746099613, 'samples': 366240, 'steps': 7629, 'loss/train': 1.9928302764892578} +07/25/2024 12:01:19 - INFO - __main__ - Step 7631: {'lr': 0.0004973466966641656, 'samples': 366288, 'steps': 7630, 'loss/train': 2.3051722049713135} +07/25/2024 12:01:19 - INFO - __main__ - Step 7632: {'lr': 0.0004973459322226829, 'samples': 366336, 'steps': 7631, 'loss/train': 2.3652637004852295} +07/25/2024 12:01:19 - INFO - __main__ - Step 7633: {'lr': 0.0004973451676716823, 'samples': 366384, 'steps': 7632, 'loss/train': 1.8012111186981201} +07/25/2024 12:01:20 - INFO - __main__ - Step 7634: {'lr': 0.0004973444030111642, 'samples': 366432, 'steps': 7633, 'loss/train': 2.233081579208374} +07/25/2024 12:01:20 - INFO - __main__ - Step 7635: {'lr': 0.000497343638241129, 'samples': 366480, 'steps': 7634, 'loss/train': 2.111697196960449} +07/25/2024 12:01:20 - INFO - __main__ - Step 7636: {'lr': 0.000497342873361577, 'samples': 366528, 'steps': 7635, 'loss/train': 2.3419156074523926} +07/25/2024 12:01:21 - INFO - __main__ - Step 7637: {'lr': 0.0004973421083725084, 'samples': 366576, 'steps': 7636, 'loss/train': 2.270420789718628} +07/25/2024 12:01:21 - INFO - __main__ - Step 7638: {'lr': 0.0004973413432739237, 'samples': 366624, 'steps': 7637, 'loss/train': 1.7751047611236572} +07/25/2024 12:01:21 - INFO - __main__ - Step 7639: {'lr': 0.0004973405780658232, 'samples': 366672, 'steps': 7638, 'loss/train': 2.328033924102783} +07/25/2024 12:01:21 - INFO - __main__ - Step 7640: {'lr': 0.0004973398127482071, 'samples': 366720, 'steps': 7639, 'loss/train': 1.1721129417419434} +07/25/2024 12:01:22 - INFO - __main__ - Step 7641: {'lr': 0.0004973390473210761, 'samples': 366768, 'steps': 7640, 'loss/train': 2.536313056945801} +07/25/2024 12:01:22 - INFO - __main__ - Step 7642: {'lr': 0.00049733828178443, 'samples': 366816, 'steps': 7641, 'loss/train': 2.2116270065307617} +07/25/2024 12:01:22 - INFO - __main__ - Step 7643: {'lr': 0.0004973375161382696, 'samples': 366864, 'steps': 7642, 'loss/train': 1.7615294456481934} +07/25/2024 12:01:23 - INFO - __main__ - Step 7644: {'lr': 0.0004973367503825952, 'samples': 366912, 'steps': 7643, 'loss/train': 1.8876638412475586} +07/25/2024 12:01:23 - INFO - __main__ - Step 7645: {'lr': 0.0004973359845174068, 'samples': 366960, 'steps': 7644, 'loss/train': 2.670081853866577} +07/25/2024 12:01:23 - INFO - __main__ - Step 7646: {'lr': 0.0004973352185427051, 'samples': 367008, 'steps': 7645, 'loss/train': 1.9760096073150635} +07/25/2024 12:01:23 - INFO - __main__ - Step 7647: {'lr': 0.0004973344524584903, 'samples': 367056, 'steps': 7646, 'loss/train': 2.10508394241333} +07/25/2024 12:01:24 - INFO - __main__ - Step 7648: {'lr': 0.0004973336862647627, 'samples': 367104, 'steps': 7647, 'loss/train': 1.7024238109588623} +07/25/2024 12:01:24 - INFO - __main__ - Step 7649: {'lr': 0.0004973329199615225, 'samples': 367152, 'steps': 7648, 'loss/train': 1.8900402784347534} +07/25/2024 12:01:24 - INFO - __main__ - Step 7650: {'lr': 0.0004973321535487704, 'samples': 367200, 'steps': 7649, 'loss/train': 2.3565473556518555} +07/25/2024 12:01:25 - INFO - __main__ - Step 7651: {'lr': 0.0004973313870265065, 'samples': 367248, 'steps': 7650, 'loss/train': 2.5470492839813232} +07/25/2024 12:01:25 - INFO - __main__ - Step 7652: {'lr': 0.0004973306203947313, 'samples': 367296, 'steps': 7651, 'loss/train': 2.0262272357940674} +07/25/2024 12:01:25 - INFO - __main__ - Step 7653: {'lr': 0.0004973298536534448, 'samples': 367344, 'steps': 7652, 'loss/train': 2.2985212802886963} +07/25/2024 12:01:25 - INFO - __main__ - Step 7654: {'lr': 0.0004973290868026478, 'samples': 367392, 'steps': 7653, 'loss/train': 2.2194459438323975} +07/25/2024 12:01:26 - INFO - __main__ - Step 7655: {'lr': 0.0004973283198423403, 'samples': 367440, 'steps': 7654, 'loss/train': 2.317619562149048} +07/25/2024 12:01:26 - INFO - __main__ - Step 7656: {'lr': 0.0004973275527725227, 'samples': 367488, 'steps': 7655, 'loss/train': 1.3669649362564087} +07/25/2024 12:01:26 - INFO - __main__ - Step 7657: {'lr': 0.0004973267855931955, 'samples': 367536, 'steps': 7656, 'loss/train': 2.154040575027466} +07/25/2024 12:01:27 - INFO - __main__ - Step 7658: {'lr': 0.000497326018304359, 'samples': 367584, 'steps': 7657, 'loss/train': 2.0260870456695557} +07/25/2024 12:01:27 - INFO - __main__ - Step 7659: {'lr': 0.0004973252509060134, 'samples': 367632, 'steps': 7658, 'loss/train': 2.200594902038574} +07/25/2024 12:01:27 - INFO - __main__ - Step 7660: {'lr': 0.0004973244833981589, 'samples': 367680, 'steps': 7659, 'loss/train': 2.21616530418396} +07/25/2024 12:01:27 - INFO - __main__ - Step 7661: {'lr': 0.0004973237157807963, 'samples': 367728, 'steps': 7660, 'loss/train': 2.4136667251586914} +07/25/2024 12:01:28 - INFO - __main__ - Step 7662: {'lr': 0.0004973229480539256, 'samples': 367776, 'steps': 7661, 'loss/train': 1.3945043087005615} +07/25/2024 12:01:28 - INFO - __main__ - Step 7663: {'lr': 0.0004973221802175473, 'samples': 367824, 'steps': 7662, 'loss/train': 2.5061187744140625} +07/25/2024 12:01:28 - INFO - __main__ - Step 7664: {'lr': 0.0004973214122716615, 'samples': 367872, 'steps': 7663, 'loss/train': 1.8993422985076904} +07/25/2024 12:01:29 - INFO - __main__ - Step 7665: {'lr': 0.0004973206442162689, 'samples': 367920, 'steps': 7664, 'loss/train': 2.591188907623291} +07/25/2024 12:01:29 - INFO - __main__ - Step 7666: {'lr': 0.0004973198760513695, 'samples': 367968, 'steps': 7665, 'loss/train': 1.8108446598052979} +07/25/2024 12:01:29 - INFO - __main__ - Step 7667: {'lr': 0.0004973191077769639, 'samples': 368016, 'steps': 7666, 'loss/train': 2.232635498046875} +07/25/2024 12:01:29 - INFO - __main__ - Step 7668: {'lr': 0.0004973183393930522, 'samples': 368064, 'steps': 7667, 'loss/train': 2.651214599609375} +07/25/2024 12:01:30 - INFO - __main__ - Step 7669: {'lr': 0.000497317570899635, 'samples': 368112, 'steps': 7668, 'loss/train': 2.7004899978637695} +07/25/2024 12:01:30 - INFO - __main__ - Step 7670: {'lr': 0.0004973168022967125, 'samples': 368160, 'steps': 7669, 'loss/train': 2.444145917892456} +07/25/2024 12:01:30 - INFO - __main__ - Step 7671: {'lr': 0.0004973160335842849, 'samples': 368208, 'steps': 7670, 'loss/train': 2.155787229537964} +07/25/2024 12:01:31 - INFO - __main__ - Step 7672: {'lr': 0.0004973152647623528, 'samples': 368256, 'steps': 7671, 'loss/train': 2.2650444507598877} +07/25/2024 12:01:31 - INFO - __main__ - Step 7673: {'lr': 0.0004973144958309165, 'samples': 368304, 'steps': 7672, 'loss/train': 1.9753563404083252} +07/25/2024 12:01:31 - INFO - __main__ - Step 7674: {'lr': 0.0004973137267899762, 'samples': 368352, 'steps': 7673, 'loss/train': 2.2767207622528076} +07/25/2024 12:01:31 - INFO - __main__ - Step 7675: {'lr': 0.0004973129576395323, 'samples': 368400, 'steps': 7674, 'loss/train': 2.1028964519500732} +07/25/2024 12:01:32 - INFO - __main__ - Step 7676: {'lr': 0.0004973121883795851, 'samples': 368448, 'steps': 7675, 'loss/train': 2.2014145851135254} +07/25/2024 12:01:32 - INFO - __main__ - Step 7677: {'lr': 0.0004973114190101351, 'samples': 368496, 'steps': 7676, 'loss/train': 2.128117084503174} +07/25/2024 12:01:32 - INFO - __main__ - Step 7678: {'lr': 0.0004973106495311825, 'samples': 368544, 'steps': 7677, 'loss/train': 1.942909598350525} +07/25/2024 12:01:32 - INFO - __main__ - Step 7679: {'lr': 0.0004973098799427277, 'samples': 368592, 'steps': 7678, 'loss/train': 1.785217046737671} +07/25/2024 12:01:33 - INFO - __main__ - Step 7680: {'lr': 0.0004973091102447709, 'samples': 368640, 'steps': 7679, 'loss/train': 2.3401761054992676} +07/25/2024 12:01:33 - INFO - __main__ - Step 7681: {'lr': 0.0004973083404373127, 'samples': 368688, 'steps': 7680, 'loss/train': 1.7994147539138794} +07/25/2024 12:01:33 - INFO - __main__ - Step 7682: {'lr': 0.0004973075705203532, 'samples': 368736, 'steps': 7681, 'loss/train': 1.9831892251968384} +07/25/2024 12:01:34 - INFO - __main__ - Step 7683: {'lr': 0.0004973068004938928, 'samples': 368784, 'steps': 7682, 'loss/train': 2.3571906089782715} +07/25/2024 12:01:34 - INFO - __main__ - Step 7684: {'lr': 0.000497306030357932, 'samples': 368832, 'steps': 7683, 'loss/train': 2.018216848373413} +07/25/2024 12:01:34 - INFO - __main__ - Step 7685: {'lr': 0.0004973052601124709, 'samples': 368880, 'steps': 7684, 'loss/train': 1.4915302991867065} +07/25/2024 12:01:34 - INFO - __main__ - Step 7686: {'lr': 0.0004973044897575101, 'samples': 368928, 'steps': 7685, 'loss/train': 1.6733261346817017} +07/25/2024 12:01:35 - INFO - __main__ - Step 7687: {'lr': 0.0004973037192930496, 'samples': 368976, 'steps': 7686, 'loss/train': 1.6908111572265625} +07/25/2024 12:01:35 - INFO - __main__ - Step 7688: {'lr': 0.0004973029487190901, 'samples': 369024, 'steps': 7687, 'loss/train': 2.3601834774017334} +07/25/2024 12:01:35 - INFO - __main__ - Step 7689: {'lr': 0.0004973021780356318, 'samples': 369072, 'steps': 7688, 'loss/train': 2.0966038703918457} +07/25/2024 12:01:36 - INFO - __main__ - Step 7690: {'lr': 0.0004973014072426749, 'samples': 369120, 'steps': 7689, 'loss/train': 1.8524789810180664} +07/25/2024 12:01:36 - INFO - __main__ - Step 7691: {'lr': 0.00049730063634022, 'samples': 369168, 'steps': 7690, 'loss/train': 2.0106067657470703} +07/25/2024 12:01:36 - INFO - __main__ - Step 7692: {'lr': 0.0004972998653282672, 'samples': 369216, 'steps': 7691, 'loss/train': 2.463153600692749} +07/25/2024 12:01:36 - INFO - __main__ - Step 7693: {'lr': 0.000497299094206817, 'samples': 369264, 'steps': 7692, 'loss/train': 2.362605571746826} +07/25/2024 12:01:37 - INFO - __main__ - Step 7694: {'lr': 0.0004972983229758698, 'samples': 369312, 'steps': 7693, 'loss/train': 2.651498317718506} +07/25/2024 12:01:37 - INFO - __main__ - Step 7695: {'lr': 0.0004972975516354257, 'samples': 369360, 'steps': 7694, 'loss/train': 1.8024529218673706} +07/25/2024 12:01:37 - INFO - __main__ - Step 7696: {'lr': 0.0004972967801854852, 'samples': 369408, 'steps': 7695, 'loss/train': 2.6585500240325928} +07/25/2024 12:01:38 - INFO - __main__ - Step 7697: {'lr': 0.0004972960086260485, 'samples': 369456, 'steps': 7696, 'loss/train': 1.9625880718231201} +07/25/2024 12:01:38 - INFO - __main__ - Step 7698: {'lr': 0.0004972952369571162, 'samples': 369504, 'steps': 7697, 'loss/train': 1.4670655727386475} +07/25/2024 12:01:38 - INFO - __main__ - Step 7699: {'lr': 0.0004972944651786884, 'samples': 369552, 'steps': 7698, 'loss/train': 2.115668296813965} +07/25/2024 12:01:38 - INFO - __main__ - Step 7700: {'lr': 0.0004972936932907657, 'samples': 369600, 'steps': 7699, 'loss/train': 2.5446174144744873} +07/25/2024 12:01:39 - INFO - __main__ - Step 7701: {'lr': 0.0004972929212933481, 'samples': 369648, 'steps': 7700, 'loss/train': 2.1450705528259277} +07/25/2024 12:01:39 - INFO - __main__ - Step 7702: {'lr': 0.0004972921491864363, 'samples': 369696, 'steps': 7701, 'loss/train': 1.5781307220458984} +07/25/2024 12:01:39 - INFO - __main__ - Step 7703: {'lr': 0.0004972913769700304, 'samples': 369744, 'steps': 7702, 'loss/train': 1.7328474521636963} +07/25/2024 12:01:40 - INFO - __main__ - Step 7704: {'lr': 0.0004972906046441307, 'samples': 369792, 'steps': 7703, 'loss/train': 2.150753974914551} +07/25/2024 12:01:40 - INFO - __main__ - Step 7705: {'lr': 0.0004972898322087377, 'samples': 369840, 'steps': 7704, 'loss/train': 1.7684714794158936} +07/25/2024 12:01:40 - INFO - __main__ - Step 7706: {'lr': 0.0004972890596638517, 'samples': 369888, 'steps': 7705, 'loss/train': 2.0979483127593994} +07/25/2024 12:01:40 - INFO - __main__ - Step 7707: {'lr': 0.0004972882870094732, 'samples': 369936, 'steps': 7706, 'loss/train': 1.6510564088821411} +07/25/2024 12:01:41 - INFO - __main__ - Step 7708: {'lr': 0.0004972875142456022, 'samples': 369984, 'steps': 7707, 'loss/train': 1.38041090965271} +07/25/2024 12:01:41 - INFO - __main__ - Step 7709: {'lr': 0.0004972867413722392, 'samples': 370032, 'steps': 7708, 'loss/train': 2.061476945877075} +07/25/2024 12:01:41 - INFO - __main__ - Step 7710: {'lr': 0.0004972859683893846, 'samples': 370080, 'steps': 7709, 'loss/train': 1.8972835540771484} +07/25/2024 12:01:42 - INFO - __main__ - Step 7711: {'lr': 0.0004972851952970387, 'samples': 370128, 'steps': 7710, 'loss/train': 2.216489553451538} +07/25/2024 12:01:42 - INFO - __main__ - Step 7712: {'lr': 0.0004972844220952019, 'samples': 370176, 'steps': 7711, 'loss/train': 1.740204095840454} +07/25/2024 12:01:42 - INFO - __main__ - Step 7713: {'lr': 0.0004972836487838743, 'samples': 370224, 'steps': 7712, 'loss/train': 2.0095419883728027} +07/25/2024 12:01:42 - INFO - __main__ - Step 7714: {'lr': 0.0004972828753630567, 'samples': 370272, 'steps': 7713, 'loss/train': 2.278909683227539} +07/25/2024 12:01:43 - INFO - __main__ - Step 7715: {'lr': 0.000497282101832749, 'samples': 370320, 'steps': 7714, 'loss/train': 1.4381855726242065} +07/25/2024 12:01:43 - INFO - __main__ - Step 7716: {'lr': 0.0004972813281929518, 'samples': 370368, 'steps': 7715, 'loss/train': 1.7760499715805054} +07/25/2024 12:01:43 - INFO - __main__ - Step 7717: {'lr': 0.0004972805544436653, 'samples': 370416, 'steps': 7716, 'loss/train': 2.2694125175476074} +07/25/2024 12:01:44 - INFO - __main__ - Step 7718: {'lr': 0.0004972797805848899, 'samples': 370464, 'steps': 7717, 'loss/train': 1.6697907447814941} +07/25/2024 12:01:44 - INFO - __main__ - Step 7719: {'lr': 0.000497279006616626, 'samples': 370512, 'steps': 7718, 'loss/train': 1.8547523021697998} +07/25/2024 12:01:44 - INFO - __main__ - Step 7720: {'lr': 0.0004972782325388738, 'samples': 370560, 'steps': 7719, 'loss/train': 2.5078635215759277} +07/25/2024 12:01:44 - INFO - __main__ - Step 7721: {'lr': 0.0004972774583516337, 'samples': 370608, 'steps': 7720, 'loss/train': 1.681342363357544} +07/25/2024 12:01:45 - INFO - __main__ - Step 7722: {'lr': 0.0004972766840549062, 'samples': 370656, 'steps': 7721, 'loss/train': 1.9610977172851562} +07/25/2024 12:01:45 - INFO - __main__ - Step 7723: {'lr': 0.0004972759096486914, 'samples': 370704, 'steps': 7722, 'loss/train': 2.1547341346740723} +07/25/2024 12:01:45 - INFO - __main__ - Step 7724: {'lr': 0.0004972751351329898, 'samples': 370752, 'steps': 7723, 'loss/train': 1.7733075618743896} +07/25/2024 12:01:46 - INFO - __main__ - Step 7725: {'lr': 0.0004972743605078017, 'samples': 370800, 'steps': 7724, 'loss/train': 2.1474826335906982} +07/25/2024 12:01:46 - INFO - __main__ - Step 7726: {'lr': 0.0004972735857731274, 'samples': 370848, 'steps': 7725, 'loss/train': 0.6150942444801331} +07/25/2024 12:01:46 - INFO - __main__ - Step 7727: {'lr': 0.0004972728109289674, 'samples': 370896, 'steps': 7726, 'loss/train': 2.0493645668029785} +07/25/2024 12:01:46 - INFO - __main__ - Step 7728: {'lr': 0.0004972720359753219, 'samples': 370944, 'steps': 7727, 'loss/train': 2.3602499961853027} +07/25/2024 12:01:47 - INFO - __main__ - Step 7729: {'lr': 0.0004972712609121911, 'samples': 370992, 'steps': 7728, 'loss/train': 2.383479595184326} +07/25/2024 12:01:47 - INFO - __main__ - Step 7730: {'lr': 0.0004972704857395757, 'samples': 371040, 'steps': 7729, 'loss/train': 2.0734455585479736} +07/25/2024 12:01:47 - INFO - __main__ - Step 7731: {'lr': 0.0004972697104574758, 'samples': 371088, 'steps': 7730, 'loss/train': 1.1935436725616455} +07/25/2024 12:01:48 - INFO - __main__ - Step 7732: {'lr': 0.0004972689350658917, 'samples': 371136, 'steps': 7731, 'loss/train': 1.4837546348571777} +07/25/2024 12:01:48 - INFO - __main__ - Step 7733: {'lr': 0.0004972681595648241, 'samples': 371184, 'steps': 7732, 'loss/train': 1.7195112705230713} +07/25/2024 12:01:48 - INFO - __main__ - Step 7734: {'lr': 0.0004972673839542728, 'samples': 371232, 'steps': 7733, 'loss/train': 1.993535041809082} +07/25/2024 12:01:48 - INFO - __main__ - Step 7735: {'lr': 0.0004972666082342386, 'samples': 371280, 'steps': 7734, 'loss/train': 1.8671921491622925} +07/25/2024 12:01:49 - INFO - __main__ - Step 7736: {'lr': 0.0004972658324047216, 'samples': 371328, 'steps': 7735, 'loss/train': 1.5356568098068237} +07/25/2024 12:01:49 - INFO - __main__ - Step 7737: {'lr': 0.0004972650564657223, 'samples': 371376, 'steps': 7736, 'loss/train': 2.0215983390808105} +07/25/2024 12:01:49 - INFO - __main__ - Step 7738: {'lr': 0.0004972642804172409, 'samples': 371424, 'steps': 7737, 'loss/train': 1.3308848142623901} +07/25/2024 12:01:50 - INFO - __main__ - Step 7739: {'lr': 0.0004972635042592778, 'samples': 371472, 'steps': 7738, 'loss/train': 1.8361358642578125} +07/25/2024 12:01:50 - INFO - __main__ - Step 7740: {'lr': 0.0004972627279918334, 'samples': 371520, 'steps': 7739, 'loss/train': 2.2158563137054443} +07/25/2024 12:01:50 - INFO - __main__ - Step 7741: {'lr': 0.0004972619516149079, 'samples': 371568, 'steps': 7740, 'loss/train': 2.3569962978363037} +07/25/2024 12:01:50 - INFO - __main__ - Step 7742: {'lr': 0.0004972611751285019, 'samples': 371616, 'steps': 7741, 'loss/train': 1.654227375984192} +07/25/2024 12:01:51 - INFO - __main__ - Step 7743: {'lr': 0.0004972603985326153, 'samples': 371664, 'steps': 7742, 'loss/train': 2.187300443649292} +07/25/2024 12:01:51 - INFO - __main__ - Step 7744: {'lr': 0.0004972596218272491, 'samples': 371712, 'steps': 7743, 'loss/train': 2.2928059101104736} +07/25/2024 12:01:51 - INFO - __main__ - Step 7745: {'lr': 0.000497258845012403, 'samples': 371760, 'steps': 7744, 'loss/train': 2.118063449859619} +07/25/2024 12:01:52 - INFO - __main__ - Step 7746: {'lr': 0.0004972580680880777, 'samples': 371808, 'steps': 7745, 'loss/train': 2.5381064414978027} +07/25/2024 12:01:52 - INFO - __main__ - Step 7747: {'lr': 0.0004972572910542734, 'samples': 371856, 'steps': 7746, 'loss/train': 1.3910256624221802} +07/25/2024 12:01:52 - INFO - __main__ - Step 7748: {'lr': 0.0004972565139109905, 'samples': 371904, 'steps': 7747, 'loss/train': 2.294065475463867} +07/25/2024 12:01:52 - INFO - __main__ - Step 7749: {'lr': 0.0004972557366582295, 'samples': 371952, 'steps': 7748, 'loss/train': 1.3130184412002563} +07/25/2024 12:01:53 - INFO - __main__ - Step 7750: {'lr': 0.0004972549592959904, 'samples': 372000, 'steps': 7749, 'loss/train': 1.6137478351593018} +07/25/2024 12:01:53 - INFO - __main__ - Step 7751: {'lr': 0.0004972541818242738, 'samples': 372048, 'steps': 7750, 'loss/train': 2.1289539337158203} +07/25/2024 12:01:53 - INFO - __main__ - Step 7752: {'lr': 0.00049725340424308, 'samples': 372096, 'steps': 7751, 'loss/train': 2.212150812149048} +07/25/2024 12:01:53 - INFO - __main__ - Step 7753: {'lr': 0.0004972526265524092, 'samples': 372144, 'steps': 7752, 'loss/train': 2.25547456741333} +07/25/2024 12:01:54 - INFO - __main__ - Step 7754: {'lr': 0.000497251848752262, 'samples': 372192, 'steps': 7753, 'loss/train': 1.9911386966705322} +07/25/2024 12:01:54 - INFO - __main__ - Step 7755: {'lr': 0.0004972510708426385, 'samples': 372240, 'steps': 7754, 'loss/train': 2.305964469909668} +07/25/2024 12:01:54 - INFO - __main__ - Step 7756: {'lr': 0.0004972502928235393, 'samples': 372288, 'steps': 7755, 'loss/train': 2.1511449813842773} +07/25/2024 12:01:55 - INFO - __main__ - Step 7757: {'lr': 0.0004972495146949644, 'samples': 372336, 'steps': 7756, 'loss/train': 1.6286048889160156} +07/25/2024 12:01:55 - INFO - __main__ - Step 7758: {'lr': 0.0004972487364569146, 'samples': 372384, 'steps': 7757, 'loss/train': 2.1350183486938477} +07/25/2024 12:01:55 - INFO - __main__ - Step 7759: {'lr': 0.0004972479581093898, 'samples': 372432, 'steps': 7758, 'loss/train': 2.0292258262634277} +07/25/2024 12:01:55 - INFO - __main__ - Step 7760: {'lr': 0.0004972471796523907, 'samples': 372480, 'steps': 7759, 'loss/train': 1.3921781778335571} +07/25/2024 12:01:56 - INFO - __main__ - Step 7761: {'lr': 0.0004972464010859172, 'samples': 372528, 'steps': 7760, 'loss/train': 1.5660250186920166} +07/25/2024 12:01:56 - INFO - __main__ - Step 7762: {'lr': 0.0004972456224099701, 'samples': 372576, 'steps': 7761, 'loss/train': 0.6413735747337341} +07/25/2024 12:01:56 - INFO - __main__ - Step 7763: {'lr': 0.0004972448436245495, 'samples': 372624, 'steps': 7762, 'loss/train': 1.8423454761505127} +07/25/2024 12:01:57 - INFO - __main__ - Step 7764: {'lr': 0.000497244064729656, 'samples': 372672, 'steps': 7763, 'loss/train': 1.4086942672729492} +07/25/2024 12:01:57 - INFO - __main__ - Step 7765: {'lr': 0.0004972432857252895, 'samples': 372720, 'steps': 7764, 'loss/train': 1.2858184576034546} +07/25/2024 12:01:57 - INFO - __main__ - Step 7766: {'lr': 0.0004972425066114508, 'samples': 372768, 'steps': 7765, 'loss/train': 1.141648530960083} +07/25/2024 12:01:57 - INFO - __main__ - Step 7767: {'lr': 0.0004972417273881399, 'samples': 372816, 'steps': 7766, 'loss/train': 1.9776415824890137} +07/25/2024 12:01:58 - INFO - __main__ - Step 7768: {'lr': 0.0004972409480553573, 'samples': 372864, 'steps': 7767, 'loss/train': 2.33292818069458} +07/25/2024 12:01:58 - INFO - __main__ - Step 7769: {'lr': 0.0004972401686131034, 'samples': 372912, 'steps': 7768, 'loss/train': 2.2386062145233154} +07/25/2024 12:01:58 - INFO - __main__ - Step 7770: {'lr': 0.0004972393890613785, 'samples': 372960, 'steps': 7769, 'loss/train': 2.251246929168701} +07/25/2024 12:01:59 - INFO - __main__ - Step 7771: {'lr': 0.000497238609400183, 'samples': 373008, 'steps': 7770, 'loss/train': 1.6751317977905273} +07/25/2024 12:01:59 - INFO - __main__ - Step 7772: {'lr': 0.000497237829629517, 'samples': 373056, 'steps': 7771, 'loss/train': 2.071923017501831} +07/25/2024 12:01:59 - INFO - __main__ - Step 7773: {'lr': 0.000497237049749381, 'samples': 373104, 'steps': 7772, 'loss/train': 1.8720176219940186} +07/25/2024 12:01:59 - INFO - __main__ - Step 7774: {'lr': 0.0004972362697597756, 'samples': 373152, 'steps': 7773, 'loss/train': 1.3949151039123535} +07/25/2024 12:02:00 - INFO - __main__ - Step 7775: {'lr': 0.0004972354896607006, 'samples': 373200, 'steps': 7774, 'loss/train': 2.2591981887817383} +07/25/2024 12:02:00 - INFO - __main__ - Step 7776: {'lr': 0.0004972347094521569, 'samples': 373248, 'steps': 7775, 'loss/train': 1.9094725847244263} +07/25/2024 12:02:00 - INFO - __main__ - Step 7777: {'lr': 0.0004972339291341444, 'samples': 373296, 'steps': 7776, 'loss/train': 2.0624842643737793} +07/25/2024 12:02:01 - INFO - __main__ - Step 7778: {'lr': 0.0004972331487066638, 'samples': 373344, 'steps': 7777, 'loss/train': 2.2802810668945312} +07/25/2024 12:02:01 - INFO - __main__ - Step 7779: {'lr': 0.0004972323681697152, 'samples': 373392, 'steps': 7778, 'loss/train': 2.3749988079071045} +07/25/2024 12:02:01 - INFO - __main__ - Step 7780: {'lr': 0.0004972315875232991, 'samples': 373440, 'steps': 7779, 'loss/train': 2.130457878112793} +07/25/2024 12:02:01 - INFO - __main__ - Step 7781: {'lr': 0.0004972308067674158, 'samples': 373488, 'steps': 7780, 'loss/train': 1.6328394412994385} +07/25/2024 12:02:02 - INFO - __main__ - Step 7782: {'lr': 0.0004972300259020655, 'samples': 373536, 'steps': 7781, 'loss/train': 2.2240443229675293} +07/25/2024 12:02:02 - INFO - __main__ - Step 7783: {'lr': 0.0004972292449272487, 'samples': 373584, 'steps': 7782, 'loss/train': 1.81447434425354} +07/25/2024 12:02:02 - INFO - __main__ - Step 7784: {'lr': 0.0004972284638429658, 'samples': 373632, 'steps': 7783, 'loss/train': 1.8024410009384155} +07/25/2024 12:02:03 - INFO - __main__ - Step 7785: {'lr': 0.000497227682649217, 'samples': 373680, 'steps': 7784, 'loss/train': 2.149529457092285} +07/25/2024 12:02:03 - INFO - __main__ - Step 7786: {'lr': 0.0004972269013460027, 'samples': 373728, 'steps': 7785, 'loss/train': 2.763054609298706} +07/25/2024 12:02:03 - INFO - __main__ - Step 7787: {'lr': 0.0004972261199333234, 'samples': 373776, 'steps': 7786, 'loss/train': 1.9432168006896973} +07/25/2024 12:02:03 - INFO - __main__ - Step 7788: {'lr': 0.0004972253384111792, 'samples': 373824, 'steps': 7787, 'loss/train': 2.080453634262085} +07/25/2024 12:02:04 - INFO - __main__ - Step 7789: {'lr': 0.0004972245567795705, 'samples': 373872, 'steps': 7788, 'loss/train': 1.7539066076278687} +07/25/2024 12:02:04 - INFO - __main__ - Step 7790: {'lr': 0.0004972237750384978, 'samples': 373920, 'steps': 7789, 'loss/train': 1.998993992805481} +07/25/2024 12:02:04 - INFO - __main__ - Step 7791: {'lr': 0.0004972229931879612, 'samples': 373968, 'steps': 7790, 'loss/train': 1.6963775157928467} +07/25/2024 12:02:05 - INFO - __main__ - Step 7792: {'lr': 0.0004972222112279613, 'samples': 374016, 'steps': 7791, 'loss/train': 1.8771545886993408} +07/25/2024 12:02:05 - INFO - __main__ - Step 7793: {'lr': 0.0004972214291584983, 'samples': 374064, 'steps': 7792, 'loss/train': 2.21453595161438} +07/25/2024 12:02:05 - INFO - __main__ - Step 7794: {'lr': 0.0004972206469795725, 'samples': 374112, 'steps': 7793, 'loss/train': 2.0802054405212402} +07/25/2024 12:02:05 - INFO - __main__ - Step 7795: {'lr': 0.0004972198646911845, 'samples': 374160, 'steps': 7794, 'loss/train': 1.518589973449707} +07/25/2024 12:02:06 - INFO - __main__ - Step 7796: {'lr': 0.0004972190822933343, 'samples': 374208, 'steps': 7795, 'loss/train': 1.888830304145813} +07/25/2024 12:02:06 - INFO - __main__ - Step 7797: {'lr': 0.0004972182997860224, 'samples': 374256, 'steps': 7796, 'loss/train': 1.8549225330352783} +07/25/2024 12:02:06 - INFO - __main__ - Step 7798: {'lr': 0.0004972175171692494, 'samples': 374304, 'steps': 7797, 'loss/train': 1.7114261388778687} +07/25/2024 12:02:07 - INFO - __main__ - Step 7799: {'lr': 0.0004972167344430152, 'samples': 374352, 'steps': 7798, 'loss/train': 2.117532253265381} +07/25/2024 12:02:07 - INFO - __main__ - Step 7800: {'lr': 0.0004972159516073204, 'samples': 374400, 'steps': 7799, 'loss/train': 1.5380539894104004} +07/25/2024 12:02:07 - INFO - __main__ - Step 7801: {'lr': 0.0004972151686621654, 'samples': 374448, 'steps': 7800, 'loss/train': 2.0071775913238525} +07/25/2024 12:02:07 - INFO - __main__ - Step 7802: {'lr': 0.0004972143856075503, 'samples': 374496, 'steps': 7801, 'loss/train': 1.9615169763565063} +07/25/2024 12:02:08 - INFO - __main__ - Step 7803: {'lr': 0.0004972136024434757, 'samples': 374544, 'steps': 7802, 'loss/train': 1.9392668008804321} +07/25/2024 12:02:08 - INFO - __main__ - Step 7804: {'lr': 0.0004972128191699418, 'samples': 374592, 'steps': 7803, 'loss/train': 1.918797492980957} +07/25/2024 12:02:08 - INFO - __main__ - Step 7805: {'lr': 0.000497212035786949, 'samples': 374640, 'steps': 7804, 'loss/train': 1.8438903093338013} +07/25/2024 12:02:09 - INFO - __main__ - Step 7806: {'lr': 0.0004972112522944976, 'samples': 374688, 'steps': 7805, 'loss/train': 1.9358460903167725} +07/25/2024 12:02:09 - INFO - __main__ - Step 7807: {'lr': 0.000497210468692588, 'samples': 374736, 'steps': 7806, 'loss/train': 1.6412839889526367} +07/25/2024 12:02:09 - INFO - __main__ - Step 7808: {'lr': 0.0004972096849812206, 'samples': 374784, 'steps': 7807, 'loss/train': 2.3928675651550293} +07/25/2024 12:02:09 - INFO - __main__ - Step 7809: {'lr': 0.0004972089011603955, 'samples': 374832, 'steps': 7808, 'loss/train': 2.4811596870422363} +07/25/2024 12:02:10 - INFO - __main__ - Step 7810: {'lr': 0.0004972081172301134, 'samples': 374880, 'steps': 7809, 'loss/train': 2.0399067401885986} +07/25/2024 12:02:10 - INFO - __main__ - Step 7811: {'lr': 0.0004972073331903744, 'samples': 374928, 'steps': 7810, 'loss/train': 2.003913640975952} +07/25/2024 12:02:10 - INFO - __main__ - Step 7812: {'lr': 0.000497206549041179, 'samples': 374976, 'steps': 7811, 'loss/train': 0.6990901827812195} +07/25/2024 12:02:11 - INFO - __main__ - Step 7813: {'lr': 0.0004972057647825274, 'samples': 375024, 'steps': 7812, 'loss/train': 2.070037603378296} +07/25/2024 12:02:11 - INFO - __main__ - Step 7814: {'lr': 0.00049720498041442, 'samples': 375072, 'steps': 7813, 'loss/train': 1.609778642654419} +07/25/2024 12:02:11 - INFO - __main__ - Step 7815: {'lr': 0.0004972041959368571, 'samples': 375120, 'steps': 7814, 'loss/train': 2.7122180461883545} +07/25/2024 12:02:11 - INFO - __main__ - Step 7816: {'lr': 0.0004972034113498393, 'samples': 375168, 'steps': 7815, 'loss/train': 2.638681411743164} +07/25/2024 12:02:12 - INFO - __main__ - Step 7817: {'lr': 0.0004972026266533666, 'samples': 375216, 'steps': 7816, 'loss/train': 1.8813287019729614} +07/25/2024 12:02:12 - INFO - __main__ - Step 7818: {'lr': 0.0004972018418474396, 'samples': 375264, 'steps': 7817, 'loss/train': 2.308939218521118} +07/25/2024 12:02:12 - INFO - __main__ - Step 7819: {'lr': 0.0004972010569320585, 'samples': 375312, 'steps': 7818, 'loss/train': 1.7410993576049805} +07/25/2024 12:02:13 - INFO - __main__ - Step 7820: {'lr': 0.0004972002719072237, 'samples': 375360, 'steps': 7819, 'loss/train': 1.9854905605316162} +07/25/2024 12:02:13 - INFO - __main__ - Step 7821: {'lr': 0.0004971994867729356, 'samples': 375408, 'steps': 7820, 'loss/train': 1.745620846748352} +07/25/2024 12:02:13 - INFO - __main__ - Step 7822: {'lr': 0.0004971987015291945, 'samples': 375456, 'steps': 7821, 'loss/train': 2.222299814224243} +07/25/2024 12:02:13 - INFO - __main__ - Step 7823: {'lr': 0.0004971979161760006, 'samples': 375504, 'steps': 7822, 'loss/train': 2.5264124870300293} +07/25/2024 12:02:14 - INFO - __main__ - Step 7824: {'lr': 0.0004971971307133545, 'samples': 375552, 'steps': 7823, 'loss/train': 2.160489797592163} +07/25/2024 12:02:14 - INFO - __main__ - Step 7825: {'lr': 0.0004971963451412564, 'samples': 375600, 'steps': 7824, 'loss/train': 1.7022801637649536} +07/25/2024 12:02:14 - INFO - __main__ - Step 7826: {'lr': 0.0004971955594597067, 'samples': 375648, 'steps': 7825, 'loss/train': 1.961668610572815} +07/25/2024 12:02:15 - INFO - __main__ - Step 7827: {'lr': 0.0004971947736687057, 'samples': 375696, 'steps': 7826, 'loss/train': 2.1508586406707764} +07/25/2024 12:02:15 - INFO - __main__ - Step 7828: {'lr': 0.0004971939877682538, 'samples': 375744, 'steps': 7827, 'loss/train': 1.7002928256988525} +07/25/2024 12:02:15 - INFO - __main__ - Step 7829: {'lr': 0.0004971932017583514, 'samples': 375792, 'steps': 7828, 'loss/train': 2.0433807373046875} +07/25/2024 12:02:15 - INFO - __main__ - Step 7830: {'lr': 0.0004971924156389986, 'samples': 375840, 'steps': 7829, 'loss/train': 2.07211971282959} +07/25/2024 12:02:16 - INFO - __main__ - Step 7831: {'lr': 0.0004971916294101962, 'samples': 375888, 'steps': 7830, 'loss/train': 2.094085693359375} +07/25/2024 12:02:16 - INFO - __main__ - Step 7832: {'lr': 0.0004971908430719441, 'samples': 375936, 'steps': 7831, 'loss/train': 2.2468066215515137} +07/25/2024 12:02:16 - INFO - __main__ - Step 7833: {'lr': 0.0004971900566242427, 'samples': 375984, 'steps': 7832, 'loss/train': 1.364355206489563} +07/25/2024 12:02:17 - INFO - __main__ - Step 7834: {'lr': 0.0004971892700670927, 'samples': 376032, 'steps': 7833, 'loss/train': 2.0382649898529053} +07/25/2024 12:02:17 - INFO - __main__ - Step 7835: {'lr': 0.0004971884834004941, 'samples': 376080, 'steps': 7834, 'loss/train': 2.7906408309936523} +07/25/2024 12:02:17 - INFO - __main__ - Step 7836: {'lr': 0.0004971876966244474, 'samples': 376128, 'steps': 7835, 'loss/train': 2.3669419288635254} +07/25/2024 12:02:17 - INFO - __main__ - Step 7837: {'lr': 0.0004971869097389528, 'samples': 376176, 'steps': 7836, 'loss/train': 2.0829732418060303} +07/25/2024 12:02:18 - INFO - __main__ - Step 7838: {'lr': 0.000497186122744011, 'samples': 376224, 'steps': 7837, 'loss/train': 2.03102970123291} +07/25/2024 12:02:18 - INFO - __main__ - Step 7839: {'lr': 0.0004971853356396219, 'samples': 376272, 'steps': 7838, 'loss/train': 1.992648720741272} +07/25/2024 12:02:18 - INFO - __main__ - Step 7840: {'lr': 0.0004971845484257861, 'samples': 376320, 'steps': 7839, 'loss/train': 2.203390598297119} +07/25/2024 12:02:19 - INFO - __main__ - Step 7841: {'lr': 0.0004971837611025039, 'samples': 376368, 'steps': 7840, 'loss/train': 1.9929248094558716} +07/25/2024 12:02:19 - INFO - __main__ - Step 7842: {'lr': 0.0004971829736697758, 'samples': 376416, 'steps': 7841, 'loss/train': 1.9839246273040771} +07/25/2024 12:02:19 - INFO - __main__ - Step 7843: {'lr': 0.0004971821861276018, 'samples': 376464, 'steps': 7842, 'loss/train': 2.266531229019165} +07/25/2024 12:02:19 - INFO - __main__ - Step 7844: {'lr': 0.0004971813984759826, 'samples': 376512, 'steps': 7843, 'loss/train': 2.01827073097229} +07/25/2024 12:02:20 - INFO - __main__ - Step 7845: {'lr': 0.0004971806107149184, 'samples': 376560, 'steps': 7844, 'loss/train': 1.8154245615005493} +07/25/2024 12:02:20 - INFO - __main__ - Step 7846: {'lr': 0.0004971798228444095, 'samples': 376608, 'steps': 7845, 'loss/train': 1.9906331300735474} +07/25/2024 12:02:20 - INFO - __main__ - Step 7847: {'lr': 0.0004971790348644563, 'samples': 376656, 'steps': 7846, 'loss/train': 2.095024347305298} +07/25/2024 12:02:20 - INFO - __main__ - Step 7848: {'lr': 0.0004971782467750592, 'samples': 376704, 'steps': 7847, 'loss/train': 0.6082751154899597} +07/25/2024 12:02:21 - INFO - __main__ - Step 7849: {'lr': 0.0004971774585762185, 'samples': 376752, 'steps': 7848, 'loss/train': 1.9056819677352905} +07/25/2024 12:02:21 - INFO - __main__ - Step 7850: {'lr': 0.0004971766702679345, 'samples': 376800, 'steps': 7849, 'loss/train': 1.7460962533950806} +07/25/2024 12:02:21 - INFO - __main__ - Step 7851: {'lr': 0.0004971758818502075, 'samples': 376848, 'steps': 7850, 'loss/train': 2.296602725982666} +07/25/2024 12:02:22 - INFO - __main__ - Step 7852: {'lr': 0.0004971750933230381, 'samples': 376896, 'steps': 7851, 'loss/train': 2.3917198181152344} +07/25/2024 12:02:22 - INFO - __main__ - Step 7853: {'lr': 0.0004971743046864264, 'samples': 376944, 'steps': 7852, 'loss/train': 2.087251663208008} +07/25/2024 12:02:22 - INFO - __main__ - Step 7854: {'lr': 0.0004971735159403728, 'samples': 376992, 'steps': 7853, 'loss/train': 2.4999241828918457} +07/25/2024 12:02:22 - INFO - __main__ - Step 7855: {'lr': 0.0004971727270848779, 'samples': 377040, 'steps': 7854, 'loss/train': 2.2232236862182617} +07/25/2024 12:02:23 - INFO - __main__ - Step 7856: {'lr': 0.0004971719381199417, 'samples': 377088, 'steps': 7855, 'loss/train': 2.2941033840179443} +07/25/2024 12:02:23 - INFO - __main__ - Step 7857: {'lr': 0.0004971711490455647, 'samples': 377136, 'steps': 7856, 'loss/train': 1.2364574670791626} +07/25/2024 12:02:23 - INFO - __main__ - Step 7858: {'lr': 0.0004971703598617473, 'samples': 377184, 'steps': 7857, 'loss/train': 1.8702888488769531} +07/25/2024 12:02:24 - INFO - __main__ - Step 7859: {'lr': 0.0004971695705684897, 'samples': 377232, 'steps': 7858, 'loss/train': 2.761699914932251} +07/25/2024 12:02:24 - INFO - __main__ - Step 7860: {'lr': 0.0004971687811657923, 'samples': 377280, 'steps': 7859, 'loss/train': 1.998285174369812} +07/25/2024 12:02:24 - INFO - __main__ - Step 7861: {'lr': 0.0004971679916536556, 'samples': 377328, 'steps': 7860, 'loss/train': 2.240154981613159} +07/25/2024 12:02:24 - INFO - __main__ - Step 7862: {'lr': 0.0004971672020320798, 'samples': 377376, 'steps': 7861, 'loss/train': 2.4726674556732178} +07/25/2024 12:02:25 - INFO - __main__ - Step 7863: {'lr': 0.0004971664123010653, 'samples': 377424, 'steps': 7862, 'loss/train': 2.4315247535705566} +07/25/2024 12:02:25 - INFO - __main__ - Step 7864: {'lr': 0.0004971656224606125, 'samples': 377472, 'steps': 7863, 'loss/train': 1.6165982484817505} +07/25/2024 12:02:25 - INFO - __main__ - Step 7865: {'lr': 0.0004971648325107216, 'samples': 377520, 'steps': 7864, 'loss/train': 2.4704854488372803} +07/25/2024 12:02:26 - INFO - __main__ - Step 7866: {'lr': 0.0004971640424513931, 'samples': 377568, 'steps': 7865, 'loss/train': 1.9559135437011719} +07/25/2024 12:02:26 - INFO - __main__ - Step 7867: {'lr': 0.0004971632522826272, 'samples': 377616, 'steps': 7866, 'loss/train': 1.4918177127838135} +07/25/2024 12:02:26 - INFO - __main__ - Step 7868: {'lr': 0.0004971624620044244, 'samples': 377664, 'steps': 7867, 'loss/train': 1.823886513710022} +07/25/2024 12:02:26 - INFO - __main__ - Step 7869: {'lr': 0.000497161671616785, 'samples': 377712, 'steps': 7868, 'loss/train': 1.8524686098098755} +07/25/2024 12:02:27 - INFO - __main__ - Step 7870: {'lr': 0.0004971608811197093, 'samples': 377760, 'steps': 7869, 'loss/train': 1.9397982358932495} +07/25/2024 12:02:27 - INFO - __main__ - Step 7871: {'lr': 0.0004971600905131977, 'samples': 377808, 'steps': 7870, 'loss/train': 2.064786672592163} +07/25/2024 12:02:27 - INFO - __main__ - Step 7872: {'lr': 0.0004971592997972504, 'samples': 377856, 'steps': 7871, 'loss/train': 2.020679235458374} +07/25/2024 12:02:28 - INFO - __main__ - Step 7873: {'lr': 0.0004971585089718682, 'samples': 377904, 'steps': 7872, 'loss/train': 1.8640894889831543} +07/25/2024 12:02:28 - INFO - __main__ - Step 7874: {'lr': 0.0004971577180370508, 'samples': 377952, 'steps': 7873, 'loss/train': 1.7021836042404175} +07/25/2024 12:02:28 - INFO - __main__ - Step 7875: {'lr': 0.0004971569269927992, 'samples': 378000, 'steps': 7874, 'loss/train': 2.6899683475494385} +07/25/2024 12:02:28 - INFO - __main__ - Step 7876: {'lr': 0.0004971561358391132, 'samples': 378048, 'steps': 7875, 'loss/train': 2.4476704597473145} +07/25/2024 12:02:29 - INFO - __main__ - Step 7877: {'lr': 0.0004971553445759935, 'samples': 378096, 'steps': 7876, 'loss/train': 2.044224262237549} +07/25/2024 12:02:29 - INFO - __main__ - Step 7878: {'lr': 0.0004971545532034403, 'samples': 378144, 'steps': 7877, 'loss/train': 2.214571714401245} +07/25/2024 12:02:29 - INFO - __main__ - Step 7879: {'lr': 0.000497153761721454, 'samples': 378192, 'steps': 7878, 'loss/train': 1.8707082271575928} +07/25/2024 12:02:30 - INFO - __main__ - Step 7880: {'lr': 0.000497152970130035, 'samples': 378240, 'steps': 7879, 'loss/train': 1.825571894645691} +07/25/2024 12:02:30 - INFO - __main__ - Step 7881: {'lr': 0.0004971521784291835, 'samples': 378288, 'steps': 7880, 'loss/train': 1.8172087669372559} +07/25/2024 12:02:30 - INFO - __main__ - Step 7882: {'lr': 0.0004971513866189, 'samples': 378336, 'steps': 7881, 'loss/train': 2.5512635707855225} +07/25/2024 12:02:30 - INFO - __main__ - Step 7883: {'lr': 0.0004971505946991848, 'samples': 378384, 'steps': 7882, 'loss/train': 2.5736804008483887} +07/25/2024 12:02:31 - INFO - __main__ - Step 7884: {'lr': 0.0004971498026700382, 'samples': 378432, 'steps': 7883, 'loss/train': 1.7487819194793701} +07/25/2024 12:02:31 - INFO - __main__ - Step 7885: {'lr': 0.0004971490105314606, 'samples': 378480, 'steps': 7884, 'loss/train': 2.4187355041503906} +07/25/2024 12:02:31 - INFO - __main__ - Step 7886: {'lr': 0.0004971482182834523, 'samples': 378528, 'steps': 7885, 'loss/train': 1.529443383216858} +07/25/2024 12:02:32 - INFO - __main__ - Step 7887: {'lr': 0.0004971474259260137, 'samples': 378576, 'steps': 7886, 'loss/train': 1.7717046737670898} +07/25/2024 12:02:32 - INFO - __main__ - Step 7888: {'lr': 0.0004971466334591452, 'samples': 378624, 'steps': 7887, 'loss/train': 2.0699615478515625} +07/25/2024 12:02:32 - INFO - __main__ - Step 7889: {'lr': 0.0004971458408828471, 'samples': 378672, 'steps': 7888, 'loss/train': 2.492372751235962} +07/25/2024 12:02:32 - INFO - __main__ - Step 7890: {'lr': 0.0004971450481971197, 'samples': 378720, 'steps': 7889, 'loss/train': 2.0162837505340576} +07/25/2024 12:02:33 - INFO - __main__ - Step 7891: {'lr': 0.0004971442554019634, 'samples': 378768, 'steps': 7890, 'loss/train': 1.6145046949386597} +07/25/2024 12:02:33 - INFO - __main__ - Step 7892: {'lr': 0.0004971434624973785, 'samples': 378816, 'steps': 7891, 'loss/train': 2.3181991577148438} +07/25/2024 12:02:33 - INFO - __main__ - Step 7893: {'lr': 0.0004971426694833656, 'samples': 378864, 'steps': 7892, 'loss/train': 2.1757619380950928} +07/25/2024 12:02:34 - INFO - __main__ - Step 7894: {'lr': 0.0004971418763599247, 'samples': 378912, 'steps': 7893, 'loss/train': 2.1968886852264404} +07/25/2024 12:02:34 - INFO - __main__ - Step 7895: {'lr': 0.0004971410831270563, 'samples': 378960, 'steps': 7894, 'loss/train': 2.365232467651367} +07/25/2024 12:02:34 - INFO - __main__ - Step 7896: {'lr': 0.0004971402897847608, 'samples': 379008, 'steps': 7895, 'loss/train': 2.14804744720459} +07/25/2024 12:02:34 - INFO - __main__ - Step 7897: {'lr': 0.0004971394963330384, 'samples': 379056, 'steps': 7896, 'loss/train': 1.692529320716858} +07/25/2024 12:02:35 - INFO - __main__ - Step 7898: {'lr': 0.0004971387027718897, 'samples': 379104, 'steps': 7897, 'loss/train': 1.8296581506729126} +07/25/2024 12:02:35 - INFO - __main__ - Step 7899: {'lr': 0.0004971379091013148, 'samples': 379152, 'steps': 7898, 'loss/train': 1.8525819778442383} +07/25/2024 12:02:35 - INFO - __main__ - Step 7900: {'lr': 0.0004971371153213143, 'samples': 379200, 'steps': 7899, 'loss/train': 1.46533203125} +07/25/2024 12:02:36 - INFO - __main__ - Step 7901: {'lr': 0.0004971363214318883, 'samples': 379248, 'steps': 7900, 'loss/train': 1.8499759435653687} +07/25/2024 12:02:36 - INFO - __main__ - Step 7902: {'lr': 0.0004971355274330372, 'samples': 379296, 'steps': 7901, 'loss/train': 1.7265924215316772} +07/25/2024 12:02:36 - INFO - __main__ - Step 7903: {'lr': 0.0004971347333247615, 'samples': 379344, 'steps': 7902, 'loss/train': 2.6973743438720703} +07/25/2024 12:02:36 - INFO - __main__ - Step 7904: {'lr': 0.0004971339391070614, 'samples': 379392, 'steps': 7903, 'loss/train': 1.6292258501052856} +07/25/2024 12:02:37 - INFO - __main__ - Step 7905: {'lr': 0.0004971331447799374, 'samples': 379440, 'steps': 7904, 'loss/train': 1.680062174797058} +07/25/2024 12:02:37 - INFO - __main__ - Step 7906: {'lr': 0.0004971323503433897, 'samples': 379488, 'steps': 7905, 'loss/train': 3.0133190155029297} +07/25/2024 12:02:37 - INFO - __main__ - Step 7907: {'lr': 0.0004971315557974188, 'samples': 379536, 'steps': 7906, 'loss/train': 2.1937286853790283} +07/25/2024 12:02:38 - INFO - __main__ - Step 7908: {'lr': 0.0004971307611420249, 'samples': 379584, 'steps': 7907, 'loss/train': 2.1371076107025146} +07/25/2024 12:02:38 - INFO - __main__ - Step 7909: {'lr': 0.0004971299663772084, 'samples': 379632, 'steps': 7908, 'loss/train': 1.9001929759979248} +07/25/2024 12:02:38 - INFO - __main__ - Step 7910: {'lr': 0.0004971291715029697, 'samples': 379680, 'steps': 7909, 'loss/train': 1.957559585571289} +07/25/2024 12:02:38 - INFO - __main__ - Step 7911: {'lr': 0.0004971283765193092, 'samples': 379728, 'steps': 7910, 'loss/train': 1.6683577299118042} +07/25/2024 12:02:39 - INFO - __main__ - Step 7912: {'lr': 0.0004971275814262271, 'samples': 379776, 'steps': 7911, 'loss/train': 2.357656955718994} +07/25/2024 12:02:39 - INFO - __main__ - Step 7913: {'lr': 0.0004971267862237239, 'samples': 379824, 'steps': 7912, 'loss/train': 1.849816083908081} +07/25/2024 12:02:39 - INFO - __main__ - Step 7914: {'lr': 0.0004971259909117998, 'samples': 379872, 'steps': 7913, 'loss/train': 2.5860490798950195} +07/25/2024 12:02:40 - INFO - __main__ - Step 7915: {'lr': 0.0004971251954904553, 'samples': 379920, 'steps': 7914, 'loss/train': 2.175352096557617} +07/25/2024 12:02:40 - INFO - __main__ - Step 7916: {'lr': 0.0004971243999596907, 'samples': 379968, 'steps': 7915, 'loss/train': 1.896638035774231} +07/25/2024 12:02:40 - INFO - __main__ - Step 7917: {'lr': 0.0004971236043195064, 'samples': 380016, 'steps': 7916, 'loss/train': 2.3051607608795166} +07/25/2024 12:02:40 - INFO - __main__ - Step 7918: {'lr': 0.0004971228085699025, 'samples': 380064, 'steps': 7917, 'loss/train': 2.4725661277770996} +07/25/2024 12:02:41 - INFO - __main__ - Step 7919: {'lr': 0.0004971220127108797, 'samples': 380112, 'steps': 7918, 'loss/train': 2.0427658557891846} +07/25/2024 12:02:41 - INFO - __main__ - Step 7920: {'lr': 0.0004971212167424381, 'samples': 380160, 'steps': 7919, 'loss/train': 1.683219075202942} +07/25/2024 12:02:41 - INFO - __main__ - Step 7921: {'lr': 0.0004971204206645782, 'samples': 380208, 'steps': 7920, 'loss/train': 2.2149388790130615} +07/25/2024 12:02:42 - INFO - __main__ - Step 7922: {'lr': 0.0004971196244773004, 'samples': 380256, 'steps': 7921, 'loss/train': 2.0421230792999268} +07/25/2024 12:02:42 - INFO - __main__ - Step 7923: {'lr': 0.0004971188281806047, 'samples': 380304, 'steps': 7922, 'loss/train': 2.0021259784698486} +07/25/2024 12:02:42 - INFO - __main__ - Step 7924: {'lr': 0.0004971180317744918, 'samples': 380352, 'steps': 7923, 'loss/train': 1.623558759689331} +07/25/2024 12:02:42 - INFO - __main__ - Step 7925: {'lr': 0.0004971172352589621, 'samples': 380400, 'steps': 7924, 'loss/train': 2.1287970542907715} +07/25/2024 12:02:43 - INFO - __main__ - Step 7926: {'lr': 0.0004971164386340156, 'samples': 380448, 'steps': 7925, 'loss/train': 2.470064401626587} +07/25/2024 12:02:43 - INFO - __main__ - Step 7927: {'lr': 0.0004971156418996531, 'samples': 380496, 'steps': 7926, 'loss/train': 1.5382776260375977} +07/25/2024 12:02:43 - INFO - __main__ - Step 7928: {'lr': 0.0004971148450558747, 'samples': 380544, 'steps': 7927, 'loss/train': 2.5778422355651855} +07/25/2024 12:02:44 - INFO - __main__ - Step 7929: {'lr': 0.0004971140481026806, 'samples': 380592, 'steps': 7928, 'loss/train': 2.004002094268799} +07/25/2024 12:02:44 - INFO - __main__ - Step 7930: {'lr': 0.0004971132510400714, 'samples': 380640, 'steps': 7929, 'loss/train': 2.330512762069702} +07/25/2024 12:02:44 - INFO - __main__ - Step 7931: {'lr': 0.0004971124538680474, 'samples': 380688, 'steps': 7930, 'loss/train': 1.827989101409912} +07/25/2024 12:02:44 - INFO - __main__ - Step 7932: {'lr': 0.0004971116565866089, 'samples': 380736, 'steps': 7931, 'loss/train': 2.5723137855529785} +07/25/2024 12:02:45 - INFO - __main__ - Step 7933: {'lr': 0.0004971108591957563, 'samples': 380784, 'steps': 7932, 'loss/train': 1.8066637516021729} +07/25/2024 12:02:45 - INFO - __main__ - Step 7934: {'lr': 0.0004971100616954899, 'samples': 380832, 'steps': 7933, 'loss/train': 2.3138787746429443} +07/25/2024 12:02:45 - INFO - __main__ - Step 7935: {'lr': 0.0004971092640858101, 'samples': 380880, 'steps': 7934, 'loss/train': 1.7419565916061401} +07/25/2024 12:02:46 - INFO - __main__ - Step 7936: {'lr': 0.0004971084663667173, 'samples': 380928, 'steps': 7935, 'loss/train': 1.8238197565078735} +07/25/2024 12:02:46 - INFO - __main__ - Step 7937: {'lr': 0.0004971076685382118, 'samples': 380976, 'steps': 7936, 'loss/train': 2.483422040939331} +07/25/2024 12:02:46 - INFO - __main__ - Step 7938: {'lr': 0.0004971068706002938, 'samples': 381024, 'steps': 7937, 'loss/train': 1.9352720975875854} +07/25/2024 12:02:46 - INFO - __main__ - Step 7939: {'lr': 0.0004971060725529639, 'samples': 381072, 'steps': 7938, 'loss/train': 1.8719819784164429} +07/25/2024 12:02:47 - INFO - __main__ - Step 7940: {'lr': 0.0004971052743962224, 'samples': 381120, 'steps': 7939, 'loss/train': 2.2404613494873047} +07/25/2024 12:02:47 - INFO - __main__ - Step 7941: {'lr': 0.0004971044761300696, 'samples': 381168, 'steps': 7940, 'loss/train': 1.780259132385254} +07/25/2024 12:02:47 - INFO - __main__ - Step 7942: {'lr': 0.0004971036777545058, 'samples': 381216, 'steps': 7941, 'loss/train': 2.0584347248077393} +07/25/2024 12:02:48 - INFO - __main__ - Step 7943: {'lr': 0.0004971028792695315, 'samples': 381264, 'steps': 7942, 'loss/train': 1.8533830642700195} +07/25/2024 12:02:48 - INFO - __main__ - Step 7944: {'lr': 0.0004971020806751468, 'samples': 381312, 'steps': 7943, 'loss/train': 1.5160871744155884} +07/25/2024 12:02:48 - INFO - __main__ - Step 7945: {'lr': 0.0004971012819713524, 'samples': 381360, 'steps': 7944, 'loss/train': 1.8905208110809326} +07/25/2024 12:02:48 - INFO - __main__ - Step 7946: {'lr': 0.0004971004831581484, 'samples': 381408, 'steps': 7945, 'loss/train': 2.269307851791382} +07/25/2024 12:02:49 - INFO - __main__ - Step 7947: {'lr': 0.0004970996842355352, 'samples': 381456, 'steps': 7946, 'loss/train': 1.76309072971344} +07/25/2024 12:02:49 - INFO - __main__ - Step 7948: {'lr': 0.0004970988852035132, 'samples': 381504, 'steps': 7947, 'loss/train': 2.0200109481811523} +07/25/2024 12:02:49 - INFO - __main__ - Step 7949: {'lr': 0.0004970980860620829, 'samples': 381552, 'steps': 7948, 'loss/train': 3.5589497089385986} +07/25/2024 12:02:50 - INFO - __main__ - Step 7950: {'lr': 0.0004970972868112443, 'samples': 381600, 'steps': 7949, 'loss/train': 2.5685667991638184} +07/25/2024 12:02:50 - INFO - __main__ - Step 7951: {'lr': 0.000497096487450998, 'samples': 381648, 'steps': 7950, 'loss/train': 1.2523618936538696} +07/25/2024 12:02:50 - INFO - __main__ - Step 7952: {'lr': 0.0004970956879813443, 'samples': 381696, 'steps': 7951, 'loss/train': 1.7171690464019775} +07/25/2024 12:02:50 - INFO - __main__ - Step 7953: {'lr': 0.0004970948884022835, 'samples': 381744, 'steps': 7952, 'loss/train': 1.866081953048706} +07/25/2024 12:02:51 - INFO - __main__ - Step 7954: {'lr': 0.000497094088713816, 'samples': 381792, 'steps': 7953, 'loss/train': 1.8984737396240234} +07/25/2024 12:02:51 - INFO - __main__ - Step 7955: {'lr': 0.0004970932889159423, 'samples': 381840, 'steps': 7954, 'loss/train': 2.4222612380981445} +07/25/2024 12:02:51 - INFO - __main__ - Step 7956: {'lr': 0.0004970924890086625, 'samples': 381888, 'steps': 7955, 'loss/train': 2.5676634311676025} +07/25/2024 12:02:52 - INFO - __main__ - Step 7957: {'lr': 0.0004970916889919771, 'samples': 381936, 'steps': 7956, 'loss/train': 1.8541665077209473} +07/25/2024 12:02:52 - INFO - __main__ - Step 7958: {'lr': 0.0004970908888658864, 'samples': 381984, 'steps': 7957, 'loss/train': 1.9855396747589111} +07/25/2024 12:02:52 - INFO - __main__ - Step 7959: {'lr': 0.0004970900886303908, 'samples': 382032, 'steps': 7958, 'loss/train': 1.5837342739105225} +07/25/2024 12:02:52 - INFO - __main__ - Step 7960: {'lr': 0.0004970892882854907, 'samples': 382080, 'steps': 7959, 'loss/train': 1.9351837635040283} +07/25/2024 12:02:53 - INFO - __main__ - Step 7961: {'lr': 0.0004970884878311863, 'samples': 382128, 'steps': 7960, 'loss/train': 2.368110179901123} +07/25/2024 12:02:53 - INFO - __main__ - Step 7962: {'lr': 0.0004970876872674781, 'samples': 382176, 'steps': 7961, 'loss/train': 2.3467650413513184} +07/25/2024 12:02:53 - INFO - __main__ - Step 7963: {'lr': 0.0004970868865943663, 'samples': 382224, 'steps': 7962, 'loss/train': 1.775335431098938} +07/25/2024 12:02:54 - INFO - __main__ - Step 7964: {'lr': 0.0004970860858118514, 'samples': 382272, 'steps': 7963, 'loss/train': 2.053800344467163} +07/25/2024 12:02:54 - INFO - __main__ - Step 7965: {'lr': 0.0004970852849199338, 'samples': 382320, 'steps': 7964, 'loss/train': 1.850450873374939} +07/25/2024 12:02:54 - DEBUG - datasets.packaged_modules.json.json - Batch of 10494233 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:02:54 - INFO - __main__ - Step 7966: {'lr': 0.0004970844839186136, 'samples': 382368, 'steps': 7965, 'loss/train': 1.3622199296951294} +07/25/2024 12:02:54 - INFO - __main__ - Step 7967: {'lr': 0.0004970836828078914, 'samples': 382416, 'steps': 7966, 'loss/train': 2.207662343978882} +07/25/2024 12:02:55 - INFO - __main__ - Step 7968: {'lr': 0.0004970828815877675, 'samples': 382464, 'steps': 7967, 'loss/train': 1.9915270805358887} +07/25/2024 12:02:55 - INFO - __main__ - Step 7969: {'lr': 0.000497082080258242, 'samples': 382512, 'steps': 7968, 'loss/train': 1.7251427173614502} +07/25/2024 12:02:55 - INFO - __main__ - Step 7970: {'lr': 0.0004970812788193157, 'samples': 382560, 'steps': 7969, 'loss/train': 2.092514991760254} +07/25/2024 12:02:56 - INFO - __main__ - Step 7971: {'lr': 0.0004970804772709888, 'samples': 382608, 'steps': 7970, 'loss/train': 2.188264846801758} +07/25/2024 12:02:56 - INFO - __main__ - Step 7972: {'lr': 0.0004970796756132614, 'samples': 382656, 'steps': 7971, 'loss/train': 1.9657883644104004} +07/25/2024 12:02:56 - INFO - __main__ - Step 7973: {'lr': 0.0004970788738461341, 'samples': 382704, 'steps': 7972, 'loss/train': 1.9875072240829468} +07/25/2024 12:02:56 - INFO - __main__ - Step 7974: {'lr': 0.0004970780719696073, 'samples': 382752, 'steps': 7973, 'loss/train': 2.531735897064209} +07/25/2024 12:02:57 - INFO - __main__ - Step 7975: {'lr': 0.0004970772699836811, 'samples': 382800, 'steps': 7974, 'loss/train': 1.7136961221694946} +07/25/2024 12:02:57 - INFO - __main__ - Step 7976: {'lr': 0.0004970764678883561, 'samples': 382848, 'steps': 7975, 'loss/train': 2.2420060634613037} +07/25/2024 12:02:57 - INFO - __main__ - Step 7977: {'lr': 0.0004970756656836326, 'samples': 382896, 'steps': 7976, 'loss/train': 1.7125821113586426} +07/25/2024 12:02:57 - INFO - __main__ - Step 7978: {'lr': 0.0004970748633695108, 'samples': 382944, 'steps': 7977, 'loss/train': 1.2470617294311523} +07/25/2024 12:02:58 - INFO - __main__ - Step 7979: {'lr': 0.0004970740609459913, 'samples': 382992, 'steps': 7978, 'loss/train': 3.562842607498169} +07/25/2024 12:02:58 - INFO - __main__ - Step 7980: {'lr': 0.0004970732584130743, 'samples': 383040, 'steps': 7979, 'loss/train': 2.2040674686431885} +07/25/2024 12:02:58 - INFO - __main__ - Step 7981: {'lr': 0.0004970724557707601, 'samples': 383088, 'steps': 7980, 'loss/train': 2.18532133102417} +07/25/2024 12:02:59 - INFO - __main__ - Step 7982: {'lr': 0.0004970716530190491, 'samples': 383136, 'steps': 7981, 'loss/train': 2.2800636291503906} +07/25/2024 12:02:59 - INFO - __main__ - Step 7983: {'lr': 0.0004970708501579418, 'samples': 383184, 'steps': 7982, 'loss/train': 2.134655237197876} +07/25/2024 12:02:59 - INFO - __main__ - Step 7984: {'lr': 0.0004970700471874384, 'samples': 383232, 'steps': 7983, 'loss/train': 1.838072419166565} +07/25/2024 12:02:59 - INFO - __main__ - Step 7985: {'lr': 0.0004970692441075394, 'samples': 383280, 'steps': 7984, 'loss/train': 1.6481906175613403} +07/25/2024 12:03:00 - INFO - __main__ - Step 7986: {'lr': 0.000497068440918245, 'samples': 383328, 'steps': 7985, 'loss/train': 2.183335304260254} +07/25/2024 12:03:00 - INFO - __main__ - Step 7987: {'lr': 0.0004970676376195556, 'samples': 383376, 'steps': 7986, 'loss/train': 1.8575806617736816} +07/25/2024 12:03:00 - INFO - __main__ - Step 7988: {'lr': 0.0004970668342114716, 'samples': 383424, 'steps': 7987, 'loss/train': 2.4857664108276367} +07/25/2024 12:03:01 - INFO - __main__ - Step 7989: {'lr': 0.0004970660306939932, 'samples': 383472, 'steps': 7988, 'loss/train': 2.0260684490203857} +07/25/2024 12:03:01 - INFO - __main__ - Step 7990: {'lr': 0.0004970652270671211, 'samples': 383520, 'steps': 7989, 'loss/train': 1.3080371618270874} +07/25/2024 12:03:01 - INFO - __main__ - Step 7991: {'lr': 0.0004970644233308554, 'samples': 383568, 'steps': 7990, 'loss/train': 2.335736036300659} +07/25/2024 12:03:01 - INFO - __main__ - Step 7992: {'lr': 0.0004970636194851964, 'samples': 383616, 'steps': 7991, 'loss/train': 1.7671667337417603} +07/25/2024 12:03:02 - INFO - __main__ - Step 7993: {'lr': 0.0004970628155301445, 'samples': 383664, 'steps': 7992, 'loss/train': 1.641112208366394} +07/25/2024 12:03:02 - INFO - __main__ - Step 7994: {'lr': 0.0004970620114657003, 'samples': 383712, 'steps': 7993, 'loss/train': 1.6101254224777222} +07/25/2024 12:03:02 - INFO - __main__ - Step 7995: {'lr': 0.0004970612072918637, 'samples': 383760, 'steps': 7994, 'loss/train': 1.8751543760299683} +07/25/2024 12:03:03 - INFO - __main__ - Step 7996: {'lr': 0.0004970604030086355, 'samples': 383808, 'steps': 7995, 'loss/train': 2.113722801208496} +07/25/2024 12:03:03 - INFO - __main__ - Step 7997: {'lr': 0.0004970595986160159, 'samples': 383856, 'steps': 7996, 'loss/train': 2.1689984798431396} +07/25/2024 12:03:03 - INFO - __main__ - Step 7998: {'lr': 0.0004970587941140052, 'samples': 383904, 'steps': 7997, 'loss/train': 2.1324546337127686} +07/25/2024 12:03:03 - INFO - __main__ - Step 7999: {'lr': 0.0004970579895026037, 'samples': 383952, 'steps': 7998, 'loss/train': 1.9486063718795776} +07/25/2024 12:03:04 - INFO - __main__ - Step 8000: {'lr': 0.000497057184781812, 'samples': 384000, 'steps': 7999, 'loss/train': 2.096236228942871} +07/25/2024 12:03:04 - INFO - __main__ - Step 8001: {'lr': 0.0004970563799516302, 'samples': 384048, 'steps': 8000, 'loss/train': 1.728959083557129} +07/25/2024 12:03:04 - INFO - __main__ - Step 8002: {'lr': 0.0004970555750120587, 'samples': 384096, 'steps': 8001, 'loss/train': 1.042281150817871} +07/25/2024 12:03:05 - INFO - __main__ - Step 8003: {'lr': 0.0004970547699630981, 'samples': 384144, 'steps': 8002, 'loss/train': 2.0095205307006836} +07/25/2024 12:03:05 - INFO - __main__ - Step 8004: {'lr': 0.0004970539648047485, 'samples': 384192, 'steps': 8003, 'loss/train': 1.7028069496154785} +07/25/2024 12:03:05 - INFO - __main__ - Step 8005: {'lr': 0.0004970531595370102, 'samples': 384240, 'steps': 8004, 'loss/train': 2.1564416885375977} +07/25/2024 12:03:05 - INFO - __main__ - Step 8006: {'lr': 0.0004970523541598838, 'samples': 384288, 'steps': 8005, 'loss/train': 1.7530648708343506} +07/25/2024 12:03:06 - INFO - __main__ - Step 8007: {'lr': 0.0004970515486733695, 'samples': 384336, 'steps': 8006, 'loss/train': 2.3120474815368652} +07/25/2024 12:03:06 - INFO - __main__ - Step 8008: {'lr': 0.0004970507430774677, 'samples': 384384, 'steps': 8007, 'loss/train': 1.6023083925247192} +07/25/2024 12:03:06 - INFO - __main__ - Step 8009: {'lr': 0.0004970499373721787, 'samples': 384432, 'steps': 8008, 'loss/train': 2.017509937286377} +07/25/2024 12:03:07 - INFO - __main__ - Step 8010: {'lr': 0.0004970491315575031, 'samples': 384480, 'steps': 8009, 'loss/train': 2.4161651134490967} +07/25/2024 12:03:07 - INFO - __main__ - Step 8011: {'lr': 0.0004970483256334409, 'samples': 384528, 'steps': 8010, 'loss/train': 1.9873456954956055} +07/25/2024 12:03:07 - INFO - __main__ - Step 8012: {'lr': 0.0004970475195999926, 'samples': 384576, 'steps': 8011, 'loss/train': 2.265312910079956} +07/25/2024 12:03:07 - INFO - __main__ - Step 8013: {'lr': 0.0004970467134571587, 'samples': 384624, 'steps': 8012, 'loss/train': 1.6406430006027222} +07/25/2024 12:03:08 - INFO - __main__ - Step 8014: {'lr': 0.0004970459072049393, 'samples': 384672, 'steps': 8013, 'loss/train': 2.1848297119140625} +07/25/2024 12:03:08 - INFO - __main__ - Step 8015: {'lr': 0.0004970451008433351, 'samples': 384720, 'steps': 8014, 'loss/train': 2.0518949031829834} +07/25/2024 12:03:08 - INFO - __main__ - Step 8016: {'lr': 0.0004970442943723461, 'samples': 384768, 'steps': 8015, 'loss/train': 1.5622994899749756} +07/25/2024 12:03:09 - INFO - __main__ - Step 8017: {'lr': 0.0004970434877919728, 'samples': 384816, 'steps': 8016, 'loss/train': 2.115623950958252} +07/25/2024 12:03:09 - INFO - __main__ - Step 8018: {'lr': 0.0004970426811022156, 'samples': 384864, 'steps': 8017, 'loss/train': 2.124375104904175} +07/25/2024 12:03:09 - INFO - __main__ - Step 8019: {'lr': 0.0004970418743030749, 'samples': 384912, 'steps': 8018, 'loss/train': 2.2502074241638184} +07/25/2024 12:03:09 - INFO - __main__ - Step 8020: {'lr': 0.0004970410673945508, 'samples': 384960, 'steps': 8019, 'loss/train': 2.3279287815093994} +07/25/2024 12:03:10 - INFO - __main__ - Step 8021: {'lr': 0.000497040260376644, 'samples': 385008, 'steps': 8020, 'loss/train': 1.8021186590194702} +07/25/2024 12:03:10 - INFO - __main__ - Step 8022: {'lr': 0.0004970394532493546, 'samples': 385056, 'steps': 8021, 'loss/train': 2.0383663177490234} +07/25/2024 12:03:10 - INFO - __main__ - Step 8023: {'lr': 0.0004970386460126831, 'samples': 385104, 'steps': 8022, 'loss/train': 1.8075381517410278} +07/25/2024 12:03:11 - INFO - __main__ - Step 8024: {'lr': 0.0004970378386666297, 'samples': 385152, 'steps': 8023, 'loss/train': 2.022270917892456} +07/25/2024 12:03:11 - INFO - __main__ - Step 8025: {'lr': 0.0004970370312111951, 'samples': 385200, 'steps': 8024, 'loss/train': 1.5929207801818848} +07/25/2024 12:03:11 - INFO - __main__ - Step 8026: {'lr': 0.0004970362236463793, 'samples': 385248, 'steps': 8025, 'loss/train': 1.0823876857757568} +07/25/2024 12:03:11 - INFO - __main__ - Step 8027: {'lr': 0.0004970354159721828, 'samples': 385296, 'steps': 8026, 'loss/train': 2.1148788928985596} +07/25/2024 12:03:12 - INFO - __main__ - Step 8028: {'lr': 0.0004970346081886059, 'samples': 385344, 'steps': 8027, 'loss/train': 2.039558172225952} +07/25/2024 12:03:12 - INFO - __main__ - Step 8029: {'lr': 0.0004970338002956489, 'samples': 385392, 'steps': 8028, 'loss/train': 2.0793802738189697} +07/25/2024 12:03:12 - INFO - __main__ - Step 8030: {'lr': 0.0004970329922933124, 'samples': 385440, 'steps': 8029, 'loss/train': 1.8463129997253418} +07/25/2024 12:03:13 - INFO - __main__ - Step 8031: {'lr': 0.0004970321841815965, 'samples': 385488, 'steps': 8030, 'loss/train': 2.2261877059936523} +07/25/2024 12:03:13 - INFO - __main__ - Step 8032: {'lr': 0.0004970313759605019, 'samples': 385536, 'steps': 8031, 'loss/train': 1.6415021419525146} +07/25/2024 12:03:13 - INFO - __main__ - Step 8033: {'lr': 0.0004970305676300286, 'samples': 385584, 'steps': 8032, 'loss/train': 1.8642452955245972} +07/25/2024 12:03:13 - INFO - __main__ - Step 8034: {'lr': 0.0004970297591901769, 'samples': 385632, 'steps': 8033, 'loss/train': 1.9897427558898926} +07/25/2024 12:03:14 - INFO - __main__ - Step 8035: {'lr': 0.0004970289506409475, 'samples': 385680, 'steps': 8034, 'loss/train': 2.0796568393707275} +07/25/2024 12:03:14 - INFO - __main__ - Step 8036: {'lr': 0.0004970281419823407, 'samples': 385728, 'steps': 8035, 'loss/train': 2.1709940433502197} +07/25/2024 12:03:14 - INFO - __main__ - Step 8037: {'lr': 0.0004970273332143566, 'samples': 385776, 'steps': 8036, 'loss/train': 2.177652359008789} +07/25/2024 12:03:15 - INFO - __main__ - Step 8038: {'lr': 0.0004970265243369958, 'samples': 385824, 'steps': 8037, 'loss/train': 3.1108851432800293} +07/25/2024 12:03:15 - INFO - __main__ - Step 8039: {'lr': 0.0004970257153502586, 'samples': 385872, 'steps': 8038, 'loss/train': 2.274742364883423} +07/25/2024 12:03:15 - INFO - __main__ - Step 8040: {'lr': 0.0004970249062541453, 'samples': 385920, 'steps': 8039, 'loss/train': 2.1882057189941406} +07/25/2024 12:03:15 - INFO - __main__ - Step 8041: {'lr': 0.0004970240970486562, 'samples': 385968, 'steps': 8040, 'loss/train': 2.1998345851898193} +07/25/2024 12:03:16 - INFO - __main__ - Step 8042: {'lr': 0.000497023287733792, 'samples': 386016, 'steps': 8041, 'loss/train': 1.5757616758346558} +07/25/2024 12:03:16 - INFO - __main__ - Step 8043: {'lr': 0.0004970224783095526, 'samples': 386064, 'steps': 8042, 'loss/train': 1.7047439813613892} +07/25/2024 12:03:16 - INFO - __main__ - Step 8044: {'lr': 0.0004970216687759385, 'samples': 386112, 'steps': 8043, 'loss/train': 1.8167213201522827} +07/25/2024 12:03:17 - INFO - __main__ - Step 8045: {'lr': 0.0004970208591329504, 'samples': 386160, 'steps': 8044, 'loss/train': 1.5312633514404297} +07/25/2024 12:03:17 - INFO - __main__ - Step 8046: {'lr': 0.0004970200493805882, 'samples': 386208, 'steps': 8045, 'loss/train': 1.8641835451126099} +07/25/2024 12:03:17 - INFO - __main__ - Step 8047: {'lr': 0.0004970192395188525, 'samples': 386256, 'steps': 8046, 'loss/train': 1.6403553485870361} +07/25/2024 12:03:17 - INFO - __main__ - Step 8048: {'lr': 0.0004970184295477435, 'samples': 386304, 'steps': 8047, 'loss/train': 2.1360204219818115} +07/25/2024 12:03:18 - INFO - __main__ - Step 8049: {'lr': 0.0004970176194672618, 'samples': 386352, 'steps': 8048, 'loss/train': 2.0975160598754883} +07/25/2024 12:03:18 - INFO - __main__ - Step 8050: {'lr': 0.0004970168092774075, 'samples': 386400, 'steps': 8049, 'loss/train': 1.7410045862197876} +07/25/2024 12:03:18 - INFO - __main__ - Step 8051: {'lr': 0.0004970159989781812, 'samples': 386448, 'steps': 8050, 'loss/train': 2.1135642528533936} +07/25/2024 12:03:19 - INFO - __main__ - Step 8052: {'lr': 0.0004970151885695831, 'samples': 386496, 'steps': 8051, 'loss/train': 1.3343361616134644} +07/25/2024 12:03:19 - INFO - __main__ - Step 8053: {'lr': 0.0004970143780516135, 'samples': 386544, 'steps': 8052, 'loss/train': 2.0109565258026123} +07/25/2024 12:03:19 - INFO - __main__ - Step 8054: {'lr': 0.000497013567424273, 'samples': 386592, 'steps': 8053, 'loss/train': 1.9966470003128052} +07/25/2024 12:03:19 - INFO - __main__ - Step 8055: {'lr': 0.0004970127566875616, 'samples': 386640, 'steps': 8054, 'loss/train': 1.8832175731658936} +07/25/2024 12:03:20 - INFO - __main__ - Step 8056: {'lr': 0.00049701194584148, 'samples': 386688, 'steps': 8055, 'loss/train': 2.224273443222046} +07/25/2024 12:03:20 - INFO - __main__ - Step 8057: {'lr': 0.0004970111348860285, 'samples': 386736, 'steps': 8056, 'loss/train': 2.0562946796417236} +07/25/2024 12:03:20 - INFO - __main__ - Step 8058: {'lr': 0.0004970103238212074, 'samples': 386784, 'steps': 8057, 'loss/train': 1.8468151092529297} +07/25/2024 12:03:21 - INFO - __main__ - Step 8059: {'lr': 0.0004970095126470169, 'samples': 386832, 'steps': 8058, 'loss/train': 2.080401659011841} +07/25/2024 12:03:21 - INFO - __main__ - Step 8060: {'lr': 0.0004970087013634576, 'samples': 386880, 'steps': 8059, 'loss/train': 1.9534577131271362} +07/25/2024 12:03:21 - INFO - __main__ - Step 8061: {'lr': 0.0004970078899705297, 'samples': 386928, 'steps': 8060, 'loss/train': 1.5583349466323853} +07/25/2024 12:03:21 - INFO - __main__ - Step 8062: {'lr': 0.0004970070784682337, 'samples': 386976, 'steps': 8061, 'loss/train': 2.121882200241089} +07/25/2024 12:03:22 - INFO - __main__ - Step 8063: {'lr': 0.0004970062668565699, 'samples': 387024, 'steps': 8062, 'loss/train': 2.377234697341919} +07/25/2024 12:03:22 - INFO - __main__ - Step 8064: {'lr': 0.0004970054551355386, 'samples': 387072, 'steps': 8063, 'loss/train': 2.110187292098999} +07/25/2024 12:03:22 - INFO - __main__ - Step 8065: {'lr': 0.0004970046433051403, 'samples': 387120, 'steps': 8064, 'loss/train': 1.8644623756408691} +07/25/2024 12:03:23 - INFO - __main__ - Step 8066: {'lr': 0.0004970038313653752, 'samples': 387168, 'steps': 8065, 'loss/train': 1.9496945142745972} +07/25/2024 12:03:23 - INFO - __main__ - Step 8067: {'lr': 0.0004970030193162437, 'samples': 387216, 'steps': 8066, 'loss/train': 1.2324872016906738} +07/25/2024 12:03:23 - INFO - __main__ - Step 8068: {'lr': 0.0004970022071577461, 'samples': 387264, 'steps': 8067, 'loss/train': 1.9723989963531494} +07/25/2024 12:03:23 - INFO - __main__ - Step 8069: {'lr': 0.000497001394889883, 'samples': 387312, 'steps': 8068, 'loss/train': 1.5229538679122925} +07/25/2024 12:03:24 - INFO - __main__ - Step 8070: {'lr': 0.0004970005825126546, 'samples': 387360, 'steps': 8069, 'loss/train': 1.874909520149231} +07/25/2024 12:03:24 - INFO - __main__ - Step 8071: {'lr': 0.0004969997700260611, 'samples': 387408, 'steps': 8070, 'loss/train': 2.291747570037842} +07/25/2024 12:03:24 - INFO - __main__ - Step 8072: {'lr': 0.0004969989574301031, 'samples': 387456, 'steps': 8071, 'loss/train': 2.0355472564697266} +07/25/2024 12:03:25 - INFO - __main__ - Step 8073: {'lr': 0.000496998144724781, 'samples': 387504, 'steps': 8072, 'loss/train': 2.0224428176879883} +07/25/2024 12:03:25 - INFO - __main__ - Step 8074: {'lr': 0.0004969973319100949, 'samples': 387552, 'steps': 8073, 'loss/train': 1.849591612815857} +07/25/2024 12:03:25 - INFO - __main__ - Step 8075: {'lr': 0.0004969965189860453, 'samples': 387600, 'steps': 8074, 'loss/train': 1.9684902429580688} +07/25/2024 12:03:25 - INFO - __main__ - Step 8076: {'lr': 0.0004969957059526327, 'samples': 387648, 'steps': 8075, 'loss/train': 2.2180347442626953} +07/25/2024 12:03:26 - INFO - __main__ - Step 8077: {'lr': 0.0004969948928098572, 'samples': 387696, 'steps': 8076, 'loss/train': 1.5256413221359253} +07/25/2024 12:03:26 - INFO - __main__ - Step 8078: {'lr': 0.0004969940795577193, 'samples': 387744, 'steps': 8077, 'loss/train': 1.8957446813583374} +07/25/2024 12:03:26 - INFO - __main__ - Step 8079: {'lr': 0.0004969932661962194, 'samples': 387792, 'steps': 8078, 'loss/train': 2.1296632289886475} +07/25/2024 12:03:26 - INFO - __main__ - Step 8080: {'lr': 0.0004969924527253578, 'samples': 387840, 'steps': 8079, 'loss/train': 2.0588624477386475} +07/25/2024 12:03:27 - INFO - __main__ - Step 8081: {'lr': 0.0004969916391451348, 'samples': 387888, 'steps': 8080, 'loss/train': 2.5517427921295166} +07/25/2024 12:03:27 - INFO - __main__ - Step 8082: {'lr': 0.0004969908254555508, 'samples': 387936, 'steps': 8081, 'loss/train': 0.9040146470069885} +07/25/2024 12:03:27 - INFO - __main__ - Step 8083: {'lr': 0.0004969900116566063, 'samples': 387984, 'steps': 8082, 'loss/train': 1.0654100179672241} +07/25/2024 12:03:28 - INFO - __main__ - Step 8084: {'lr': 0.0004969891977483015, 'samples': 388032, 'steps': 8083, 'loss/train': 1.8841736316680908} +07/25/2024 12:03:28 - INFO - __main__ - Step 8085: {'lr': 0.0004969883837306369, 'samples': 388080, 'steps': 8084, 'loss/train': 1.7826277017593384} +07/25/2024 12:03:28 - INFO - __main__ - Step 8086: {'lr': 0.0004969875696036125, 'samples': 388128, 'steps': 8085, 'loss/train': 2.292834997177124} +07/25/2024 12:03:28 - INFO - __main__ - Step 8087: {'lr': 0.0004969867553672291, 'samples': 388176, 'steps': 8086, 'loss/train': 1.520822525024414} +07/25/2024 12:03:29 - INFO - __main__ - Step 8088: {'lr': 0.0004969859410214869, 'samples': 388224, 'steps': 8087, 'loss/train': 1.5534956455230713} +07/25/2024 12:03:29 - INFO - __main__ - Step 8089: {'lr': 0.0004969851265663862, 'samples': 388272, 'steps': 8088, 'loss/train': 2.2120771408081055} +07/25/2024 12:03:29 - INFO - __main__ - Step 8090: {'lr': 0.0004969843120019273, 'samples': 388320, 'steps': 8089, 'loss/train': 2.4512319564819336} +07/25/2024 12:03:30 - INFO - __main__ - Step 8091: {'lr': 0.0004969834973281109, 'samples': 388368, 'steps': 8090, 'loss/train': 1.3260796070098877} +07/25/2024 12:03:30 - INFO - __main__ - Step 8092: {'lr': 0.000496982682544937, 'samples': 388416, 'steps': 8091, 'loss/train': 2.367117404937744} +07/25/2024 12:03:30 - INFO - __main__ - Step 8093: {'lr': 0.000496981867652406, 'samples': 388464, 'steps': 8092, 'loss/train': 2.1354761123657227} +07/25/2024 12:03:30 - INFO - __main__ - Step 8094: {'lr': 0.0004969810526505186, 'samples': 388512, 'steps': 8093, 'loss/train': 2.3095850944519043} +07/25/2024 12:03:31 - INFO - __main__ - Step 8095: {'lr': 0.0004969802375392747, 'samples': 388560, 'steps': 8094, 'loss/train': 2.0964877605438232} +07/25/2024 12:03:31 - INFO - __main__ - Step 8096: {'lr': 0.0004969794223186749, 'samples': 388608, 'steps': 8095, 'loss/train': 2.413630485534668} +07/25/2024 12:03:31 - INFO - __main__ - Step 8097: {'lr': 0.0004969786069887195, 'samples': 388656, 'steps': 8096, 'loss/train': 1.660788655281067} +07/25/2024 12:03:32 - INFO - __main__ - Step 8098: {'lr': 0.0004969777915494089, 'samples': 388704, 'steps': 8097, 'loss/train': 2.2390222549438477} +07/25/2024 12:03:32 - INFO - __main__ - Step 8099: {'lr': 0.0004969769760007435, 'samples': 388752, 'steps': 8098, 'loss/train': 1.582593321800232} +07/25/2024 12:03:32 - INFO - __main__ - Step 8100: {'lr': 0.0004969761603427237, 'samples': 388800, 'steps': 8099, 'loss/train': 2.178642749786377} +07/25/2024 12:03:32 - INFO - __main__ - Step 8101: {'lr': 0.0004969753445753497, 'samples': 388848, 'steps': 8100, 'loss/train': 1.9768829345703125} +07/25/2024 12:03:33 - INFO - __main__ - Step 8102: {'lr': 0.000496974528698622, 'samples': 388896, 'steps': 8101, 'loss/train': 1.7740626335144043} +07/25/2024 12:03:33 - INFO - __main__ - Step 8103: {'lr': 0.0004969737127125407, 'samples': 388944, 'steps': 8102, 'loss/train': 1.8839836120605469} +07/25/2024 12:03:33 - INFO - __main__ - Step 8104: {'lr': 0.0004969728966171066, 'samples': 388992, 'steps': 8103, 'loss/train': 2.3725202083587646} +07/25/2024 12:03:34 - INFO - __main__ - Step 8105: {'lr': 0.0004969720804123196, 'samples': 389040, 'steps': 8104, 'loss/train': 2.1683685779571533} +07/25/2024 12:03:34 - INFO - __main__ - Step 8106: {'lr': 0.0004969712640981804, 'samples': 389088, 'steps': 8105, 'loss/train': 1.9111028909683228} +07/25/2024 12:03:34 - INFO - __main__ - Step 8107: {'lr': 0.0004969704476746892, 'samples': 389136, 'steps': 8106, 'loss/train': 0.6603255271911621} +07/25/2024 12:03:34 - INFO - __main__ - Step 8108: {'lr': 0.0004969696311418465, 'samples': 389184, 'steps': 8107, 'loss/train': 1.7322754859924316} +07/25/2024 12:03:35 - INFO - __main__ - Step 8109: {'lr': 0.0004969688144996525, 'samples': 389232, 'steps': 8108, 'loss/train': 2.9253222942352295} +07/25/2024 12:03:35 - INFO - __main__ - Step 8110: {'lr': 0.0004969679977481076, 'samples': 389280, 'steps': 8109, 'loss/train': 1.9060004949569702} +07/25/2024 12:03:35 - INFO - __main__ - Step 8111: {'lr': 0.0004969671808872123, 'samples': 389328, 'steps': 8110, 'loss/train': 1.803330659866333} +07/25/2024 12:03:36 - INFO - __main__ - Step 8112: {'lr': 0.0004969663639169668, 'samples': 389376, 'steps': 8111, 'loss/train': 1.2852659225463867} +07/25/2024 12:03:36 - INFO - __main__ - Step 8113: {'lr': 0.0004969655468373715, 'samples': 389424, 'steps': 8112, 'loss/train': 1.8357855081558228} +07/25/2024 12:03:36 - INFO - __main__ - Step 8114: {'lr': 0.0004969647296484268, 'samples': 389472, 'steps': 8113, 'loss/train': 1.74209463596344} +07/25/2024 12:03:36 - INFO - __main__ - Step 8115: {'lr': 0.000496963912350133, 'samples': 389520, 'steps': 8114, 'loss/train': 0.8925318121910095} +07/25/2024 12:03:37 - INFO - __main__ - Step 8116: {'lr': 0.0004969630949424905, 'samples': 389568, 'steps': 8115, 'loss/train': 2.120093584060669} +07/25/2024 12:03:37 - INFO - __main__ - Step 8117: {'lr': 0.0004969622774254997, 'samples': 389616, 'steps': 8116, 'loss/train': 2.1255922317504883} +07/25/2024 12:03:37 - INFO - __main__ - Step 8118: {'lr': 0.0004969614597991609, 'samples': 389664, 'steps': 8117, 'loss/train': 2.189537525177002} +07/25/2024 12:03:38 - INFO - __main__ - Step 8119: {'lr': 0.0004969606420634744, 'samples': 389712, 'steps': 8118, 'loss/train': 2.2892394065856934} +07/25/2024 12:03:38 - INFO - __main__ - Step 8120: {'lr': 0.0004969598242184407, 'samples': 389760, 'steps': 8119, 'loss/train': 1.7902686595916748} +07/25/2024 12:03:38 - INFO - __main__ - Step 8121: {'lr': 0.0004969590062640602, 'samples': 389808, 'steps': 8120, 'loss/train': 1.7056976556777954} +07/25/2024 12:03:38 - INFO - __main__ - Step 8122: {'lr': 0.0004969581882003331, 'samples': 389856, 'steps': 8121, 'loss/train': 2.456559658050537} +07/25/2024 12:03:39 - INFO - __main__ - Step 8123: {'lr': 0.0004969573700272598, 'samples': 389904, 'steps': 8122, 'loss/train': 1.8766855001449585} +07/25/2024 12:03:39 - INFO - __main__ - Step 8124: {'lr': 0.0004969565517448408, 'samples': 389952, 'steps': 8123, 'loss/train': 1.9116169214248657} +07/25/2024 12:03:39 - INFO - __main__ - Step 8125: {'lr': 0.0004969557333530763, 'samples': 390000, 'steps': 8124, 'loss/train': 2.0922350883483887} +07/25/2024 12:03:40 - INFO - __main__ - Step 8126: {'lr': 0.0004969549148519667, 'samples': 390048, 'steps': 8125, 'loss/train': 1.901181936264038} +07/25/2024 12:03:40 - INFO - __main__ - Step 8127: {'lr': 0.0004969540962415123, 'samples': 390096, 'steps': 8126, 'loss/train': 2.628939390182495} +07/25/2024 12:03:40 - INFO - __main__ - Step 8128: {'lr': 0.0004969532775217137, 'samples': 390144, 'steps': 8127, 'loss/train': 1.7026841640472412} +07/25/2024 12:03:40 - INFO - __main__ - Step 8129: {'lr': 0.0004969524586925711, 'samples': 390192, 'steps': 8128, 'loss/train': 1.8731786012649536} +07/25/2024 12:03:41 - INFO - __main__ - Step 8130: {'lr': 0.0004969516397540848, 'samples': 390240, 'steps': 8129, 'loss/train': 2.294612407684326} +07/25/2024 12:03:41 - INFO - __main__ - Step 8131: {'lr': 0.0004969508207062553, 'samples': 390288, 'steps': 8130, 'loss/train': 0.6887643933296204} +07/25/2024 12:03:41 - INFO - __main__ - Step 8132: {'lr': 0.0004969500015490829, 'samples': 390336, 'steps': 8131, 'loss/train': 1.9863766431808472} +07/25/2024 12:03:42 - INFO - __main__ - Step 8133: {'lr': 0.0004969491822825678, 'samples': 390384, 'steps': 8132, 'loss/train': 2.3063759803771973} +07/25/2024 12:03:42 - INFO - __main__ - Step 8134: {'lr': 0.0004969483629067107, 'samples': 390432, 'steps': 8133, 'loss/train': 1.8384217023849487} +07/25/2024 12:03:42 - INFO - __main__ - Step 8135: {'lr': 0.0004969475434215118, 'samples': 390480, 'steps': 8134, 'loss/train': 1.6774147748947144} +07/25/2024 12:03:42 - INFO - __main__ - Step 8136: {'lr': 0.0004969467238269713, 'samples': 390528, 'steps': 8135, 'loss/train': 2.0224239826202393} +07/25/2024 12:03:43 - INFO - __main__ - Step 8137: {'lr': 0.0004969459041230898, 'samples': 390576, 'steps': 8136, 'loss/train': 1.4840195178985596} +07/25/2024 12:03:43 - INFO - __main__ - Step 8138: {'lr': 0.0004969450843098675, 'samples': 390624, 'steps': 8137, 'loss/train': 1.9334561824798584} +07/25/2024 12:03:43 - INFO - __main__ - Step 8139: {'lr': 0.0004969442643873049, 'samples': 390672, 'steps': 8138, 'loss/train': 2.375011682510376} +07/25/2024 12:03:44 - INFO - __main__ - Step 8140: {'lr': 0.0004969434443554023, 'samples': 390720, 'steps': 8139, 'loss/train': 1.5128812789916992} +07/25/2024 12:03:44 - INFO - __main__ - Step 8141: {'lr': 0.0004969426242141601, 'samples': 390768, 'steps': 8140, 'loss/train': 2.927309036254883} +07/25/2024 12:03:44 - INFO - __main__ - Step 8142: {'lr': 0.0004969418039635784, 'samples': 390816, 'steps': 8141, 'loss/train': 2.2657275199890137} +07/25/2024 12:03:44 - INFO - __main__ - Step 8143: {'lr': 0.0004969409836036581, 'samples': 390864, 'steps': 8142, 'loss/train': 1.8849923610687256} +07/25/2024 12:03:45 - INFO - __main__ - Step 8144: {'lr': 0.0004969401631343991, 'samples': 390912, 'steps': 8143, 'loss/train': 2.1199657917022705} +07/25/2024 12:03:45 - INFO - __main__ - Step 8145: {'lr': 0.0004969393425558019, 'samples': 390960, 'steps': 8144, 'loss/train': 2.3165125846862793} +07/25/2024 12:03:45 - INFO - __main__ - Step 8146: {'lr': 0.0004969385218678669, 'samples': 391008, 'steps': 8145, 'loss/train': 1.8040236234664917} +07/25/2024 12:03:46 - INFO - __main__ - Step 8147: {'lr': 0.0004969377010705944, 'samples': 391056, 'steps': 8146, 'loss/train': 2.0863239765167236} +07/25/2024 12:03:46 - INFO - __main__ - Step 8148: {'lr': 0.0004969368801639849, 'samples': 391104, 'steps': 8147, 'loss/train': 2.404184341430664} +07/25/2024 12:03:46 - INFO - __main__ - Step 8149: {'lr': 0.0004969360591480386, 'samples': 391152, 'steps': 8148, 'loss/train': 1.9858651161193848} +07/25/2024 12:03:46 - INFO - __main__ - Step 8150: {'lr': 0.000496935238022756, 'samples': 391200, 'steps': 8149, 'loss/train': 2.5710389614105225} +07/25/2024 12:03:47 - INFO - __main__ - Step 8151: {'lr': 0.0004969344167881373, 'samples': 391248, 'steps': 8150, 'loss/train': 2.9212095737457275} +07/25/2024 12:03:47 - INFO - __main__ - Step 8152: {'lr': 0.0004969335954441831, 'samples': 391296, 'steps': 8151, 'loss/train': 2.03253173828125} +07/25/2024 12:03:47 - INFO - __main__ - Step 8153: {'lr': 0.0004969327739908935, 'samples': 391344, 'steps': 8152, 'loss/train': 1.347037672996521} +07/25/2024 12:03:48 - INFO - __main__ - Step 8154: {'lr': 0.000496931952428269, 'samples': 391392, 'steps': 8153, 'loss/train': 1.8791593313217163} +07/25/2024 12:03:48 - INFO - __main__ - Step 8155: {'lr': 0.00049693113075631, 'samples': 391440, 'steps': 8154, 'loss/train': 0.8019172549247742} +07/25/2024 12:03:48 - INFO - __main__ - Step 8156: {'lr': 0.0004969303089750168, 'samples': 391488, 'steps': 8155, 'loss/train': 2.0094542503356934} +07/25/2024 12:03:48 - INFO - __main__ - Step 8157: {'lr': 0.0004969294870843897, 'samples': 391536, 'steps': 8156, 'loss/train': 2.1368749141693115} +07/25/2024 12:03:49 - INFO - __main__ - Step 8158: {'lr': 0.0004969286650844293, 'samples': 391584, 'steps': 8157, 'loss/train': 3.3713133335113525} +07/25/2024 12:03:49 - INFO - __main__ - Step 8159: {'lr': 0.0004969278429751356, 'samples': 391632, 'steps': 8158, 'loss/train': 2.1152493953704834} +07/25/2024 12:03:49 - INFO - __main__ - Step 8160: {'lr': 0.0004969270207565093, 'samples': 391680, 'steps': 8159, 'loss/train': 2.3741655349731445} +07/25/2024 12:03:50 - INFO - __main__ - Step 8161: {'lr': 0.0004969261984285507, 'samples': 391728, 'steps': 8160, 'loss/train': 1.3042612075805664} +07/25/2024 12:03:50 - INFO - __main__ - Step 8162: {'lr': 0.0004969253759912599, 'samples': 391776, 'steps': 8161, 'loss/train': 2.2310142517089844} +07/25/2024 12:03:50 - INFO - __main__ - Step 8163: {'lr': 0.0004969245534446376, 'samples': 391824, 'steps': 8162, 'loss/train': 3.6545021533966064} +07/25/2024 12:03:50 - INFO - __main__ - Step 8164: {'lr': 0.000496923730788684, 'samples': 391872, 'steps': 8163, 'loss/train': 1.3567975759506226} +07/25/2024 12:03:51 - INFO - __main__ - Step 8165: {'lr': 0.0004969229080233994, 'samples': 391920, 'steps': 8164, 'loss/train': 2.053001642227173} +07/25/2024 12:03:51 - INFO - __main__ - Step 8166: {'lr': 0.0004969220851487844, 'samples': 391968, 'steps': 8165, 'loss/train': 1.2059261798858643} +07/25/2024 12:03:51 - INFO - __main__ - Step 8167: {'lr': 0.0004969212621648392, 'samples': 392016, 'steps': 8166, 'loss/train': 2.2335832118988037} +07/25/2024 12:03:51 - INFO - __main__ - Step 8168: {'lr': 0.0004969204390715641, 'samples': 392064, 'steps': 8167, 'loss/train': 3.0472888946533203} +07/25/2024 12:03:52 - INFO - __main__ - Step 8169: {'lr': 0.0004969196158689596, 'samples': 392112, 'steps': 8168, 'loss/train': 2.219451904296875} +07/25/2024 12:03:52 - INFO - __main__ - Step 8170: {'lr': 0.0004969187925570261, 'samples': 392160, 'steps': 8169, 'loss/train': 1.499874472618103} +07/25/2024 12:03:52 - INFO - __main__ - Step 8171: {'lr': 0.0004969179691357638, 'samples': 392208, 'steps': 8170, 'loss/train': 1.8685206174850464} +07/25/2024 12:03:53 - INFO - __main__ - Step 8172: {'lr': 0.0004969171456051731, 'samples': 392256, 'steps': 8171, 'loss/train': 2.2073380947113037} +07/25/2024 12:03:53 - INFO - __main__ - Step 8173: {'lr': 0.0004969163219652544, 'samples': 392304, 'steps': 8172, 'loss/train': 1.284214973449707} +07/25/2024 12:03:53 - INFO - __main__ - Step 8174: {'lr': 0.0004969154982160082, 'samples': 392352, 'steps': 8173, 'loss/train': 2.066328763961792} +07/25/2024 12:03:53 - INFO - __main__ - Step 8175: {'lr': 0.0004969146743574346, 'samples': 392400, 'steps': 8174, 'loss/train': 1.956792950630188} +07/25/2024 12:03:54 - INFO - __main__ - Step 8176: {'lr': 0.000496913850389534, 'samples': 392448, 'steps': 8175, 'loss/train': 1.7942644357681274} +07/25/2024 12:03:54 - INFO - __main__ - Step 8177: {'lr': 0.0004969130263123071, 'samples': 392496, 'steps': 8176, 'loss/train': 0.900884211063385} +07/25/2024 12:03:54 - INFO - __main__ - Step 8178: {'lr': 0.0004969122021257539, 'samples': 392544, 'steps': 8177, 'loss/train': 2.061025857925415} +07/25/2024 12:03:55 - INFO - __main__ - Step 8179: {'lr': 0.000496911377829875, 'samples': 392592, 'steps': 8178, 'loss/train': 0.8312451243400574} +07/25/2024 12:03:55 - INFO - __main__ - Step 8180: {'lr': 0.0004969105534246706, 'samples': 392640, 'steps': 8179, 'loss/train': 1.965819001197815} +07/25/2024 12:03:55 - INFO - __main__ - Step 8181: {'lr': 0.000496909728910141, 'samples': 392688, 'steps': 8180, 'loss/train': 2.2832062244415283} +07/25/2024 12:03:55 - INFO - __main__ - Step 8182: {'lr': 0.0004969089042862869, 'samples': 392736, 'steps': 8181, 'loss/train': 1.7777132987976074} +07/25/2024 12:03:56 - INFO - __main__ - Step 8183: {'lr': 0.0004969080795531084, 'samples': 392784, 'steps': 8182, 'loss/train': 2.071399688720703} +07/25/2024 12:03:56 - INFO - __main__ - Step 8184: {'lr': 0.0004969072547106058, 'samples': 392832, 'steps': 8183, 'loss/train': 2.0085220336914062} +07/25/2024 12:03:56 - INFO - __main__ - Step 8185: {'lr': 0.0004969064297587798, 'samples': 392880, 'steps': 8184, 'loss/train': 1.2488183975219727} +07/25/2024 12:03:57 - INFO - __main__ - Step 8186: {'lr': 0.0004969056046976304, 'samples': 392928, 'steps': 8185, 'loss/train': 1.916652798652649} +07/25/2024 12:03:57 - INFO - __main__ - Step 8187: {'lr': 0.000496904779527158, 'samples': 392976, 'steps': 8186, 'loss/train': 3.5331790447235107} +07/25/2024 12:03:57 - INFO - __main__ - Step 8188: {'lr': 0.0004969039542473633, 'samples': 393024, 'steps': 8187, 'loss/train': 1.7265712022781372} +07/25/2024 12:03:57 - INFO - __main__ - Step 8189: {'lr': 0.0004969031288582464, 'samples': 393072, 'steps': 8188, 'loss/train': 1.8495755195617676} +07/25/2024 12:03:58 - INFO - __main__ - Step 8190: {'lr': 0.0004969023033598077, 'samples': 393120, 'steps': 8189, 'loss/train': 1.4980957508087158} +07/25/2024 12:03:58 - INFO - __main__ - Step 8191: {'lr': 0.0004969014777520476, 'samples': 393168, 'steps': 8190, 'loss/train': 2.1069750785827637} +07/25/2024 12:03:58 - INFO - __main__ - Step 8192: {'lr': 0.0004969006520349663, 'samples': 393216, 'steps': 8191, 'loss/train': 2.065701723098755} +07/25/2024 12:03:59 - INFO - __main__ - Step 8193: {'lr': 0.0004968998262085644, 'samples': 393264, 'steps': 8192, 'loss/train': 2.3996798992156982} +07/25/2024 12:03:59 - INFO - __main__ - Step 8194: {'lr': 0.0004968990002728423, 'samples': 393312, 'steps': 8193, 'loss/train': 2.4022209644317627} +07/25/2024 12:03:59 - INFO - __main__ - Step 8195: {'lr': 0.0004968981742278, 'samples': 393360, 'steps': 8194, 'loss/train': 2.021348714828491} +07/25/2024 12:03:59 - INFO - __main__ - Step 8196: {'lr': 0.0004968973480734383, 'samples': 393408, 'steps': 8195, 'loss/train': 2.4462289810180664} +07/25/2024 12:04:00 - INFO - __main__ - Step 8197: {'lr': 0.0004968965218097573, 'samples': 393456, 'steps': 8196, 'loss/train': 1.3528800010681152} +07/25/2024 12:04:00 - INFO - __main__ - Step 8198: {'lr': 0.0004968956954367575, 'samples': 393504, 'steps': 8197, 'loss/train': 1.9324641227722168} +07/25/2024 12:04:00 - INFO - __main__ - Step 8199: {'lr': 0.0004968948689544391, 'samples': 393552, 'steps': 8198, 'loss/train': 2.058765172958374} +07/25/2024 12:04:01 - INFO - __main__ - Step 8200: {'lr': 0.0004968940423628026, 'samples': 393600, 'steps': 8199, 'loss/train': 2.45918869972229} +07/25/2024 12:04:01 - INFO - __main__ - Step 8201: {'lr': 0.0004968932156618484, 'samples': 393648, 'steps': 8200, 'loss/train': 2.0163278579711914} +07/25/2024 12:04:01 - INFO - __main__ - Step 8202: {'lr': 0.0004968923888515768, 'samples': 393696, 'steps': 8201, 'loss/train': 2.0421700477600098} +07/25/2024 12:04:01 - INFO - __main__ - Step 8203: {'lr': 0.000496891561931988, 'samples': 393744, 'steps': 8202, 'loss/train': 0.8782869577407837} +07/25/2024 12:04:02 - INFO - __main__ - Step 8204: {'lr': 0.0004968907349030827, 'samples': 393792, 'steps': 8203, 'loss/train': 1.5199339389801025} +07/25/2024 12:04:02 - INFO - __main__ - Step 8205: {'lr': 0.0004968899077648611, 'samples': 393840, 'steps': 8204, 'loss/train': 2.4464621543884277} +07/25/2024 12:04:02 - INFO - __main__ - Step 8206: {'lr': 0.0004968890805173235, 'samples': 393888, 'steps': 8205, 'loss/train': 1.6824333667755127} +07/25/2024 12:04:03 - INFO - __main__ - Step 8207: {'lr': 0.0004968882531604704, 'samples': 393936, 'steps': 8206, 'loss/train': 1.9463717937469482} +07/25/2024 12:04:03 - INFO - __main__ - Step 8208: {'lr': 0.000496887425694302, 'samples': 393984, 'steps': 8207, 'loss/train': 2.1283352375030518} +07/25/2024 12:04:03 - INFO - __main__ - Step 8209: {'lr': 0.0004968865981188189, 'samples': 394032, 'steps': 8208, 'loss/train': 1.2789169549942017} +07/25/2024 12:04:03 - INFO - __main__ - Step 8210: {'lr': 0.0004968857704340211, 'samples': 394080, 'steps': 8209, 'loss/train': 1.7039259672164917} +07/25/2024 12:04:04 - INFO - __main__ - Step 8211: {'lr': 0.0004968849426399094, 'samples': 394128, 'steps': 8210, 'loss/train': 5.811275959014893} +07/25/2024 12:04:04 - INFO - __main__ - Step 8212: {'lr': 0.0004968841147364839, 'samples': 394176, 'steps': 8211, 'loss/train': 1.4684823751449585} +07/25/2024 12:04:04 - INFO - __main__ - Step 8213: {'lr': 0.0004968832867237452, 'samples': 394224, 'steps': 8212, 'loss/train': 2.097271203994751} +07/25/2024 12:04:05 - INFO - __main__ - Step 8214: {'lr': 0.0004968824586016932, 'samples': 394272, 'steps': 8213, 'loss/train': 2.5311574935913086} +07/25/2024 12:04:05 - INFO - __main__ - Step 8215: {'lr': 0.0004968816303703288, 'samples': 394320, 'steps': 8214, 'loss/train': 2.5872762203216553} +07/25/2024 12:04:05 - INFO - __main__ - Step 8216: {'lr': 0.000496880802029652, 'samples': 394368, 'steps': 8215, 'loss/train': 0.8462949991226196} +07/25/2024 12:04:05 - INFO - __main__ - Step 8217: {'lr': 0.0004968799735796634, 'samples': 394416, 'steps': 8216, 'loss/train': 2.8814799785614014} +07/25/2024 12:04:06 - INFO - __main__ - Step 8218: {'lr': 0.0004968791450203631, 'samples': 394464, 'steps': 8217, 'loss/train': 1.9620633125305176} +07/25/2024 12:04:06 - INFO - __main__ - Step 8219: {'lr': 0.0004968783163517518, 'samples': 394512, 'steps': 8218, 'loss/train': 2.1567015647888184} +07/25/2024 12:04:06 - INFO - __main__ - Step 8220: {'lr': 0.0004968774875738297, 'samples': 394560, 'steps': 8219, 'loss/train': 2.0574045181274414} +07/25/2024 12:04:07 - INFO - __main__ - Step 8221: {'lr': 0.0004968766586865971, 'samples': 394608, 'steps': 8220, 'loss/train': 2.4184412956237793} +07/25/2024 12:04:07 - INFO - __main__ - Step 8222: {'lr': 0.0004968758296900544, 'samples': 394656, 'steps': 8221, 'loss/train': 2.061213254928589} +07/25/2024 12:04:07 - INFO - __main__ - Step 8223: {'lr': 0.0004968750005842021, 'samples': 394704, 'steps': 8222, 'loss/train': 2.397578001022339} +07/25/2024 12:04:07 - INFO - __main__ - Step 8224: {'lr': 0.0004968741713690404, 'samples': 394752, 'steps': 8223, 'loss/train': 1.8143737316131592} +07/25/2024 12:04:08 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488416 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:04:08 - INFO - __main__ - Step 8225: {'lr': 0.0004968733420445696, 'samples': 394800, 'steps': 8224, 'loss/train': 2.2035202980041504} +07/25/2024 12:04:08 - INFO - __main__ - Step 8226: {'lr': 0.0004968725126107904, 'samples': 394848, 'steps': 8225, 'loss/train': 2.412010669708252} +07/25/2024 12:04:08 - INFO - __main__ - Step 8227: {'lr': 0.000496871683067703, 'samples': 394896, 'steps': 8226, 'loss/train': 0.8133450150489807} +07/25/2024 12:04:09 - INFO - __main__ - Step 8228: {'lr': 0.0004968708534153076, 'samples': 394944, 'steps': 8227, 'loss/train': 1.485832691192627} +07/25/2024 12:04:09 - INFO - __main__ - Step 8229: {'lr': 0.0004968700236536047, 'samples': 394992, 'steps': 8228, 'loss/train': 1.3206204175949097} +07/25/2024 12:04:09 - INFO - __main__ - Step 8230: {'lr': 0.0004968691937825947, 'samples': 395040, 'steps': 8229, 'loss/train': 2.171898126602173} +07/25/2024 12:04:09 - INFO - __main__ - Step 8231: {'lr': 0.000496868363802278, 'samples': 395088, 'steps': 8230, 'loss/train': 2.4945805072784424} +07/25/2024 12:04:10 - INFO - __main__ - Step 8232: {'lr': 0.0004968675337126548, 'samples': 395136, 'steps': 8231, 'loss/train': 1.4826163053512573} +07/25/2024 12:04:10 - INFO - __main__ - Step 8233: {'lr': 0.0004968667035137257, 'samples': 395184, 'steps': 8232, 'loss/train': 1.2529455423355103} +07/25/2024 12:04:10 - INFO - __main__ - Step 8234: {'lr': 0.000496865873205491, 'samples': 395232, 'steps': 8233, 'loss/train': 1.7621060609817505} +07/25/2024 12:04:11 - INFO - __main__ - Step 8235: {'lr': 0.0004968650427879508, 'samples': 395280, 'steps': 8234, 'loss/train': 3.2729668617248535} +07/25/2024 12:04:11 - INFO - __main__ - Step 8236: {'lr': 0.0004968642122611057, 'samples': 395328, 'steps': 8235, 'loss/train': 2.4850921630859375} +07/25/2024 12:04:11 - INFO - __main__ - Step 8237: {'lr': 0.0004968633816249562, 'samples': 395376, 'steps': 8236, 'loss/train': 2.182070732116699} +07/25/2024 12:04:11 - INFO - __main__ - Step 8238: {'lr': 0.0004968625508795024, 'samples': 395424, 'steps': 8237, 'loss/train': 1.8581764698028564} +07/25/2024 12:04:12 - INFO - __main__ - Step 8239: {'lr': 0.0004968617200247447, 'samples': 395472, 'steps': 8238, 'loss/train': 2.2748212814331055} +07/25/2024 12:04:12 - INFO - __main__ - Step 8240: {'lr': 0.0004968608890606837, 'samples': 395520, 'steps': 8239, 'loss/train': 1.6482348442077637} +07/25/2024 12:04:12 - INFO - __main__ - Step 8241: {'lr': 0.0004968600579873196, 'samples': 395568, 'steps': 8240, 'loss/train': 2.7250969409942627} +07/25/2024 12:04:13 - INFO - __main__ - Step 8242: {'lr': 0.0004968592268046526, 'samples': 395616, 'steps': 8241, 'loss/train': 2.753957509994507} +07/25/2024 12:04:13 - INFO - __main__ - Step 8243: {'lr': 0.0004968583955126836, 'samples': 395664, 'steps': 8242, 'loss/train': 2.3073790073394775} +07/25/2024 12:04:13 - INFO - __main__ - Step 8244: {'lr': 0.0004968575641114123, 'samples': 395712, 'steps': 8243, 'loss/train': 2.0567071437835693} +07/25/2024 12:04:13 - INFO - __main__ - Step 8245: {'lr': 0.0004968567326008395, 'samples': 395760, 'steps': 8244, 'loss/train': 2.614067792892456} +07/25/2024 12:04:14 - INFO - __main__ - Step 8246: {'lr': 0.0004968559009809654, 'samples': 395808, 'steps': 8245, 'loss/train': 1.6980628967285156} +07/25/2024 12:04:14 - INFO - __main__ - Step 8247: {'lr': 0.0004968550692517905, 'samples': 395856, 'steps': 8246, 'loss/train': 2.5259809494018555} +07/25/2024 12:04:14 - INFO - __main__ - Step 8248: {'lr': 0.0004968542374133152, 'samples': 395904, 'steps': 8247, 'loss/train': 1.9846323728561401} +07/25/2024 12:04:15 - INFO - __main__ - Step 8249: {'lr': 0.0004968534054655396, 'samples': 395952, 'steps': 8248, 'loss/train': 2.1652941703796387} +07/25/2024 12:04:15 - INFO - __main__ - Step 8250: {'lr': 0.0004968525734084643, 'samples': 396000, 'steps': 8249, 'loss/train': 2.617671489715576} +07/25/2024 12:04:15 - INFO - __main__ - Step 8251: {'lr': 0.0004968517412420895, 'samples': 396048, 'steps': 8250, 'loss/train': 0.5301505923271179} +07/25/2024 12:04:15 - INFO - __main__ - Step 8252: {'lr': 0.0004968509089664158, 'samples': 396096, 'steps': 8251, 'loss/train': 2.0174612998962402} +07/25/2024 12:04:16 - INFO - __main__ - Step 8253: {'lr': 0.0004968500765814433, 'samples': 396144, 'steps': 8252, 'loss/train': 1.9875237941741943} +07/25/2024 12:04:16 - INFO - __main__ - Step 8254: {'lr': 0.0004968492440871725, 'samples': 396192, 'steps': 8253, 'loss/train': 2.4469411373138428} +07/25/2024 12:04:16 - INFO - __main__ - Step 8255: {'lr': 0.0004968484114836038, 'samples': 396240, 'steps': 8254, 'loss/train': 1.995434284210205} +07/25/2024 12:04:16 - INFO - __main__ - Step 8256: {'lr': 0.0004968475787707376, 'samples': 396288, 'steps': 8255, 'loss/train': 2.3264222145080566} +07/25/2024 12:04:17 - INFO - __main__ - Step 8257: {'lr': 0.0004968467459485743, 'samples': 396336, 'steps': 8256, 'loss/train': 1.1856379508972168} +07/25/2024 12:04:17 - INFO - __main__ - Step 8258: {'lr': 0.000496845913017114, 'samples': 396384, 'steps': 8257, 'loss/train': 2.281003952026367} +07/25/2024 12:04:17 - INFO - __main__ - Step 8259: {'lr': 0.0004968450799763573, 'samples': 396432, 'steps': 8258, 'loss/train': 1.9685198068618774} +07/25/2024 12:04:18 - INFO - __main__ - Step 8260: {'lr': 0.0004968442468263045, 'samples': 396480, 'steps': 8259, 'loss/train': 2.1057686805725098} +07/25/2024 12:04:18 - INFO - __main__ - Step 8261: {'lr': 0.000496843413566956, 'samples': 396528, 'steps': 8260, 'loss/train': 1.8820228576660156} +07/25/2024 12:04:18 - INFO - __main__ - Step 8262: {'lr': 0.0004968425801983122, 'samples': 396576, 'steps': 8261, 'loss/train': 2.0685696601867676} +07/25/2024 12:04:18 - INFO - __main__ - Step 8263: {'lr': 0.0004968417467203734, 'samples': 396624, 'steps': 8262, 'loss/train': 2.33888840675354} +07/25/2024 12:04:19 - INFO - __main__ - Step 8264: {'lr': 0.00049684091313314, 'samples': 396672, 'steps': 8263, 'loss/train': 2.794583559036255} +07/25/2024 12:04:19 - INFO - __main__ - Step 8265: {'lr': 0.0004968400794366123, 'samples': 396720, 'steps': 8264, 'loss/train': 2.513441324234009} +07/25/2024 12:04:19 - INFO - __main__ - Step 8266: {'lr': 0.0004968392456307908, 'samples': 396768, 'steps': 8265, 'loss/train': 1.3598570823669434} +07/25/2024 12:04:20 - INFO - __main__ - Step 8267: {'lr': 0.0004968384117156756, 'samples': 396816, 'steps': 8266, 'loss/train': 2.6932528018951416} +07/25/2024 12:04:20 - INFO - __main__ - Step 8268: {'lr': 0.0004968375776912675, 'samples': 396864, 'steps': 8267, 'loss/train': 2.0200352668762207} +07/25/2024 12:04:20 - INFO - __main__ - Step 8269: {'lr': 0.0004968367435575666, 'samples': 396912, 'steps': 8268, 'loss/train': 2.17647647857666} +07/25/2024 12:04:20 - INFO - __main__ - Step 8270: {'lr': 0.0004968359093145732, 'samples': 396960, 'steps': 8269, 'loss/train': 1.476043462753296} +07/25/2024 12:04:21 - INFO - __main__ - Step 8271: {'lr': 0.0004968350749622878, 'samples': 397008, 'steps': 8270, 'loss/train': 2.489985942840576} +07/25/2024 12:04:21 - INFO - __main__ - Step 8272: {'lr': 0.000496834240500711, 'samples': 397056, 'steps': 8271, 'loss/train': 1.662456750869751} +07/25/2024 12:04:21 - INFO - __main__ - Step 8273: {'lr': 0.0004968334059298426, 'samples': 397104, 'steps': 8272, 'loss/train': 2.452561140060425} +07/25/2024 12:04:22 - INFO - __main__ - Step 8274: {'lr': 0.0004968325712496834, 'samples': 397152, 'steps': 8273, 'loss/train': 2.294750452041626} +07/25/2024 12:04:22 - INFO - __main__ - Step 8275: {'lr': 0.0004968317364602336, 'samples': 397200, 'steps': 8274, 'loss/train': 0.6908778548240662} +07/25/2024 12:04:22 - INFO - __main__ - Step 8276: {'lr': 0.0004968309015614937, 'samples': 397248, 'steps': 8275, 'loss/train': 1.7728370428085327} +07/25/2024 12:04:22 - INFO - __main__ - Step 8277: {'lr': 0.000496830066553464, 'samples': 397296, 'steps': 8276, 'loss/train': 1.6734033823013306} +07/25/2024 12:04:23 - INFO - __main__ - Step 8278: {'lr': 0.0004968292314361447, 'samples': 397344, 'steps': 8277, 'loss/train': 2.315056800842285} +07/25/2024 12:04:23 - INFO - __main__ - Step 8279: {'lr': 0.0004968283962095365, 'samples': 397392, 'steps': 8278, 'loss/train': 1.7217522859573364} +07/25/2024 12:04:23 - INFO - __main__ - Step 8280: {'lr': 0.0004968275608736396, 'samples': 397440, 'steps': 8279, 'loss/train': 2.307922124862671} +07/25/2024 12:04:24 - INFO - __main__ - Step 8281: {'lr': 0.0004968267254284543, 'samples': 397488, 'steps': 8280, 'loss/train': 1.2356027364730835} +07/25/2024 12:04:24 - INFO - __main__ - Step 8282: {'lr': 0.000496825889873981, 'samples': 397536, 'steps': 8281, 'loss/train': 2.063737392425537} +07/25/2024 12:04:24 - INFO - __main__ - Step 8283: {'lr': 0.0004968250542102202, 'samples': 397584, 'steps': 8282, 'loss/train': 1.9801263809204102} +07/25/2024 12:04:24 - INFO - __main__ - Step 8284: {'lr': 0.0004968242184371722, 'samples': 397632, 'steps': 8283, 'loss/train': 2.1211154460906982} +07/25/2024 12:04:25 - INFO - __main__ - Step 8285: {'lr': 0.0004968233825548373, 'samples': 397680, 'steps': 8284, 'loss/train': 1.9635528326034546} +07/25/2024 12:04:25 - INFO - __main__ - Step 8286: {'lr': 0.000496822546563216, 'samples': 397728, 'steps': 8285, 'loss/train': 2.202341318130493} +07/25/2024 12:04:25 - INFO - __main__ - Step 8287: {'lr': 0.0004968217104623085, 'samples': 397776, 'steps': 8286, 'loss/train': 2.3280017375946045} +07/25/2024 12:04:26 - INFO - __main__ - Step 8288: {'lr': 0.0004968208742521153, 'samples': 397824, 'steps': 8287, 'loss/train': 2.360443592071533} +07/25/2024 12:04:26 - INFO - __main__ - Step 8289: {'lr': 0.0004968200379326367, 'samples': 397872, 'steps': 8288, 'loss/train': 2.142441987991333} +07/25/2024 12:04:26 - INFO - __main__ - Step 8290: {'lr': 0.0004968192015038732, 'samples': 397920, 'steps': 8289, 'loss/train': 0.6730526685714722} +07/25/2024 12:04:26 - INFO - __main__ - Step 8291: {'lr': 0.0004968183649658249, 'samples': 397968, 'steps': 8290, 'loss/train': 2.1805572509765625} +07/25/2024 12:04:27 - INFO - __main__ - Step 8292: {'lr': 0.0004968175283184925, 'samples': 398016, 'steps': 8291, 'loss/train': 2.484246015548706} +07/25/2024 12:04:27 - INFO - __main__ - Step 8293: {'lr': 0.0004968166915618761, 'samples': 398064, 'steps': 8292, 'loss/train': 2.0730974674224854} +07/25/2024 12:04:27 - INFO - __main__ - Step 8294: {'lr': 0.0004968158546959763, 'samples': 398112, 'steps': 8293, 'loss/train': 1.7309337854385376} +07/25/2024 12:04:28 - INFO - __main__ - Step 8295: {'lr': 0.0004968150177207933, 'samples': 398160, 'steps': 8294, 'loss/train': 2.4978771209716797} +07/25/2024 12:04:28 - INFO - __main__ - Step 8296: {'lr': 0.0004968141806363274, 'samples': 398208, 'steps': 8295, 'loss/train': 2.17156982421875} +07/25/2024 12:04:28 - INFO - __main__ - Step 8297: {'lr': 0.0004968133434425793, 'samples': 398256, 'steps': 8296, 'loss/train': 1.940806269645691} +07/25/2024 12:04:28 - INFO - __main__ - Step 8298: {'lr': 0.0004968125061395491, 'samples': 398304, 'steps': 8297, 'loss/train': 2.1026511192321777} +07/25/2024 12:04:29 - INFO - __main__ - Step 8299: {'lr': 0.0004968116687272372, 'samples': 398352, 'steps': 8298, 'loss/train': 1.341143250465393} +07/25/2024 12:04:29 - INFO - __main__ - Step 8300: {'lr': 0.0004968108312056441, 'samples': 398400, 'steps': 8299, 'loss/train': 1.9161217212677002} +07/25/2024 12:04:29 - INFO - __main__ - Step 8301: {'lr': 0.00049680999357477, 'samples': 398448, 'steps': 8300, 'loss/train': 0.49789759516716003} +07/25/2024 12:04:30 - INFO - __main__ - Step 8302: {'lr': 0.0004968091558346154, 'samples': 398496, 'steps': 8301, 'loss/train': 2.0898659229278564} +07/25/2024 12:04:30 - INFO - __main__ - Step 8303: {'lr': 0.0004968083179851806, 'samples': 398544, 'steps': 8302, 'loss/train': 2.2966675758361816} +07/25/2024 12:04:30 - INFO - __main__ - Step 8304: {'lr': 0.0004968074800264659, 'samples': 398592, 'steps': 8303, 'loss/train': 2.048795461654663} +07/25/2024 12:04:30 - INFO - __main__ - Step 8305: {'lr': 0.0004968066419584719, 'samples': 398640, 'steps': 8304, 'loss/train': 1.3007279634475708} +07/25/2024 12:04:31 - INFO - __main__ - Step 8306: {'lr': 0.0004968058037811987, 'samples': 398688, 'steps': 8305, 'loss/train': 2.482872247695923} +07/25/2024 12:04:31 - INFO - __main__ - Step 8307: {'lr': 0.0004968049654946469, 'samples': 398736, 'steps': 8306, 'loss/train': 2.067812442779541} +07/25/2024 12:04:31 - INFO - __main__ - Step 8308: {'lr': 0.0004968041270988168, 'samples': 398784, 'steps': 8307, 'loss/train': 2.2661898136138916} +07/25/2024 12:04:32 - INFO - __main__ - Step 8309: {'lr': 0.0004968032885937087, 'samples': 398832, 'steps': 8308, 'loss/train': 1.3497804403305054} +07/25/2024 12:04:32 - INFO - __main__ - Step 8310: {'lr': 0.000496802449979323, 'samples': 398880, 'steps': 8309, 'loss/train': 1.7125269174575806} +07/25/2024 12:04:32 - INFO - __main__ - Step 8311: {'lr': 0.0004968016112556602, 'samples': 398928, 'steps': 8310, 'loss/train': 3.2380552291870117} +07/25/2024 12:04:32 - INFO - __main__ - Step 8312: {'lr': 0.0004968007724227203, 'samples': 398976, 'steps': 8311, 'loss/train': 1.939308524131775} +07/25/2024 12:04:33 - INFO - __main__ - Step 8313: {'lr': 0.0004967999334805042, 'samples': 399024, 'steps': 8312, 'loss/train': 2.8519065380096436} +07/25/2024 12:04:33 - INFO - __main__ - Step 8314: {'lr': 0.0004967990944290119, 'samples': 399072, 'steps': 8313, 'loss/train': 1.1408121585845947} +07/25/2024 12:04:33 - INFO - __main__ - Step 8315: {'lr': 0.0004967982552682439, 'samples': 399120, 'steps': 8314, 'loss/train': 1.954535722732544} +07/25/2024 12:04:34 - INFO - __main__ - Step 8316: {'lr': 0.0004967974159982005, 'samples': 399168, 'steps': 8315, 'loss/train': 2.3579812049865723} +07/25/2024 12:04:34 - INFO - __main__ - Step 8317: {'lr': 0.0004967965766188822, 'samples': 399216, 'steps': 8316, 'loss/train': 1.523182988166809} +07/25/2024 12:04:34 - INFO - __main__ - Step 8318: {'lr': 0.0004967957371302893, 'samples': 399264, 'steps': 8317, 'loss/train': 2.3052568435668945} +07/25/2024 12:04:34 - INFO - __main__ - Step 8319: {'lr': 0.000496794897532422, 'samples': 399312, 'steps': 8318, 'loss/train': 1.7360190153121948} +07/25/2024 12:04:35 - INFO - __main__ - Step 8320: {'lr': 0.0004967940578252811, 'samples': 399360, 'steps': 8319, 'loss/train': 1.9671131372451782} +07/25/2024 12:04:35 - INFO - __main__ - Step 8321: {'lr': 0.0004967932180088664, 'samples': 399408, 'steps': 8320, 'loss/train': 2.182501792907715} +07/25/2024 12:04:35 - INFO - __main__ - Step 8322: {'lr': 0.0004967923780831788, 'samples': 399456, 'steps': 8321, 'loss/train': 2.0856287479400635} +07/25/2024 12:04:36 - INFO - __main__ - Step 8323: {'lr': 0.0004967915380482184, 'samples': 399504, 'steps': 8322, 'loss/train': 2.0741231441497803} +07/25/2024 12:04:36 - INFO - __main__ - Step 8324: {'lr': 0.0004967906979039856, 'samples': 399552, 'steps': 8323, 'loss/train': 1.8731149435043335} +07/25/2024 12:04:36 - INFO - __main__ - Step 8325: {'lr': 0.0004967898576504808, 'samples': 399600, 'steps': 8324, 'loss/train': 1.7421393394470215} +07/25/2024 12:04:36 - INFO - __main__ - Step 8326: {'lr': 0.0004967890172877043, 'samples': 399648, 'steps': 8325, 'loss/train': 2.249288320541382} +07/25/2024 12:04:37 - INFO - __main__ - Step 8327: {'lr': 0.0004967881768156567, 'samples': 399696, 'steps': 8326, 'loss/train': 1.851499319076538} +07/25/2024 12:04:37 - INFO - __main__ - Step 8328: {'lr': 0.0004967873362343381, 'samples': 399744, 'steps': 8327, 'loss/train': 2.1938259601593018} +07/25/2024 12:04:37 - INFO - __main__ - Step 8329: {'lr': 0.000496786495543749, 'samples': 399792, 'steps': 8328, 'loss/train': 1.346934199333191} +07/25/2024 12:04:38 - INFO - __main__ - Step 8330: {'lr': 0.0004967856547438897, 'samples': 399840, 'steps': 8329, 'loss/train': 1.7710447311401367} +07/25/2024 12:04:38 - INFO - __main__ - Step 8331: {'lr': 0.0004967848138347607, 'samples': 399888, 'steps': 8330, 'loss/train': 1.9034605026245117} +07/25/2024 12:04:38 - INFO - __main__ - Step 8332: {'lr': 0.0004967839728163622, 'samples': 399936, 'steps': 8331, 'loss/train': 2.1207802295684814} +07/25/2024 12:04:38 - INFO - __main__ - Step 8333: {'lr': 0.0004967831316886948, 'samples': 399984, 'steps': 8332, 'loss/train': 1.762438416481018} +07/25/2024 12:04:39 - INFO - __main__ - Step 8334: {'lr': 0.0004967822904517588, 'samples': 400032, 'steps': 8333, 'loss/train': 2.2017102241516113} +07/25/2024 12:04:39 - INFO - __main__ - Step 8335: {'lr': 0.0004967814491055544, 'samples': 400080, 'steps': 8334, 'loss/train': 4.037234783172607} +07/25/2024 12:04:39 - INFO - __main__ - Step 8336: {'lr': 0.000496780607650082, 'samples': 400128, 'steps': 8335, 'loss/train': 2.0743443965911865} +07/25/2024 12:04:40 - INFO - __main__ - Step 8337: {'lr': 0.0004967797660853421, 'samples': 400176, 'steps': 8336, 'loss/train': 2.488917112350464} +07/25/2024 12:04:40 - INFO - __main__ - Step 8338: {'lr': 0.0004967789244113352, 'samples': 400224, 'steps': 8337, 'loss/train': 2.07915997505188} +07/25/2024 12:04:40 - INFO - __main__ - Step 8339: {'lr': 0.0004967780826280613, 'samples': 400272, 'steps': 8338, 'loss/train': 1.745278239250183} +07/25/2024 12:04:40 - INFO - __main__ - Step 8340: {'lr': 0.0004967772407355211, 'samples': 400320, 'steps': 8339, 'loss/train': 2.1547815799713135} +07/25/2024 12:04:41 - INFO - __main__ - Step 8341: {'lr': 0.0004967763987337149, 'samples': 400368, 'steps': 8340, 'loss/train': 2.07041335105896} +07/25/2024 12:04:41 - INFO - __main__ - Step 8342: {'lr': 0.0004967755566226429, 'samples': 400416, 'steps': 8341, 'loss/train': 1.7064441442489624} +07/25/2024 12:04:41 - INFO - __main__ - Step 8343: {'lr': 0.0004967747144023057, 'samples': 400464, 'steps': 8342, 'loss/train': 2.29443621635437} +07/25/2024 12:04:41 - INFO - __main__ - Step 8344: {'lr': 0.0004967738720727035, 'samples': 400512, 'steps': 8343, 'loss/train': 2.187941789627075} +07/25/2024 12:04:42 - INFO - __main__ - Step 8345: {'lr': 0.0004967730296338368, 'samples': 400560, 'steps': 8344, 'loss/train': 1.7529391050338745} +07/25/2024 12:04:42 - INFO - __main__ - Step 8346: {'lr': 0.0004967721870857059, 'samples': 400608, 'steps': 8345, 'loss/train': 1.9602110385894775} +07/25/2024 12:04:42 - INFO - __main__ - Step 8347: {'lr': 0.0004967713444283112, 'samples': 400656, 'steps': 8346, 'loss/train': 2.0630900859832764} +07/25/2024 12:04:43 - INFO - __main__ - Step 8348: {'lr': 0.000496770501661653, 'samples': 400704, 'steps': 8347, 'loss/train': 1.5371383428573608} +07/25/2024 12:04:43 - INFO - __main__ - Step 8349: {'lr': 0.0004967696587857318, 'samples': 400752, 'steps': 8348, 'loss/train': 0.6394417881965637} +07/25/2024 12:04:43 - INFO - __main__ - Step 8350: {'lr': 0.0004967688158005479, 'samples': 400800, 'steps': 8349, 'loss/train': 1.7363346815109253} +07/25/2024 12:04:43 - INFO - __main__ - Step 8351: {'lr': 0.0004967679727061016, 'samples': 400848, 'steps': 8350, 'loss/train': 1.4696986675262451} +07/25/2024 12:04:44 - INFO - __main__ - Step 8352: {'lr': 0.0004967671295023935, 'samples': 400896, 'steps': 8351, 'loss/train': 2.354426860809326} +07/25/2024 12:04:44 - INFO - __main__ - Step 8353: {'lr': 0.0004967662861894237, 'samples': 400944, 'steps': 8352, 'loss/train': 2.107466697692871} +07/25/2024 12:04:44 - INFO - __main__ - Step 8354: {'lr': 0.0004967654427671927, 'samples': 400992, 'steps': 8353, 'loss/train': 2.4284183979034424} +07/25/2024 12:04:45 - INFO - __main__ - Step 8355: {'lr': 0.0004967645992357009, 'samples': 401040, 'steps': 8354, 'loss/train': 1.6997408866882324} +07/25/2024 12:04:45 - INFO - __main__ - Step 8356: {'lr': 0.0004967637555949487, 'samples': 401088, 'steps': 8355, 'loss/train': 1.8568581342697144} +07/25/2024 12:04:45 - INFO - __main__ - Step 8357: {'lr': 0.0004967629118449365, 'samples': 401136, 'steps': 8356, 'loss/train': 1.799271821975708} +07/25/2024 12:04:45 - INFO - __main__ - Step 8358: {'lr': 0.0004967620679856645, 'samples': 401184, 'steps': 8357, 'loss/train': 1.841350793838501} +07/25/2024 12:04:46 - INFO - __main__ - Step 8359: {'lr': 0.0004967612240171332, 'samples': 401232, 'steps': 8358, 'loss/train': 1.8457728624343872} +07/25/2024 12:04:46 - INFO - __main__ - Step 8360: {'lr': 0.0004967603799393429, 'samples': 401280, 'steps': 8359, 'loss/train': 2.318779945373535} +07/25/2024 12:04:46 - INFO - __main__ - Step 8361: {'lr': 0.000496759535752294, 'samples': 401328, 'steps': 8360, 'loss/train': 1.4890005588531494} +07/25/2024 12:04:47 - INFO - __main__ - Step 8362: {'lr': 0.0004967586914559869, 'samples': 401376, 'steps': 8361, 'loss/train': 1.7536842823028564} +07/25/2024 12:04:47 - INFO - __main__ - Step 8363: {'lr': 0.000496757847050422, 'samples': 401424, 'steps': 8362, 'loss/train': 2.573963165283203} +07/25/2024 12:04:47 - INFO - __main__ - Step 8364: {'lr': 0.0004967570025355997, 'samples': 401472, 'steps': 8363, 'loss/train': 2.3232553005218506} +07/25/2024 12:04:47 - INFO - __main__ - Step 8365: {'lr': 0.0004967561579115202, 'samples': 401520, 'steps': 8364, 'loss/train': 2.041369676589966} +07/25/2024 12:04:48 - INFO - __main__ - Step 8366: {'lr': 0.000496755313178184, 'samples': 401568, 'steps': 8365, 'loss/train': 2.006737232208252} +07/25/2024 12:04:48 - INFO - __main__ - Step 8367: {'lr': 0.0004967544683355915, 'samples': 401616, 'steps': 8366, 'loss/train': 2.153614044189453} +07/25/2024 12:04:48 - INFO - __main__ - Step 8368: {'lr': 0.000496753623383743, 'samples': 401664, 'steps': 8367, 'loss/train': 1.9987822771072388} +07/25/2024 12:04:49 - INFO - __main__ - Step 8369: {'lr': 0.000496752778322639, 'samples': 401712, 'steps': 8368, 'loss/train': 2.407287120819092} +07/25/2024 12:04:49 - INFO - __main__ - Step 8370: {'lr': 0.0004967519331522798, 'samples': 401760, 'steps': 8369, 'loss/train': 2.527500867843628} +07/25/2024 12:04:49 - INFO - __main__ - Step 8371: {'lr': 0.0004967510878726656, 'samples': 401808, 'steps': 8370, 'loss/train': 0.6783971786499023} +07/25/2024 12:04:49 - INFO - __main__ - Step 8372: {'lr': 0.0004967502424837969, 'samples': 401856, 'steps': 8371, 'loss/train': 2.1486706733703613} +07/25/2024 12:04:50 - INFO - __main__ - Step 8373: {'lr': 0.0004967493969856744, 'samples': 401904, 'steps': 8372, 'loss/train': 0.6951618194580078} +07/25/2024 12:04:50 - INFO - __main__ - Step 8374: {'lr': 0.0004967485513782979, 'samples': 401952, 'steps': 8373, 'loss/train': 2.567842721939087} +07/25/2024 12:04:50 - INFO - __main__ - Step 8375: {'lr': 0.0004967477056616682, 'samples': 402000, 'steps': 8374, 'loss/train': 2.324998378753662} +07/25/2024 12:04:51 - INFO - __main__ - Step 8376: {'lr': 0.0004967468598357854, 'samples': 402048, 'steps': 8375, 'loss/train': 2.156447172164917} +07/25/2024 12:04:51 - INFO - __main__ - Step 8377: {'lr': 0.0004967460139006501, 'samples': 402096, 'steps': 8376, 'loss/train': 1.9892433881759644} +07/25/2024 12:04:51 - INFO - __main__ - Step 8378: {'lr': 0.0004967451678562626, 'samples': 402144, 'steps': 8377, 'loss/train': 2.3694393634796143} +07/25/2024 12:04:51 - INFO - __main__ - Step 8379: {'lr': 0.0004967443217026232, 'samples': 402192, 'steps': 8378, 'loss/train': 1.602359652519226} +07/25/2024 12:04:52 - INFO - __main__ - Step 8380: {'lr': 0.0004967434754397324, 'samples': 402240, 'steps': 8379, 'loss/train': 2.104947328567505} +07/25/2024 12:04:52 - INFO - __main__ - Step 8381: {'lr': 0.0004967426290675903, 'samples': 402288, 'steps': 8380, 'loss/train': 1.4752848148345947} +07/25/2024 12:04:52 - INFO - __main__ - Step 8382: {'lr': 0.0004967417825861978, 'samples': 402336, 'steps': 8381, 'loss/train': 1.7628891468048096} +07/25/2024 12:04:53 - INFO - __main__ - Step 8383: {'lr': 0.0004967409359955547, 'samples': 402384, 'steps': 8382, 'loss/train': 2.098215341567993} +07/25/2024 12:04:53 - INFO - __main__ - Step 8384: {'lr': 0.0004967400892956618, 'samples': 402432, 'steps': 8383, 'loss/train': 1.9974595308303833} +07/25/2024 12:04:53 - INFO - __main__ - Step 8385: {'lr': 0.0004967392424865191, 'samples': 402480, 'steps': 8384, 'loss/train': 2.3658339977264404} +07/25/2024 12:04:53 - INFO - __main__ - Step 8386: {'lr': 0.0004967383955681273, 'samples': 402528, 'steps': 8385, 'loss/train': 2.080949306488037} +07/25/2024 12:04:54 - INFO - __main__ - Step 8387: {'lr': 0.0004967375485404867, 'samples': 402576, 'steps': 8386, 'loss/train': 2.001070022583008} +07/25/2024 12:04:54 - INFO - __main__ - Step 8388: {'lr': 0.0004967367014035975, 'samples': 402624, 'steps': 8387, 'loss/train': 1.8898266553878784} +07/25/2024 12:04:54 - INFO - __main__ - Step 8389: {'lr': 0.0004967358541574604, 'samples': 402672, 'steps': 8388, 'loss/train': 0.8993946313858032} +07/25/2024 12:04:55 - INFO - __main__ - Step 8390: {'lr': 0.0004967350068020754, 'samples': 402720, 'steps': 8389, 'loss/train': 1.5271477699279785} +07/25/2024 12:04:55 - INFO - __main__ - Step 8391: {'lr': 0.0004967341593374431, 'samples': 402768, 'steps': 8390, 'loss/train': 2.0006906986236572} +07/25/2024 12:04:55 - INFO - __main__ - Step 8392: {'lr': 0.0004967333117635638, 'samples': 402816, 'steps': 8391, 'loss/train': 1.9920809268951416} +07/25/2024 12:04:55 - INFO - __main__ - Step 8393: {'lr': 0.000496732464080438, 'samples': 402864, 'steps': 8392, 'loss/train': 1.4501296281814575} +07/25/2024 12:04:56 - INFO - __main__ - Step 8394: {'lr': 0.0004967316162880659, 'samples': 402912, 'steps': 8393, 'loss/train': 2.2589361667633057} +07/25/2024 12:04:56 - INFO - __main__ - Step 8395: {'lr': 0.0004967307683864479, 'samples': 402960, 'steps': 8394, 'loss/train': 2.287384033203125} +07/25/2024 12:04:56 - INFO - __main__ - Step 8396: {'lr': 0.0004967299203755845, 'samples': 403008, 'steps': 8395, 'loss/train': 1.7515209913253784} +07/25/2024 12:04:57 - INFO - __main__ - Step 8397: {'lr': 0.0004967290722554759, 'samples': 403056, 'steps': 8396, 'loss/train': 0.5446632504463196} +07/25/2024 12:04:57 - INFO - __main__ - Step 8398: {'lr': 0.0004967282240261227, 'samples': 403104, 'steps': 8397, 'loss/train': 2.080981969833374} +07/25/2024 12:04:57 - INFO - __main__ - Step 8399: {'lr': 0.0004967273756875251, 'samples': 403152, 'steps': 8398, 'loss/train': 1.3432313203811646} +07/25/2024 12:04:57 - INFO - __main__ - Step 8400: {'lr': 0.0004967265272396836, 'samples': 403200, 'steps': 8399, 'loss/train': 1.977624535560608} +07/25/2024 12:04:58 - INFO - __main__ - Step 8401: {'lr': 0.0004967256786825985, 'samples': 403248, 'steps': 8400, 'loss/train': 1.661677598953247} +07/25/2024 12:04:58 - INFO - __main__ - Step 8402: {'lr': 0.0004967248300162702, 'samples': 403296, 'steps': 8401, 'loss/train': 2.244856357574463} +07/25/2024 12:04:58 - INFO - __main__ - Step 8403: {'lr': 0.000496723981240699, 'samples': 403344, 'steps': 8402, 'loss/train': 1.979982852935791} +07/25/2024 12:04:59 - INFO - __main__ - Step 8404: {'lr': 0.0004967231323558853, 'samples': 403392, 'steps': 8403, 'loss/train': 0.9412580728530884} +07/25/2024 12:04:59 - INFO - __main__ - Step 8405: {'lr': 0.0004967222833618296, 'samples': 403440, 'steps': 8404, 'loss/train': 1.702345371246338} +07/25/2024 12:04:59 - INFO - __main__ - Step 8406: {'lr': 0.0004967214342585321, 'samples': 403488, 'steps': 8405, 'loss/train': 2.486311912536621} +07/25/2024 12:04:59 - INFO - __main__ - Step 8407: {'lr': 0.0004967205850459933, 'samples': 403536, 'steps': 8406, 'loss/train': 1.764628291130066} +07/25/2024 12:05:00 - INFO - __main__ - Step 8408: {'lr': 0.0004967197357242135, 'samples': 403584, 'steps': 8407, 'loss/train': 2.017430067062378} +07/25/2024 12:05:00 - INFO - __main__ - Step 8409: {'lr': 0.0004967188862931932, 'samples': 403632, 'steps': 8408, 'loss/train': 2.0261712074279785} +07/25/2024 12:05:00 - INFO - __main__ - Step 8410: {'lr': 0.0004967180367529326, 'samples': 403680, 'steps': 8409, 'loss/train': 2.1123998165130615} +07/25/2024 12:05:01 - INFO - __main__ - Step 8411: {'lr': 0.0004967171871034322, 'samples': 403728, 'steps': 8410, 'loss/train': 1.92788565158844} +07/25/2024 12:05:01 - INFO - __main__ - Step 8412: {'lr': 0.0004967163373446923, 'samples': 403776, 'steps': 8411, 'loss/train': 2.2937216758728027} +07/25/2024 12:05:01 - INFO - __main__ - Step 8413: {'lr': 0.0004967154874767135, 'samples': 403824, 'steps': 8412, 'loss/train': 0.2535994052886963} +07/25/2024 12:05:01 - INFO - __main__ - Step 8414: {'lr': 0.0004967146374994959, 'samples': 403872, 'steps': 8413, 'loss/train': 2.4395432472229004} +07/25/2024 12:05:02 - INFO - __main__ - Step 8415: {'lr': 0.0004967137874130398, 'samples': 403920, 'steps': 8414, 'loss/train': 2.036571741104126} +07/25/2024 12:05:02 - INFO - __main__ - Step 8416: {'lr': 0.0004967129372173459, 'samples': 403968, 'steps': 8415, 'loss/train': 2.199547529220581} +07/25/2024 12:05:02 - INFO - __main__ - Step 8417: {'lr': 0.0004967120869124144, 'samples': 404016, 'steps': 8416, 'loss/train': 0.8892855048179626} +07/25/2024 12:05:03 - INFO - __main__ - Step 8418: {'lr': 0.0004967112364982458, 'samples': 404064, 'steps': 8417, 'loss/train': 2.1930439472198486} +07/25/2024 12:05:03 - INFO - __main__ - Step 8419: {'lr': 0.0004967103859748402, 'samples': 404112, 'steps': 8418, 'loss/train': 2.3112940788269043} +07/25/2024 12:05:03 - INFO - __main__ - Step 8420: {'lr': 0.0004967095353421983, 'samples': 404160, 'steps': 8419, 'loss/train': 1.958461880683899} +07/25/2024 12:05:03 - INFO - __main__ - Step 8421: {'lr': 0.0004967086846003202, 'samples': 404208, 'steps': 8420, 'loss/train': 0.49494293332099915} +07/25/2024 12:05:04 - INFO - __main__ - Step 8422: {'lr': 0.0004967078337492065, 'samples': 404256, 'steps': 8421, 'loss/train': 2.160832643508911} +07/25/2024 12:05:04 - INFO - __main__ - Step 8423: {'lr': 0.0004967069827888575, 'samples': 404304, 'steps': 8422, 'loss/train': 1.4782601594924927} +07/25/2024 12:05:04 - INFO - __main__ - Step 8424: {'lr': 0.0004967061317192736, 'samples': 404352, 'steps': 8423, 'loss/train': 1.9400023221969604} +07/25/2024 12:05:04 - INFO - __main__ - Step 8425: {'lr': 0.000496705280540455, 'samples': 404400, 'steps': 8424, 'loss/train': 2.411261558532715} +07/25/2024 12:05:05 - INFO - __main__ - Step 8426: {'lr': 0.0004967044292524022, 'samples': 404448, 'steps': 8425, 'loss/train': 0.8961418867111206} +07/25/2024 12:05:05 - INFO - __main__ - Step 8427: {'lr': 0.0004967035778551157, 'samples': 404496, 'steps': 8426, 'loss/train': 2.0237748622894287} +07/25/2024 12:05:05 - INFO - __main__ - Step 8428: {'lr': 0.0004967027263485958, 'samples': 404544, 'steps': 8427, 'loss/train': 1.9895825386047363} +07/25/2024 12:05:06 - INFO - __main__ - Step 8429: {'lr': 0.0004967018747328428, 'samples': 404592, 'steps': 8428, 'loss/train': 1.585852026939392} +07/25/2024 12:05:06 - INFO - __main__ - Step 8430: {'lr': 0.000496701023007857, 'samples': 404640, 'steps': 8429, 'loss/train': 1.57892906665802} +07/25/2024 12:05:06 - INFO - __main__ - Step 8431: {'lr': 0.000496700171173639, 'samples': 404688, 'steps': 8430, 'loss/train': 2.2921864986419678} +07/25/2024 12:05:06 - INFO - __main__ - Step 8432: {'lr': 0.0004966993192301891, 'samples': 404736, 'steps': 8431, 'loss/train': 1.6673253774642944} +07/25/2024 12:05:07 - INFO - __main__ - Step 8433: {'lr': 0.0004966984671775077, 'samples': 404784, 'steps': 8432, 'loss/train': 2.0824756622314453} +07/25/2024 12:05:07 - INFO - __main__ - Step 8434: {'lr': 0.000496697615015595, 'samples': 404832, 'steps': 8433, 'loss/train': 1.824018120765686} +07/25/2024 12:05:07 - INFO - __main__ - Step 8435: {'lr': 0.0004966967627444516, 'samples': 404880, 'steps': 8434, 'loss/train': 2.3631255626678467} +07/25/2024 12:05:08 - INFO - __main__ - Step 8436: {'lr': 0.0004966959103640778, 'samples': 404928, 'steps': 8435, 'loss/train': 1.9678412675857544} +07/25/2024 12:05:08 - INFO - __main__ - Step 8437: {'lr': 0.0004966950578744739, 'samples': 404976, 'steps': 8436, 'loss/train': 0.19747687876224518} +07/25/2024 12:05:08 - INFO - __main__ - Step 8438: {'lr': 0.0004966942052756403, 'samples': 405024, 'steps': 8437, 'loss/train': 2.589932441711426} +07/25/2024 12:05:08 - INFO - __main__ - Step 8439: {'lr': 0.0004966933525675775, 'samples': 405072, 'steps': 8438, 'loss/train': 1.5164384841918945} +07/25/2024 12:05:09 - INFO - __main__ - Step 8440: {'lr': 0.0004966924997502858, 'samples': 405120, 'steps': 8439, 'loss/train': 2.334099769592285} +07/25/2024 12:05:09 - INFO - __main__ - Step 8441: {'lr': 0.0004966916468237655, 'samples': 405168, 'steps': 8440, 'loss/train': 1.8622548580169678} +07/25/2024 12:05:09 - INFO - __main__ - Step 8442: {'lr': 0.0004966907937880171, 'samples': 405216, 'steps': 8441, 'loss/train': 2.138606309890747} +07/25/2024 12:05:10 - INFO - __main__ - Step 8443: {'lr': 0.0004966899406430409, 'samples': 405264, 'steps': 8442, 'loss/train': 2.2538561820983887} +07/25/2024 12:05:10 - INFO - __main__ - Step 8444: {'lr': 0.0004966890873888374, 'samples': 405312, 'steps': 8443, 'loss/train': 1.9500033855438232} +07/25/2024 12:05:10 - INFO - __main__ - Step 8445: {'lr': 0.0004966882340254068, 'samples': 405360, 'steps': 8444, 'loss/train': 0.44776174426078796} +07/25/2024 12:05:10 - INFO - __main__ - Step 8446: {'lr': 0.0004966873805527496, 'samples': 405408, 'steps': 8445, 'loss/train': 1.3445910215377808} +07/25/2024 12:05:11 - INFO - __main__ - Step 8447: {'lr': 0.0004966865269708661, 'samples': 405456, 'steps': 8446, 'loss/train': 2.225375175476074} +07/25/2024 12:05:11 - INFO - __main__ - Step 8448: {'lr': 0.0004966856732797568, 'samples': 405504, 'steps': 8447, 'loss/train': 2.1322519779205322} +07/25/2024 12:05:11 - INFO - __main__ - Step 8449: {'lr': 0.0004966848194794219, 'samples': 405552, 'steps': 8448, 'loss/train': 2.440295457839966} +07/25/2024 12:05:12 - INFO - __main__ - Step 8450: {'lr': 0.0004966839655698618, 'samples': 405600, 'steps': 8449, 'loss/train': 1.0798312425613403} +07/25/2024 12:05:12 - INFO - __main__ - Step 8451: {'lr': 0.0004966831115510771, 'samples': 405648, 'steps': 8450, 'loss/train': 2.181558847427368} +07/25/2024 12:05:12 - INFO - __main__ - Step 8452: {'lr': 0.000496682257423068, 'samples': 405696, 'steps': 8451, 'loss/train': 1.9079111814498901} +07/25/2024 12:05:12 - INFO - __main__ - Step 8453: {'lr': 0.000496681403185835, 'samples': 405744, 'steps': 8452, 'loss/train': 1.7292563915252686} +07/25/2024 12:05:13 - INFO - __main__ - Step 8454: {'lr': 0.0004966805488393782, 'samples': 405792, 'steps': 8453, 'loss/train': 1.6332018375396729} +07/25/2024 12:05:13 - INFO - __main__ - Step 8455: {'lr': 0.0004966796943836983, 'samples': 405840, 'steps': 8454, 'loss/train': 1.3411757946014404} +07/25/2024 12:05:13 - INFO - __main__ - Step 8456: {'lr': 0.0004966788398187954, 'samples': 405888, 'steps': 8455, 'loss/train': 2.120298385620117} +07/25/2024 12:05:14 - INFO - __main__ - Step 8457: {'lr': 0.0004966779851446702, 'samples': 405936, 'steps': 8456, 'loss/train': 2.32818865776062} +07/25/2024 12:05:14 - INFO - __main__ - Step 8458: {'lr': 0.0004966771303613228, 'samples': 405984, 'steps': 8457, 'loss/train': 2.2426412105560303} +07/25/2024 12:05:14 - INFO - __main__ - Step 8459: {'lr': 0.0004966762754687538, 'samples': 406032, 'steps': 8458, 'loss/train': 2.3285887241363525} +07/25/2024 12:05:14 - INFO - __main__ - Step 8460: {'lr': 0.0004966754204669632, 'samples': 406080, 'steps': 8459, 'loss/train': 2.0248820781707764} +07/25/2024 12:05:15 - INFO - __main__ - Step 8461: {'lr': 0.0004966745653559518, 'samples': 406128, 'steps': 8460, 'loss/train': 0.21014907956123352} +07/25/2024 12:05:15 - INFO - __main__ - Step 8462: {'lr': 0.0004966737101357199, 'samples': 406176, 'steps': 8461, 'loss/train': 1.6247793436050415} +07/25/2024 12:05:15 - INFO - __main__ - Step 8463: {'lr': 0.0004966728548062676, 'samples': 406224, 'steps': 8462, 'loss/train': 1.1765234470367432} +07/25/2024 12:05:16 - INFO - __main__ - Step 8464: {'lr': 0.0004966719993675956, 'samples': 406272, 'steps': 8463, 'loss/train': 1.8142082691192627} +07/25/2024 12:05:16 - INFO - __main__ - Step 8465: {'lr': 0.0004966711438197042, 'samples': 406320, 'steps': 8464, 'loss/train': 2.042073965072632} +07/25/2024 12:05:16 - INFO - __main__ - Step 8466: {'lr': 0.0004966702881625936, 'samples': 406368, 'steps': 8465, 'loss/train': 1.931062936782837} +07/25/2024 12:05:16 - INFO - __main__ - Step 8467: {'lr': 0.0004966694323962643, 'samples': 406416, 'steps': 8466, 'loss/train': 2.084097385406494} +07/25/2024 12:05:17 - INFO - __main__ - Step 8468: {'lr': 0.0004966685765207167, 'samples': 406464, 'steps': 8467, 'loss/train': 1.99198579788208} +07/25/2024 12:05:17 - INFO - __main__ - Step 8469: {'lr': 0.0004966677205359512, 'samples': 406512, 'steps': 8468, 'loss/train': 0.3934519588947296} +07/25/2024 12:05:17 - INFO - __main__ - Step 8470: {'lr': 0.0004966668644419682, 'samples': 406560, 'steps': 8469, 'loss/train': 1.7731597423553467} +07/25/2024 12:05:18 - INFO - __main__ - Step 8471: {'lr': 0.000496666008238768, 'samples': 406608, 'steps': 8470, 'loss/train': 2.4420716762542725} +07/25/2024 12:05:18 - INFO - __main__ - Step 8472: {'lr': 0.0004966651519263509, 'samples': 406656, 'steps': 8471, 'loss/train': 2.1180365085601807} +07/25/2024 12:05:18 - INFO - __main__ - Step 8473: {'lr': 0.0004966642955047176, 'samples': 406704, 'steps': 8472, 'loss/train': 2.1529531478881836} +07/25/2024 12:05:18 - INFO - __main__ - Step 8474: {'lr': 0.000496663438973868, 'samples': 406752, 'steps': 8473, 'loss/train': 1.0920677185058594} +07/25/2024 12:05:19 - INFO - __main__ - Step 8475: {'lr': 0.0004966625823338029, 'samples': 406800, 'steps': 8474, 'loss/train': 2.0133180618286133} +07/25/2024 12:05:19 - INFO - __main__ - Step 8476: {'lr': 0.0004966617255845225, 'samples': 406848, 'steps': 8475, 'loss/train': 1.8177666664123535} +07/25/2024 12:05:19 - INFO - __main__ - Step 8477: {'lr': 0.0004966608687260272, 'samples': 406896, 'steps': 8476, 'loss/train': 1.6416386365890503} +07/25/2024 12:05:20 - INFO - __main__ - Step 8478: {'lr': 0.0004966600117583173, 'samples': 406944, 'steps': 8477, 'loss/train': 2.1936848163604736} +07/25/2024 12:05:20 - INFO - __main__ - Step 8479: {'lr': 0.0004966591546813933, 'samples': 406992, 'steps': 8478, 'loss/train': 2.139564275741577} +07/25/2024 12:05:20 - INFO - __main__ - Step 8480: {'lr': 0.0004966582974952556, 'samples': 407040, 'steps': 8479, 'loss/train': 5.451416969299316} +07/25/2024 12:05:20 - INFO - __main__ - Step 8481: {'lr': 0.0004966574401999045, 'samples': 407088, 'steps': 8480, 'loss/train': 2.056274890899658} +07/25/2024 12:05:21 - INFO - __main__ - Step 8482: {'lr': 0.0004966565827953404, 'samples': 407136, 'steps': 8481, 'loss/train': 2.1908936500549316} +07/25/2024 12:05:21 - INFO - __main__ - Step 8483: {'lr': 0.0004966557252815636, 'samples': 407184, 'steps': 8482, 'loss/train': 1.8485909700393677} +07/25/2024 12:05:21 - INFO - __main__ - Step 8484: {'lr': 0.0004966548676585746, 'samples': 407232, 'steps': 8483, 'loss/train': 0.8318679928779602} +07/25/2024 12:05:22 - INFO - __main__ - Step 8485: {'lr': 0.0004966540099263738, 'samples': 407280, 'steps': 8484, 'loss/train': 0.2326323688030243} +07/25/2024 12:05:22 - INFO - __main__ - Step 8486: {'lr': 0.0004966531520849614, 'samples': 407328, 'steps': 8485, 'loss/train': 1.851102590560913} +07/25/2024 12:05:22 - INFO - __main__ - Step 8487: {'lr': 0.000496652294134338, 'samples': 407376, 'steps': 8486, 'loss/train': 1.4471861124038696} +07/25/2024 12:05:22 - INFO - __main__ - Step 8488: {'lr': 0.0004966514360745037, 'samples': 407424, 'steps': 8487, 'loss/train': 2.338290214538574} +07/25/2024 12:05:23 - INFO - __main__ - Step 8489: {'lr': 0.0004966505779054592, 'samples': 407472, 'steps': 8488, 'loss/train': 2.5794289112091064} +07/25/2024 12:05:23 - INFO - __main__ - Step 8490: {'lr': 0.0004966497196272048, 'samples': 407520, 'steps': 8489, 'loss/train': 1.9038928747177124} +07/25/2024 12:05:23 - INFO - __main__ - Step 8491: {'lr': 0.0004966488612397407, 'samples': 407568, 'steps': 8490, 'loss/train': 2.0383238792419434} +07/25/2024 12:05:23 - INFO - __main__ - Step 8492: {'lr': 0.0004966480027430674, 'samples': 407616, 'steps': 8491, 'loss/train': 1.8838845491409302} +07/25/2024 12:05:24 - INFO - __main__ - Step 8493: {'lr': 0.0004966471441371853, 'samples': 407664, 'steps': 8492, 'loss/train': 0.4193074405193329} +07/25/2024 12:05:24 - INFO - __main__ - Step 8494: {'lr': 0.0004966462854220948, 'samples': 407712, 'steps': 8493, 'loss/train': 2.0495638847351074} +07/25/2024 12:05:24 - INFO - __main__ - Step 8495: {'lr': 0.0004966454265977961, 'samples': 407760, 'steps': 8494, 'loss/train': 1.8616118431091309} +07/25/2024 12:05:25 - INFO - __main__ - Step 8496: {'lr': 0.0004966445676642899, 'samples': 407808, 'steps': 8495, 'loss/train': 1.8503751754760742} +07/25/2024 12:05:25 - INFO - __main__ - Step 8497: {'lr': 0.0004966437086215763, 'samples': 407856, 'steps': 8496, 'loss/train': 2.3453524112701416} +07/25/2024 12:05:25 - INFO - __main__ - Step 8498: {'lr': 0.0004966428494696558, 'samples': 407904, 'steps': 8497, 'loss/train': 2.122035264968872} +07/25/2024 12:05:25 - INFO - __main__ - Step 8499: {'lr': 0.0004966419902085287, 'samples': 407952, 'steps': 8498, 'loss/train': 1.9781101942062378} +07/25/2024 12:05:26 - INFO - __main__ - Step 8500: {'lr': 0.0004966411308381955, 'samples': 408000, 'steps': 8499, 'loss/train': 1.7144430875778198} +07/25/2024 12:05:26 - INFO - __main__ - Step 8501: {'lr': 0.0004966402713586564, 'samples': 408048, 'steps': 8500, 'loss/train': 1.7663663625717163} +07/25/2024 12:05:26 - INFO - __main__ - Step 8502: {'lr': 0.000496639411769912, 'samples': 408096, 'steps': 8501, 'loss/train': 2.206798553466797} +07/25/2024 12:05:27 - INFO - __main__ - Step 8503: {'lr': 0.0004966385520719627, 'samples': 408144, 'steps': 8502, 'loss/train': 1.9463423490524292} +07/25/2024 12:05:27 - INFO - __main__ - Step 8504: {'lr': 0.0004966376922648085, 'samples': 408192, 'steps': 8503, 'loss/train': 4.3819732666015625} +07/25/2024 12:05:27 - INFO - __main__ - Step 8505: {'lr': 0.0004966368323484502, 'samples': 408240, 'steps': 8504, 'loss/train': 1.8888534307479858} +07/25/2024 12:05:27 - INFO - __main__ - Step 8506: {'lr': 0.000496635972322888, 'samples': 408288, 'steps': 8505, 'loss/train': 1.2620059251785278} +07/25/2024 12:05:28 - INFO - __main__ - Step 8507: {'lr': 0.0004966351121881223, 'samples': 408336, 'steps': 8506, 'loss/train': 1.6307634115219116} +07/25/2024 12:05:28 - INFO - __main__ - Step 8508: {'lr': 0.0004966342519441535, 'samples': 408384, 'steps': 8507, 'loss/train': 1.8330146074295044} +07/25/2024 12:05:28 - INFO - __main__ - Step 8509: {'lr': 0.0004966333915909819, 'samples': 408432, 'steps': 8508, 'loss/train': 0.21067851781845093} +07/25/2024 12:05:29 - INFO - __main__ - Step 8510: {'lr': 0.000496632531128608, 'samples': 408480, 'steps': 8509, 'loss/train': 2.140705108642578} +07/25/2024 12:05:29 - INFO - __main__ - Step 8511: {'lr': 0.0004966316705570321, 'samples': 408528, 'steps': 8510, 'loss/train': 2.226113796234131} +07/25/2024 12:05:29 - INFO - __main__ - Step 8512: {'lr': 0.0004966308098762547, 'samples': 408576, 'steps': 8511, 'loss/train': 2.0011966228485107} +07/25/2024 12:05:29 - INFO - __main__ - Step 8513: {'lr': 0.000496629949086276, 'samples': 408624, 'steps': 8512, 'loss/train': 2.3976058959960938} +07/25/2024 12:05:30 - INFO - __main__ - Step 8514: {'lr': 0.0004966290881870964, 'samples': 408672, 'steps': 8513, 'loss/train': 2.184356212615967} +07/25/2024 12:05:30 - INFO - __main__ - Step 8515: {'lr': 0.0004966282271787164, 'samples': 408720, 'steps': 8514, 'loss/train': 1.9374247789382935} +07/25/2024 12:05:30 - INFO - __main__ - Step 8516: {'lr': 0.0004966273660611364, 'samples': 408768, 'steps': 8515, 'loss/train': 1.9841538667678833} +07/25/2024 12:05:31 - INFO - __main__ - Step 8517: {'lr': 0.0004966265048343566, 'samples': 408816, 'steps': 8516, 'loss/train': 0.3504413962364197} +07/25/2024 12:05:31 - INFO - __main__ - Step 8518: {'lr': 0.0004966256434983776, 'samples': 408864, 'steps': 8517, 'loss/train': 2.22424578666687} +07/25/2024 12:05:31 - INFO - __main__ - Step 8519: {'lr': 0.0004966247820531996, 'samples': 408912, 'steps': 8518, 'loss/train': 2.142953395843506} +07/25/2024 12:05:31 - INFO - __main__ - Step 8520: {'lr': 0.0004966239204988232, 'samples': 408960, 'steps': 8519, 'loss/train': 1.5993481874465942} +07/25/2024 12:05:32 - INFO - __main__ - Step 8521: {'lr': 0.0004966230588352485, 'samples': 409008, 'steps': 8520, 'loss/train': 1.8876824378967285} +07/25/2024 12:05:32 - INFO - __main__ - Step 8522: {'lr': 0.000496622197062476, 'samples': 409056, 'steps': 8521, 'loss/train': 1.7061152458190918} +07/25/2024 12:05:32 - INFO - __main__ - Step 8523: {'lr': 0.0004966213351805062, 'samples': 409104, 'steps': 8522, 'loss/train': 1.8683452606201172} +07/25/2024 12:05:33 - INFO - __main__ - Step 8524: {'lr': 0.0004966204731893393, 'samples': 409152, 'steps': 8523, 'loss/train': 1.9457080364227295} +07/25/2024 12:05:33 - INFO - __main__ - Step 8525: {'lr': 0.0004966196110889759, 'samples': 409200, 'steps': 8524, 'loss/train': 1.3797709941864014} +07/25/2024 12:05:33 - INFO - __main__ - Step 8526: {'lr': 0.0004966187488794162, 'samples': 409248, 'steps': 8525, 'loss/train': 1.9238028526306152} +07/25/2024 12:05:33 - INFO - __main__ - Step 8527: {'lr': 0.0004966178865606605, 'samples': 409296, 'steps': 8526, 'loss/train': 1.813035488128662} +07/25/2024 12:05:34 - INFO - __main__ - Step 8528: {'lr': 0.0004966170241327094, 'samples': 409344, 'steps': 8527, 'loss/train': 2.2098538875579834} +07/25/2024 12:05:34 - INFO - __main__ - Step 8529: {'lr': 0.0004966161615955632, 'samples': 409392, 'steps': 8528, 'loss/train': 1.6305365562438965} +07/25/2024 12:05:34 - INFO - __main__ - Step 8530: {'lr': 0.0004966152989492222, 'samples': 409440, 'steps': 8529, 'loss/train': 1.2994575500488281} +07/25/2024 12:05:35 - INFO - __main__ - Step 8531: {'lr': 0.0004966144361936869, 'samples': 409488, 'steps': 8530, 'loss/train': 1.8932965993881226} +07/25/2024 12:05:35 - INFO - __main__ - Step 8532: {'lr': 0.0004966135733289576, 'samples': 409536, 'steps': 8531, 'loss/train': 1.9285050630569458} +07/25/2024 12:05:35 - INFO - __main__ - Step 8533: {'lr': 0.0004966127103550349, 'samples': 409584, 'steps': 8532, 'loss/train': 1.3400800228118896} +07/25/2024 12:05:35 - INFO - __main__ - Step 8534: {'lr': 0.0004966118472719187, 'samples': 409632, 'steps': 8533, 'loss/train': 1.9066965579986572} +07/25/2024 12:05:36 - INFO - __main__ - Step 8535: {'lr': 0.0004966109840796099, 'samples': 409680, 'steps': 8534, 'loss/train': 1.78803551197052} +07/25/2024 12:05:36 - INFO - __main__ - Step 8536: {'lr': 0.0004966101207781086, 'samples': 409728, 'steps': 8535, 'loss/train': 2.056518793106079} +07/25/2024 12:05:36 - INFO - __main__ - Step 8537: {'lr': 0.0004966092573674153, 'samples': 409776, 'steps': 8536, 'loss/train': 1.628585934638977} +07/25/2024 12:05:37 - INFO - __main__ - Step 8538: {'lr': 0.0004966083938475303, 'samples': 409824, 'steps': 8537, 'loss/train': 1.8148049116134644} +07/25/2024 12:05:37 - INFO - __main__ - Step 8539: {'lr': 0.000496607530218454, 'samples': 409872, 'steps': 8538, 'loss/train': 1.8394606113433838} +07/25/2024 12:05:37 - INFO - __main__ - Step 8540: {'lr': 0.0004966066664801868, 'samples': 409920, 'steps': 8539, 'loss/train': 2.2972989082336426} +07/25/2024 12:05:37 - INFO - __main__ - Step 8541: {'lr': 0.0004966058026327292, 'samples': 409968, 'steps': 8540, 'loss/train': 1.9562495946884155} +07/25/2024 12:05:38 - INFO - __main__ - Step 8542: {'lr': 0.0004966049386760813, 'samples': 410016, 'steps': 8541, 'loss/train': 1.717575192451477} +07/25/2024 12:05:38 - INFO - __main__ - Step 8543: {'lr': 0.0004966040746102438, 'samples': 410064, 'steps': 8542, 'loss/train': 2.5203840732574463} +07/25/2024 12:05:38 - INFO - __main__ - Step 8544: {'lr': 0.0004966032104352168, 'samples': 410112, 'steps': 8543, 'loss/train': 0.39065220952033997} +07/25/2024 12:05:39 - INFO - __main__ - Step 8545: {'lr': 0.0004966023461510007, 'samples': 410160, 'steps': 8544, 'loss/train': 2.34439754486084} +07/25/2024 12:05:39 - INFO - __main__ - Step 8546: {'lr': 0.0004966014817575961, 'samples': 410208, 'steps': 8545, 'loss/train': 2.4757742881774902} +07/25/2024 12:05:39 - INFO - __main__ - Step 8547: {'lr': 0.0004966006172550032, 'samples': 410256, 'steps': 8546, 'loss/train': 1.8318657875061035} +07/25/2024 12:05:39 - INFO - __main__ - Step 8548: {'lr': 0.0004965997526432226, 'samples': 410304, 'steps': 8547, 'loss/train': 1.5429966449737549} +07/25/2024 12:05:40 - INFO - __main__ - Step 8549: {'lr': 0.0004965988879222545, 'samples': 410352, 'steps': 8548, 'loss/train': 2.23600435256958} +07/25/2024 12:05:40 - INFO - __main__ - Step 8550: {'lr': 0.0004965980230920993, 'samples': 410400, 'steps': 8549, 'loss/train': 1.9485052824020386} +07/25/2024 12:05:40 - INFO - __main__ - Step 8551: {'lr': 0.0004965971581527574, 'samples': 410448, 'steps': 8550, 'loss/train': 2.5011043548583984} +07/25/2024 12:05:41 - INFO - __main__ - Step 8552: {'lr': 0.0004965962931042292, 'samples': 410496, 'steps': 8551, 'loss/train': 1.9544490575790405} +07/25/2024 12:05:41 - INFO - __main__ - Step 8553: {'lr': 0.0004965954279465151, 'samples': 410544, 'steps': 8552, 'loss/train': 1.749611258506775} +07/25/2024 12:05:41 - INFO - __main__ - Step 8554: {'lr': 0.0004965945626796154, 'samples': 410592, 'steps': 8553, 'loss/train': 2.3358168601989746} +07/25/2024 12:05:41 - INFO - __main__ - Step 8555: {'lr': 0.0004965936973035306, 'samples': 410640, 'steps': 8554, 'loss/train': 2.1458945274353027} +07/25/2024 12:05:42 - INFO - __main__ - Step 8556: {'lr': 0.000496592831818261, 'samples': 410688, 'steps': 8555, 'loss/train': 1.4500702619552612} +07/25/2024 12:05:42 - INFO - __main__ - Step 8557: {'lr': 0.000496591966223807, 'samples': 410736, 'steps': 8556, 'loss/train': 2.05572509765625} +07/25/2024 12:05:42 - INFO - __main__ - Step 8558: {'lr': 0.0004965911005201689, 'samples': 410784, 'steps': 8557, 'loss/train': 2.06471848487854} +07/25/2024 12:05:42 - INFO - __main__ - Step 8559: {'lr': 0.0004965902347073472, 'samples': 410832, 'steps': 8558, 'loss/train': 1.530542016029358} +07/25/2024 12:05:43 - INFO - __main__ - Step 8560: {'lr': 0.0004965893687853423, 'samples': 410880, 'steps': 8559, 'loss/train': 1.631116509437561} +07/25/2024 12:05:43 - INFO - __main__ - Step 8561: {'lr': 0.0004965885027541546, 'samples': 410928, 'steps': 8560, 'loss/train': 1.8285927772521973} +07/25/2024 12:05:43 - INFO - __main__ - Step 8562: {'lr': 0.0004965876366137843, 'samples': 410976, 'steps': 8561, 'loss/train': 1.5393579006195068} +07/25/2024 12:05:44 - INFO - __main__ - Step 8563: {'lr': 0.000496586770364232, 'samples': 411024, 'steps': 8562, 'loss/train': 2.0825512409210205} +07/25/2024 12:05:44 - INFO - __main__ - Step 8564: {'lr': 0.0004965859040054979, 'samples': 411072, 'steps': 8563, 'loss/train': 1.7465757131576538} +07/25/2024 12:05:44 - INFO - __main__ - Step 8565: {'lr': 0.0004965850375375825, 'samples': 411120, 'steps': 8564, 'loss/train': 1.7137178182601929} +07/25/2024 12:05:44 - INFO - __main__ - Step 8566: {'lr': 0.0004965841709604862, 'samples': 411168, 'steps': 8565, 'loss/train': 1.849355936050415} +07/25/2024 12:05:45 - INFO - __main__ - Step 8567: {'lr': 0.0004965833042742093, 'samples': 411216, 'steps': 8566, 'loss/train': 2.5812277793884277} +07/25/2024 12:05:45 - INFO - __main__ - Step 8568: {'lr': 0.0004965824374787522, 'samples': 411264, 'steps': 8567, 'loss/train': 0.29001158475875854} +07/25/2024 12:05:45 - INFO - __main__ - Step 8569: {'lr': 0.0004965815705741154, 'samples': 411312, 'steps': 8568, 'loss/train': 2.0392420291900635} +07/25/2024 12:05:46 - INFO - __main__ - Step 8570: {'lr': 0.000496580703560299, 'samples': 411360, 'steps': 8569, 'loss/train': 2.0695700645446777} +07/25/2024 12:05:46 - INFO - __main__ - Step 8571: {'lr': 0.0004965798364373038, 'samples': 411408, 'steps': 8570, 'loss/train': 1.822100281715393} +07/25/2024 12:05:46 - INFO - __main__ - Step 8572: {'lr': 0.0004965789692051298, 'samples': 411456, 'steps': 8571, 'loss/train': 1.9531124830245972} +07/25/2024 12:05:46 - INFO - __main__ - Step 8573: {'lr': 0.0004965781018637776, 'samples': 411504, 'steps': 8572, 'loss/train': 2.2892653942108154} +07/25/2024 12:05:47 - INFO - __main__ - Step 8574: {'lr': 0.0004965772344132475, 'samples': 411552, 'steps': 8573, 'loss/train': 2.1355836391448975} +07/25/2024 12:05:47 - INFO - __main__ - Step 8575: {'lr': 0.0004965763668535399, 'samples': 411600, 'steps': 8574, 'loss/train': 1.6570698022842407} +07/25/2024 12:05:47 - INFO - __main__ - Step 8576: {'lr': 0.0004965754991846553, 'samples': 411648, 'steps': 8575, 'loss/train': 2.364182949066162} +07/25/2024 12:05:48 - INFO - __main__ - Step 8577: {'lr': 0.0004965746314065938, 'samples': 411696, 'steps': 8576, 'loss/train': 2.4779295921325684} +07/25/2024 12:05:48 - INFO - __main__ - Step 8578: {'lr': 0.0004965737635193561, 'samples': 411744, 'steps': 8577, 'loss/train': 2.0437395572662354} +07/25/2024 12:05:48 - INFO - __main__ - Step 8579: {'lr': 0.0004965728955229425, 'samples': 411792, 'steps': 8578, 'loss/train': 1.657602071762085} +07/25/2024 12:05:48 - INFO - __main__ - Step 8580: {'lr': 0.0004965720274173531, 'samples': 411840, 'steps': 8579, 'loss/train': 1.2932757139205933} +07/25/2024 12:05:49 - INFO - __main__ - Step 8581: {'lr': 0.0004965711592025887, 'samples': 411888, 'steps': 8580, 'loss/train': 1.8069604635238647} +07/25/2024 12:05:49 - INFO - __main__ - Step 8582: {'lr': 0.0004965702908786495, 'samples': 411936, 'steps': 8581, 'loss/train': 2.453312635421753} +07/25/2024 12:05:49 - INFO - __main__ - Step 8583: {'lr': 0.0004965694224455358, 'samples': 411984, 'steps': 8582, 'loss/train': 3.761838674545288} +07/25/2024 12:05:50 - INFO - __main__ - Step 8584: {'lr': 0.0004965685539032482, 'samples': 412032, 'steps': 8583, 'loss/train': 2.0728182792663574} +07/25/2024 12:05:50 - INFO - __main__ - Step 8585: {'lr': 0.0004965676852517869, 'samples': 412080, 'steps': 8584, 'loss/train': 2.2438173294067383} +07/25/2024 12:05:50 - INFO - __main__ - Step 8586: {'lr': 0.0004965668164911522, 'samples': 412128, 'steps': 8585, 'loss/train': 1.3934910297393799} +07/25/2024 12:05:50 - INFO - __main__ - Step 8587: {'lr': 0.0004965659476213448, 'samples': 412176, 'steps': 8586, 'loss/train': 1.8788012266159058} +07/25/2024 12:05:51 - INFO - __main__ - Step 8588: {'lr': 0.0004965650786423648, 'samples': 412224, 'steps': 8587, 'loss/train': 2.0110702514648438} +07/25/2024 12:05:51 - INFO - __main__ - Step 8589: {'lr': 0.0004965642095542126, 'samples': 412272, 'steps': 8588, 'loss/train': 2.345170259475708} +07/25/2024 12:05:51 - INFO - __main__ - Step 8590: {'lr': 0.0004965633403568889, 'samples': 412320, 'steps': 8589, 'loss/train': 2.147881031036377} +07/25/2024 12:05:52 - INFO - __main__ - Step 8591: {'lr': 0.0004965624710503938, 'samples': 412368, 'steps': 8590, 'loss/train': 2.284315347671509} +07/25/2024 12:05:52 - INFO - __main__ - Step 8592: {'lr': 0.0004965616016347277, 'samples': 412416, 'steps': 8591, 'loss/train': 0.19762471318244934} +07/25/2024 12:05:52 - INFO - __main__ - Step 8593: {'lr': 0.000496560732109891, 'samples': 412464, 'steps': 8592, 'loss/train': 1.8363244533538818} +07/25/2024 12:05:52 - INFO - __main__ - Step 8594: {'lr': 0.0004965598624758842, 'samples': 412512, 'steps': 8593, 'loss/train': 2.838754177093506} +07/25/2024 12:05:53 - INFO - __main__ - Step 8595: {'lr': 0.0004965589927327075, 'samples': 412560, 'steps': 8594, 'loss/train': 1.946584939956665} +07/25/2024 12:05:53 - INFO - __main__ - Step 8596: {'lr': 0.0004965581228803614, 'samples': 412608, 'steps': 8595, 'loss/train': 3.525810956954956} +07/25/2024 12:05:53 - INFO - __main__ - Step 8597: {'lr': 0.0004965572529188464, 'samples': 412656, 'steps': 8596, 'loss/train': 1.725903034210205} +07/25/2024 12:05:54 - INFO - __main__ - Step 8598: {'lr': 0.0004965563828481627, 'samples': 412704, 'steps': 8597, 'loss/train': 1.6209121942520142} +07/25/2024 12:05:54 - INFO - __main__ - Step 8599: {'lr': 0.0004965555126683107, 'samples': 412752, 'steps': 8598, 'loss/train': 2.355233907699585} +07/25/2024 12:05:54 - INFO - __main__ - Step 8600: {'lr': 0.0004965546423792908, 'samples': 412800, 'steps': 8599, 'loss/train': 1.988811731338501} +07/25/2024 12:05:54 - INFO - __main__ - Step 8601: {'lr': 0.0004965537719811034, 'samples': 412848, 'steps': 8600, 'loss/train': 1.4692254066467285} +07/25/2024 12:05:55 - INFO - __main__ - Step 8602: {'lr': 0.000496552901473749, 'samples': 412896, 'steps': 8601, 'loss/train': 2.2269744873046875} +07/25/2024 12:05:55 - INFO - __main__ - Step 8603: {'lr': 0.0004965520308572277, 'samples': 412944, 'steps': 8602, 'loss/train': 1.7125695943832397} +07/25/2024 12:05:55 - INFO - __main__ - Step 8604: {'lr': 0.0004965511601315403, 'samples': 412992, 'steps': 8603, 'loss/train': 1.505915641784668} +07/25/2024 12:05:56 - INFO - __main__ - Step 8605: {'lr': 0.0004965502892966869, 'samples': 413040, 'steps': 8604, 'loss/train': 1.6701689958572388} +07/25/2024 12:05:56 - INFO - __main__ - Step 8606: {'lr': 0.0004965494183526678, 'samples': 413088, 'steps': 8605, 'loss/train': 2.1767966747283936} +07/25/2024 12:05:56 - INFO - __main__ - Step 8607: {'lr': 0.0004965485472994837, 'samples': 413136, 'steps': 8606, 'loss/train': 1.8803417682647705} +07/25/2024 12:05:56 - INFO - __main__ - Step 8608: {'lr': 0.0004965476761371347, 'samples': 413184, 'steps': 8607, 'loss/train': 1.8149224519729614} +07/25/2024 12:05:57 - INFO - __main__ - Step 8609: {'lr': 0.0004965468048656214, 'samples': 413232, 'steps': 8608, 'loss/train': 1.812838077545166} +07/25/2024 12:05:57 - INFO - __main__ - Step 8610: {'lr': 0.0004965459334849439, 'samples': 413280, 'steps': 8609, 'loss/train': 2.093517541885376} +07/25/2024 12:05:57 - INFO - __main__ - Step 8611: {'lr': 0.000496545061995103, 'samples': 413328, 'steps': 8610, 'loss/train': 1.9935340881347656} +07/25/2024 12:05:58 - INFO - __main__ - Step 8612: {'lr': 0.0004965441903960986, 'samples': 413376, 'steps': 8611, 'loss/train': 2.2908718585968018} +07/25/2024 12:05:58 - INFO - __main__ - Step 8613: {'lr': 0.0004965433186879315, 'samples': 413424, 'steps': 8612, 'loss/train': 2.7036306858062744} +07/25/2024 12:05:58 - INFO - __main__ - Step 8614: {'lr': 0.0004965424468706019, 'samples': 413472, 'steps': 8613, 'loss/train': 2.0467336177825928} +07/25/2024 12:05:58 - INFO - __main__ - Step 8615: {'lr': 0.0004965415749441103, 'samples': 413520, 'steps': 8614, 'loss/train': 2.5381197929382324} +07/25/2024 12:05:59 - INFO - __main__ - Step 8616: {'lr': 0.0004965407029084569, 'samples': 413568, 'steps': 8615, 'loss/train': 0.1878552883863449} +07/25/2024 12:05:59 - INFO - __main__ - Step 8617: {'lr': 0.0004965398307636422, 'samples': 413616, 'steps': 8616, 'loss/train': 2.089154005050659} +07/25/2024 12:05:59 - INFO - __main__ - Step 8618: {'lr': 0.0004965389585096665, 'samples': 413664, 'steps': 8617, 'loss/train': 2.5644724369049072} +07/25/2024 12:06:00 - INFO - __main__ - Step 8619: {'lr': 0.0004965380861465304, 'samples': 413712, 'steps': 8618, 'loss/train': 1.5986360311508179} +07/25/2024 12:06:00 - INFO - __main__ - Step 8620: {'lr': 0.0004965372136742341, 'samples': 413760, 'steps': 8619, 'loss/train': 2.286794424057007} +07/25/2024 12:06:00 - INFO - __main__ - Step 8621: {'lr': 0.0004965363410927779, 'samples': 413808, 'steps': 8620, 'loss/train': 2.2583680152893066} +07/25/2024 12:06:00 - INFO - __main__ - Step 8622: {'lr': 0.0004965354684021624, 'samples': 413856, 'steps': 8621, 'loss/train': 1.8930991888046265} +07/25/2024 12:06:01 - INFO - __main__ - Step 8623: {'lr': 0.000496534595602388, 'samples': 413904, 'steps': 8622, 'loss/train': 2.0274593830108643} +07/25/2024 12:06:01 - INFO - __main__ - Step 8624: {'lr': 0.000496533722693455, 'samples': 413952, 'steps': 8623, 'loss/train': 1.749462366104126} +07/25/2024 12:06:01 - INFO - __main__ - Step 8625: {'lr': 0.0004965328496753636, 'samples': 414000, 'steps': 8624, 'loss/train': 2.6781387329101562} +07/25/2024 12:06:02 - INFO - __main__ - Step 8626: {'lr': 0.0004965319765481144, 'samples': 414048, 'steps': 8625, 'loss/train': 2.4407966136932373} +07/25/2024 12:06:02 - INFO - __main__ - Step 8627: {'lr': 0.0004965311033117079, 'samples': 414096, 'steps': 8626, 'loss/train': 1.9713517427444458} +07/25/2024 12:06:02 - INFO - __main__ - Step 8628: {'lr': 0.0004965302299661441, 'samples': 414144, 'steps': 8627, 'loss/train': 1.664381980895996} +07/25/2024 12:06:02 - INFO - __main__ - Step 8629: {'lr': 0.0004965293565114238, 'samples': 414192, 'steps': 8628, 'loss/train': 1.6595755815505981} +07/25/2024 12:06:03 - INFO - __main__ - Step 8630: {'lr': 0.0004965284829475472, 'samples': 414240, 'steps': 8629, 'loss/train': 1.9513671398162842} +07/25/2024 12:06:03 - INFO - __main__ - Step 8631: {'lr': 0.0004965276092745146, 'samples': 414288, 'steps': 8630, 'loss/train': 1.9682388305664062} +07/25/2024 12:06:03 - INFO - __main__ - Step 8632: {'lr': 0.0004965267354923266, 'samples': 414336, 'steps': 8631, 'loss/train': 1.967344045639038} +07/25/2024 12:06:04 - INFO - __main__ - Step 8633: {'lr': 0.0004965258616009833, 'samples': 414384, 'steps': 8632, 'loss/train': 2.1438090801239014} +07/25/2024 12:06:04 - INFO - __main__ - Step 8634: {'lr': 0.0004965249876004855, 'samples': 414432, 'steps': 8633, 'loss/train': 2.4882442951202393} +07/25/2024 12:06:04 - INFO - __main__ - Step 8635: {'lr': 0.0004965241134908332, 'samples': 414480, 'steps': 8634, 'loss/train': 1.9162856340408325} +07/25/2024 12:06:04 - INFO - __main__ - Step 8636: {'lr': 0.0004965232392720269, 'samples': 414528, 'steps': 8635, 'loss/train': 2.3702805042266846} +07/25/2024 12:06:05 - INFO - __main__ - Step 8637: {'lr': 0.000496522364944067, 'samples': 414576, 'steps': 8636, 'loss/train': 2.552412748336792} +07/25/2024 12:06:05 - INFO - __main__ - Step 8638: {'lr': 0.000496521490506954, 'samples': 414624, 'steps': 8637, 'loss/train': 1.598814606666565} +07/25/2024 12:06:05 - INFO - __main__ - Step 8639: {'lr': 0.0004965206159606881, 'samples': 414672, 'steps': 8638, 'loss/train': 2.483914852142334} +07/25/2024 12:06:05 - INFO - __main__ - Step 8640: {'lr': 0.0004965197413052699, 'samples': 414720, 'steps': 8639, 'loss/train': 0.31468191742897034} +07/25/2024 12:06:06 - INFO - __main__ - Step 8641: {'lr': 0.0004965188665406996, 'samples': 414768, 'steps': 8640, 'loss/train': 2.2553913593292236} +07/25/2024 12:06:06 - INFO - __main__ - Step 8642: {'lr': 0.0004965179916669776, 'samples': 414816, 'steps': 8641, 'loss/train': 2.80778169631958} +07/25/2024 12:06:06 - INFO - __main__ - Step 8643: {'lr': 0.0004965171166841044, 'samples': 414864, 'steps': 8642, 'loss/train': 1.6573530435562134} +07/25/2024 12:06:07 - INFO - __main__ - Step 8644: {'lr': 0.0004965162415920803, 'samples': 414912, 'steps': 8643, 'loss/train': 2.0202784538269043} +07/25/2024 12:06:07 - INFO - __main__ - Step 8645: {'lr': 0.0004965153663909058, 'samples': 414960, 'steps': 8644, 'loss/train': 2.211916446685791} +07/25/2024 12:06:07 - INFO - __main__ - Step 8646: {'lr': 0.0004965144910805811, 'samples': 415008, 'steps': 8645, 'loss/train': 2.041975736618042} +07/25/2024 12:06:07 - INFO - __main__ - Step 8647: {'lr': 0.0004965136156611067, 'samples': 415056, 'steps': 8646, 'loss/train': 2.0811855792999268} +07/25/2024 12:06:08 - INFO - __main__ - Step 8648: {'lr': 0.000496512740132483, 'samples': 415104, 'steps': 8647, 'loss/train': 1.1399317979812622} +07/25/2024 12:06:08 - INFO - __main__ - Step 8649: {'lr': 0.0004965118644947103, 'samples': 415152, 'steps': 8648, 'loss/train': 1.926718831062317} +07/25/2024 12:06:08 - INFO - __main__ - Step 8650: {'lr': 0.0004965109887477892, 'samples': 415200, 'steps': 8649, 'loss/train': 2.046893835067749} +07/25/2024 12:06:09 - INFO - __main__ - Step 8651: {'lr': 0.0004965101128917199, 'samples': 415248, 'steps': 8650, 'loss/train': 0.9098075032234192} +07/25/2024 12:06:09 - INFO - __main__ - Step 8652: {'lr': 0.0004965092369265027, 'samples': 415296, 'steps': 8651, 'loss/train': 2.4676170349121094} +07/25/2024 12:06:09 - INFO - __main__ - Step 8653: {'lr': 0.0004965083608521382, 'samples': 415344, 'steps': 8652, 'loss/train': 2.2651312351226807} +07/25/2024 12:06:09 - INFO - __main__ - Step 8654: {'lr': 0.0004965074846686268, 'samples': 415392, 'steps': 8653, 'loss/train': 1.4534947872161865} +07/25/2024 12:06:10 - INFO - __main__ - Step 8655: {'lr': 0.0004965066083759687, 'samples': 415440, 'steps': 8654, 'loss/train': 2.0434954166412354} +07/25/2024 12:06:10 - INFO - __main__ - Step 8656: {'lr': 0.0004965057319741644, 'samples': 415488, 'steps': 8655, 'loss/train': 1.7001454830169678} +07/25/2024 12:06:10 - INFO - __main__ - Step 8657: {'lr': 0.0004965048554632142, 'samples': 415536, 'steps': 8656, 'loss/train': 2.0497896671295166} +07/25/2024 12:06:11 - INFO - __main__ - Step 8658: {'lr': 0.0004965039788431186, 'samples': 415584, 'steps': 8657, 'loss/train': 2.1254191398620605} +07/25/2024 12:06:11 - INFO - __main__ - Step 8659: {'lr': 0.000496503102113878, 'samples': 415632, 'steps': 8658, 'loss/train': 1.8424391746520996} +07/25/2024 12:06:11 - INFO - __main__ - Step 8660: {'lr': 0.0004965022252754927, 'samples': 415680, 'steps': 8659, 'loss/train': 1.842748761177063} +07/25/2024 12:06:11 - INFO - __main__ - Step 8661: {'lr': 0.0004965013483279631, 'samples': 415728, 'steps': 8660, 'loss/train': 2.08957839012146} +07/25/2024 12:06:12 - INFO - __main__ - Step 8662: {'lr': 0.0004965004712712897, 'samples': 415776, 'steps': 8661, 'loss/train': 1.9346319437026978} +07/25/2024 12:06:12 - INFO - __main__ - Step 8663: {'lr': 0.0004964995941054727, 'samples': 415824, 'steps': 8662, 'loss/train': 2.303583860397339} +07/25/2024 12:06:12 - INFO - __main__ - Step 8664: {'lr': 0.0004964987168305125, 'samples': 415872, 'steps': 8663, 'loss/train': 0.21036946773529053} +07/25/2024 12:06:13 - INFO - __main__ - Step 8665: {'lr': 0.0004964978394464098, 'samples': 415920, 'steps': 8664, 'loss/train': 2.2084591388702393} +07/25/2024 12:06:13 - INFO - __main__ - Step 8666: {'lr': 0.0004964969619531646, 'samples': 415968, 'steps': 8665, 'loss/train': 2.190577507019043} +07/25/2024 12:06:13 - INFO - __main__ - Step 8667: {'lr': 0.0004964960843507775, 'samples': 416016, 'steps': 8666, 'loss/train': 2.695309638977051} +07/25/2024 12:06:13 - INFO - __main__ - Step 8668: {'lr': 0.0004964952066392489, 'samples': 416064, 'steps': 8667, 'loss/train': 2.1599996089935303} +07/25/2024 12:06:14 - INFO - __main__ - Step 8669: {'lr': 0.000496494328818579, 'samples': 416112, 'steps': 8668, 'loss/train': 1.6952412128448486} +07/25/2024 12:06:14 - INFO - __main__ - Step 8670: {'lr': 0.0004964934508887684, 'samples': 416160, 'steps': 8669, 'loss/train': 2.132901668548584} +07/25/2024 12:06:14 - INFO - __main__ - Step 8671: {'lr': 0.0004964925728498175, 'samples': 416208, 'steps': 8670, 'loss/train': 1.706072449684143} +07/25/2024 12:06:15 - INFO - __main__ - Step 8672: {'lr': 0.0004964916947017264, 'samples': 416256, 'steps': 8671, 'loss/train': 1.1446226835250854} +07/25/2024 12:06:15 - INFO - __main__ - Step 8673: {'lr': 0.0004964908164444957, 'samples': 416304, 'steps': 8672, 'loss/train': 2.248512029647827} +07/25/2024 12:06:15 - INFO - __main__ - Step 8674: {'lr': 0.0004964899380781258, 'samples': 416352, 'steps': 8673, 'loss/train': 1.9853497743606567} +07/25/2024 12:06:15 - INFO - __main__ - Step 8675: {'lr': 0.0004964890596026172, 'samples': 416400, 'steps': 8674, 'loss/train': 1.6636536121368408} +07/25/2024 12:06:16 - INFO - __main__ - Step 8676: {'lr': 0.00049648818101797, 'samples': 416448, 'steps': 8675, 'loss/train': 2.1998090744018555} +07/25/2024 12:06:16 - INFO - __main__ - Step 8677: {'lr': 0.0004964873023241848, 'samples': 416496, 'steps': 8676, 'loss/train': 1.7923920154571533} +07/25/2024 12:06:16 - INFO - __main__ - Step 8678: {'lr': 0.000496486423521262, 'samples': 416544, 'steps': 8677, 'loss/train': 2.1119801998138428} +07/25/2024 12:06:17 - INFO - __main__ - Step 8679: {'lr': 0.0004964855446092018, 'samples': 416592, 'steps': 8678, 'loss/train': 1.8521239757537842} +07/25/2024 12:06:17 - INFO - __main__ - Step 8680: {'lr': 0.0004964846655880048, 'samples': 416640, 'steps': 8679, 'loss/train': 1.9019163846969604} +07/25/2024 12:06:17 - INFO - __main__ - Step 8681: {'lr': 0.0004964837864576713, 'samples': 416688, 'steps': 8680, 'loss/train': 1.4676011800765991} +07/25/2024 12:06:17 - INFO - __main__ - Step 8682: {'lr': 0.0004964829072182015, 'samples': 416736, 'steps': 8681, 'loss/train': 2.055935859680176} +07/25/2024 12:06:18 - INFO - __main__ - Step 8683: {'lr': 0.0004964820278695962, 'samples': 416784, 'steps': 8682, 'loss/train': 1.9749773740768433} +07/25/2024 12:06:18 - INFO - __main__ - Step 8684: {'lr': 0.0004964811484118554, 'samples': 416832, 'steps': 8683, 'loss/train': 2.183880567550659} +07/25/2024 12:06:18 - INFO - __main__ - Step 8685: {'lr': 0.0004964802688449798, 'samples': 416880, 'steps': 8684, 'loss/train': 1.882232427597046} +07/25/2024 12:06:19 - INFO - __main__ - Step 8686: {'lr': 0.0004964793891689695, 'samples': 416928, 'steps': 8685, 'loss/train': 2.022183418273926} +07/25/2024 12:06:19 - INFO - __main__ - Step 8687: {'lr': 0.0004964785093838251, 'samples': 416976, 'steps': 8686, 'loss/train': 2.1014535427093506} +07/25/2024 12:06:19 - INFO - __main__ - Step 8688: {'lr': 0.000496477629489547, 'samples': 417024, 'steps': 8687, 'loss/train': 0.2238093614578247} +07/25/2024 12:06:19 - INFO - __main__ - Step 8689: {'lr': 0.0004964767494861354, 'samples': 417072, 'steps': 8688, 'loss/train': 2.208261489868164} +07/25/2024 12:06:20 - INFO - __main__ - Step 8690: {'lr': 0.0004964758693735908, 'samples': 417120, 'steps': 8689, 'loss/train': 1.8535603284835815} +07/25/2024 12:06:20 - INFO - __main__ - Step 8691: {'lr': 0.0004964749891519137, 'samples': 417168, 'steps': 8690, 'loss/train': 2.0625534057617188} +07/25/2024 12:06:20 - INFO - __main__ - Step 8692: {'lr': 0.0004964741088211044, 'samples': 417216, 'steps': 8691, 'loss/train': 2.2381863594055176} +07/25/2024 12:06:21 - INFO - __main__ - Step 8693: {'lr': 0.000496473228381163, 'samples': 417264, 'steps': 8692, 'loss/train': 1.6510261297225952} +07/25/2024 12:06:21 - INFO - __main__ - Step 8694: {'lr': 0.0004964723478320904, 'samples': 417312, 'steps': 8693, 'loss/train': 2.098165988922119} +07/25/2024 12:06:21 - INFO - __main__ - Step 8695: {'lr': 0.0004964714671738867, 'samples': 417360, 'steps': 8694, 'loss/train': 1.9203418493270874} +07/25/2024 12:06:21 - INFO - __main__ - Step 8696: {'lr': 0.0004964705864065524, 'samples': 417408, 'steps': 8695, 'loss/train': 1.4791886806488037} +07/25/2024 12:06:22 - INFO - __main__ - Step 8697: {'lr': 0.0004964697055300877, 'samples': 417456, 'steps': 8696, 'loss/train': 1.995285987854004} +07/25/2024 12:06:22 - INFO - __main__ - Step 8698: {'lr': 0.0004964688245444932, 'samples': 417504, 'steps': 8697, 'loss/train': 1.928328514099121} +07/25/2024 12:06:22 - INFO - __main__ - Step 8699: {'lr': 0.0004964679434497692, 'samples': 417552, 'steps': 8698, 'loss/train': 1.7791733741760254} +07/25/2024 12:06:23 - INFO - __main__ - Step 8700: {'lr': 0.0004964670622459161, 'samples': 417600, 'steps': 8699, 'loss/train': 2.1428236961364746} +07/25/2024 12:06:23 - INFO - __main__ - Step 8701: {'lr': 0.0004964661809329343, 'samples': 417648, 'steps': 8700, 'loss/train': 1.8594543933868408} +07/25/2024 12:06:23 - INFO - __main__ - Step 8702: {'lr': 0.0004964652995108241, 'samples': 417696, 'steps': 8701, 'loss/train': 2.1609597206115723} +07/25/2024 12:06:23 - INFO - __main__ - Step 8703: {'lr': 0.0004964644179795861, 'samples': 417744, 'steps': 8702, 'loss/train': 2.1569316387176514} +07/25/2024 12:06:24 - INFO - __main__ - Step 8704: {'lr': 0.0004964635363392205, 'samples': 417792, 'steps': 8703, 'loss/train': 2.0115089416503906} +07/25/2024 12:06:24 - INFO - __main__ - Step 8705: {'lr': 0.0004964626545897277, 'samples': 417840, 'steps': 8704, 'loss/train': 1.885124683380127} +07/25/2024 12:06:24 - INFO - __main__ - Step 8706: {'lr': 0.0004964617727311083, 'samples': 417888, 'steps': 8705, 'loss/train': 2.243560552597046} +07/25/2024 12:06:25 - INFO - __main__ - Step 8707: {'lr': 0.0004964608907633624, 'samples': 417936, 'steps': 8706, 'loss/train': 1.9678610563278198} +07/25/2024 12:06:25 - INFO - __main__ - Step 8708: {'lr': 0.0004964600086864905, 'samples': 417984, 'steps': 8707, 'loss/train': 1.9458812475204468} +07/25/2024 12:06:25 - INFO - __main__ - Step 8709: {'lr': 0.0004964591265004931, 'samples': 418032, 'steps': 8708, 'loss/train': 2.1472675800323486} +07/25/2024 12:06:25 - INFO - __main__ - Step 8710: {'lr': 0.0004964582442053704, 'samples': 418080, 'steps': 8709, 'loss/train': 2.1628384590148926} +07/25/2024 12:06:26 - INFO - __main__ - Step 8711: {'lr': 0.000496457361801123, 'samples': 418128, 'steps': 8710, 'loss/train': 2.237912178039551} +07/25/2024 12:06:26 - INFO - __main__ - Step 8712: {'lr': 0.0004964564792877511, 'samples': 418176, 'steps': 8711, 'loss/train': 0.1636090725660324} +07/25/2024 12:06:26 - INFO - __main__ - Step 8713: {'lr': 0.0004964555966652553, 'samples': 418224, 'steps': 8712, 'loss/train': 1.998898983001709} +07/25/2024 12:06:26 - INFO - __main__ - Step 8714: {'lr': 0.0004964547139336357, 'samples': 418272, 'steps': 8713, 'loss/train': 1.9158852100372314} +07/25/2024 12:06:27 - INFO - __main__ - Step 8715: {'lr': 0.0004964538310928931, 'samples': 418320, 'steps': 8714, 'loss/train': 2.2817771434783936} +07/25/2024 12:06:27 - INFO - __main__ - Step 8716: {'lr': 0.0004964529481430274, 'samples': 418368, 'steps': 8715, 'loss/train': 1.970592975616455} +07/25/2024 12:06:27 - INFO - __main__ - Step 8717: {'lr': 0.0004964520650840394, 'samples': 418416, 'steps': 8716, 'loss/train': 1.8140653371810913} +07/25/2024 12:06:28 - INFO - __main__ - Step 8718: {'lr': 0.0004964511819159292, 'samples': 418464, 'steps': 8717, 'loss/train': 2.0262463092803955} +07/25/2024 12:06:28 - INFO - __main__ - Step 8719: {'lr': 0.0004964502986386975, 'samples': 418512, 'steps': 8718, 'loss/train': 2.055180311203003} +07/25/2024 12:06:28 - INFO - __main__ - Step 8720: {'lr': 0.0004964494152523444, 'samples': 418560, 'steps': 8719, 'loss/train': 2.1730761528015137} +07/25/2024 12:06:28 - INFO - __main__ - Step 8721: {'lr': 0.0004964485317568703, 'samples': 418608, 'steps': 8720, 'loss/train': 1.5814265012741089} +07/25/2024 12:06:29 - INFO - __main__ - Step 8722: {'lr': 0.0004964476481522759, 'samples': 418656, 'steps': 8721, 'loss/train': 2.036705255508423} +07/25/2024 12:06:29 - INFO - __main__ - Step 8723: {'lr': 0.0004964467644385613, 'samples': 418704, 'steps': 8722, 'loss/train': 2.1268129348754883} +07/25/2024 12:06:29 - INFO - __main__ - Step 8724: {'lr': 0.0004964458806157269, 'samples': 418752, 'steps': 8723, 'loss/train': 0.9370791912078857} +07/25/2024 12:06:30 - INFO - __main__ - Step 8725: {'lr': 0.0004964449966837733, 'samples': 418800, 'steps': 8724, 'loss/train': 2.006704807281494} +07/25/2024 12:06:30 - INFO - __main__ - Step 8726: {'lr': 0.0004964441126427007, 'samples': 418848, 'steps': 8725, 'loss/train': 2.0224204063415527} +07/25/2024 12:06:30 - INFO - __main__ - Step 8727: {'lr': 0.0004964432284925096, 'samples': 418896, 'steps': 8726, 'loss/train': 2.108426332473755} +07/25/2024 12:06:30 - INFO - __main__ - Step 8728: {'lr': 0.0004964423442332003, 'samples': 418944, 'steps': 8727, 'loss/train': 1.4511992931365967} +07/25/2024 12:06:31 - INFO - __main__ - Step 8729: {'lr': 0.0004964414598647731, 'samples': 418992, 'steps': 8728, 'loss/train': 2.0961992740631104} +07/25/2024 12:06:31 - INFO - __main__ - Step 8730: {'lr': 0.0004964405753872288, 'samples': 419040, 'steps': 8729, 'loss/train': 2.8796091079711914} +07/25/2024 12:06:31 - INFO - __main__ - Step 8731: {'lr': 0.0004964396908005674, 'samples': 419088, 'steps': 8730, 'loss/train': 2.5026867389678955} +07/25/2024 12:06:32 - INFO - __main__ - Step 8732: {'lr': 0.0004964388061047893, 'samples': 419136, 'steps': 8731, 'loss/train': 2.190108060836792} +07/25/2024 12:06:32 - INFO - __main__ - Step 8733: {'lr': 0.0004964379212998951, 'samples': 419184, 'steps': 8732, 'loss/train': 1.6769908666610718} +07/25/2024 12:06:32 - INFO - __main__ - Step 8734: {'lr': 0.0004964370363858852, 'samples': 419232, 'steps': 8733, 'loss/train': 2.4419708251953125} +07/25/2024 12:06:32 - INFO - __main__ - Step 8735: {'lr': 0.0004964361513627596, 'samples': 419280, 'steps': 8734, 'loss/train': 0.9865064024925232} +07/25/2024 12:06:33 - INFO - __main__ - Step 8736: {'lr': 0.0004964352662305192, 'samples': 419328, 'steps': 8735, 'loss/train': 0.19931358098983765} +07/25/2024 12:06:33 - INFO - __main__ - Step 8737: {'lr': 0.0004964343809891642, 'samples': 419376, 'steps': 8736, 'loss/train': 2.0030360221862793} +07/25/2024 12:06:33 - INFO - __main__ - Step 8738: {'lr': 0.0004964334956386948, 'samples': 419424, 'steps': 8737, 'loss/train': 1.7096598148345947} +07/25/2024 12:06:34 - INFO - __main__ - Step 8739: {'lr': 0.0004964326101791117, 'samples': 419472, 'steps': 8738, 'loss/train': 2.0843124389648438} +07/25/2024 12:06:34 - INFO - __main__ - Step 8740: {'lr': 0.0004964317246104149, 'samples': 419520, 'steps': 8739, 'loss/train': 0.8037936687469482} +07/25/2024 12:06:34 - INFO - __main__ - Step 8741: {'lr': 0.0004964308389326053, 'samples': 419568, 'steps': 8740, 'loss/train': 1.5743581056594849} +07/25/2024 12:06:34 - INFO - __main__ - Step 8742: {'lr': 0.000496429953145683, 'samples': 419616, 'steps': 8741, 'loss/train': 2.3371737003326416} +07/25/2024 12:06:35 - INFO - __main__ - Step 8743: {'lr': 0.0004964290672496483, 'samples': 419664, 'steps': 8742, 'loss/train': 2.1164040565490723} +07/25/2024 12:06:35 - INFO - __main__ - Step 8744: {'lr': 0.0004964281812445019, 'samples': 419712, 'steps': 8743, 'loss/train': 1.5476644039154053} +07/25/2024 12:06:35 - INFO - __main__ - Step 8745: {'lr': 0.0004964272951302437, 'samples': 419760, 'steps': 8744, 'loss/train': 1.3911570310592651} +07/25/2024 12:06:36 - INFO - __main__ - Step 8746: {'lr': 0.0004964264089068747, 'samples': 419808, 'steps': 8745, 'loss/train': 2.3009579181671143} +07/25/2024 12:06:36 - INFO - __main__ - Step 8747: {'lr': 0.0004964255225743949, 'samples': 419856, 'steps': 8746, 'loss/train': 1.9632728099822998} +07/25/2024 12:06:36 - INFO - __main__ - Step 8748: {'lr': 0.0004964246361328047, 'samples': 419904, 'steps': 8747, 'loss/train': 2.0555431842803955} +07/25/2024 12:06:36 - INFO - __main__ - Step 8749: {'lr': 0.0004964237495821046, 'samples': 419952, 'steps': 8748, 'loss/train': 2.132488250732422} +07/25/2024 12:06:37 - INFO - __main__ - Step 8750: {'lr': 0.0004964228629222951, 'samples': 420000, 'steps': 8749, 'loss/train': 1.538543701171875} +07/25/2024 12:06:37 - INFO - __main__ - Step 8751: {'lr': 0.0004964219761533763, 'samples': 420048, 'steps': 8750, 'loss/train': 2.2856314182281494} +07/25/2024 12:06:37 - INFO - __main__ - Step 8752: {'lr': 0.0004964210892753488, 'samples': 420096, 'steps': 8751, 'loss/train': 0.9694915413856506} +07/25/2024 12:06:38 - INFO - __main__ - Step 8753: {'lr': 0.0004964202022882129, 'samples': 420144, 'steps': 8752, 'loss/train': 3.220620632171631} +07/25/2024 12:06:38 - INFO - __main__ - Step 8754: {'lr': 0.0004964193151919691, 'samples': 420192, 'steps': 8753, 'loss/train': 1.7077564001083374} +07/25/2024 12:06:38 - INFO - __main__ - Step 8755: {'lr': 0.0004964184279866178, 'samples': 420240, 'steps': 8754, 'loss/train': 2.13814377784729} +07/25/2024 12:06:38 - INFO - __main__ - Step 8756: {'lr': 0.0004964175406721593, 'samples': 420288, 'steps': 8755, 'loss/train': 1.4708410501480103} +07/25/2024 12:06:39 - INFO - __main__ - Step 8757: {'lr': 0.000496416653248594, 'samples': 420336, 'steps': 8756, 'loss/train': 1.7661494016647339} +07/25/2024 12:06:39 - INFO - __main__ - Step 8758: {'lr': 0.0004964157657159223, 'samples': 420384, 'steps': 8757, 'loss/train': 2.0114877223968506} +07/25/2024 12:06:39 - INFO - __main__ - Step 8759: {'lr': 0.0004964148780741446, 'samples': 420432, 'steps': 8758, 'loss/train': 2.2473716735839844} +07/25/2024 12:06:40 - INFO - __main__ - Step 8760: {'lr': 0.0004964139903232611, 'samples': 420480, 'steps': 8759, 'loss/train': 0.1551145315170288} +07/25/2024 12:06:40 - INFO - __main__ - Step 8761: {'lr': 0.0004964131024632727, 'samples': 420528, 'steps': 8760, 'loss/train': 1.8149056434631348} +07/25/2024 12:06:40 - INFO - __main__ - Step 8762: {'lr': 0.0004964122144941794, 'samples': 420576, 'steps': 8761, 'loss/train': 2.1749520301818848} +07/25/2024 12:06:40 - INFO - __main__ - Step 8763: {'lr': 0.0004964113264159816, 'samples': 420624, 'steps': 8762, 'loss/train': 2.032931327819824} +07/25/2024 12:06:41 - INFO - __main__ - Step 8764: {'lr': 0.0004964104382286798, 'samples': 420672, 'steps': 8763, 'loss/train': 1.8992013931274414} +07/25/2024 12:06:41 - INFO - __main__ - Step 8765: {'lr': 0.0004964095499322744, 'samples': 420720, 'steps': 8764, 'loss/train': 1.7845261096954346} +07/25/2024 12:06:41 - INFO - __main__ - Step 8766: {'lr': 0.0004964086615267657, 'samples': 420768, 'steps': 8765, 'loss/train': 1.8342740535736084} +07/25/2024 12:06:42 - INFO - __main__ - Step 8767: {'lr': 0.0004964077730121541, 'samples': 420816, 'steps': 8766, 'loss/train': 2.0897128582000732} +07/25/2024 12:06:42 - INFO - __main__ - Step 8768: {'lr': 0.0004964068843884402, 'samples': 420864, 'steps': 8767, 'loss/train': 1.516122817993164} +07/25/2024 12:06:42 - INFO - __main__ - Step 8769: {'lr': 0.000496405995655624, 'samples': 420912, 'steps': 8768, 'loss/train': 2.2135467529296875} +07/25/2024 12:06:42 - INFO - __main__ - Step 8770: {'lr': 0.0004964051068137063, 'samples': 420960, 'steps': 8769, 'loss/train': 2.2197301387786865} +07/25/2024 12:06:43 - INFO - __main__ - Step 8771: {'lr': 0.0004964042178626874, 'samples': 421008, 'steps': 8770, 'loss/train': 2.739027738571167} +07/25/2024 12:06:43 - INFO - __main__ - Step 8772: {'lr': 0.0004964033288025674, 'samples': 421056, 'steps': 8771, 'loss/train': 2.271254301071167} +07/25/2024 12:06:43 - INFO - __main__ - Step 8773: {'lr': 0.000496402439633347, 'samples': 421104, 'steps': 8772, 'loss/train': 1.9573312997817993} +07/25/2024 12:06:44 - INFO - __main__ - Step 8774: {'lr': 0.0004964015503550267, 'samples': 421152, 'steps': 8773, 'loss/train': 1.7358351945877075} +07/25/2024 12:06:44 - INFO - __main__ - Step 8775: {'lr': 0.0004964006609676065, 'samples': 421200, 'steps': 8774, 'loss/train': 1.7662140130996704} +07/25/2024 12:06:44 - INFO - __main__ - Step 8776: {'lr': 0.0004963997714710869, 'samples': 421248, 'steps': 8775, 'loss/train': 2.1337132453918457} +07/25/2024 12:06:44 - INFO - __main__ - Step 8777: {'lr': 0.0004963988818654686, 'samples': 421296, 'steps': 8776, 'loss/train': 4.221245288848877} +07/25/2024 12:06:45 - INFO - __main__ - Step 8778: {'lr': 0.0004963979921507516, 'samples': 421344, 'steps': 8777, 'loss/train': 1.9534568786621094} +07/25/2024 12:06:45 - INFO - __main__ - Step 8779: {'lr': 0.0004963971023269366, 'samples': 421392, 'steps': 8778, 'loss/train': 1.5803754329681396} +07/25/2024 12:06:45 - INFO - __main__ - Step 8780: {'lr': 0.0004963962123940239, 'samples': 421440, 'steps': 8779, 'loss/train': 0.7229654788970947} +07/25/2024 12:06:46 - INFO - __main__ - Step 8781: {'lr': 0.0004963953223520137, 'samples': 421488, 'steps': 8780, 'loss/train': 1.917388677597046} +07/25/2024 12:06:46 - INFO - __main__ - Step 8782: {'lr': 0.0004963944322009066, 'samples': 421536, 'steps': 8781, 'loss/train': 1.1358267068862915} +07/25/2024 12:06:46 - INFO - __main__ - Step 8783: {'lr': 0.0004963935419407031, 'samples': 421584, 'steps': 8782, 'loss/train': 1.8978615999221802} +07/25/2024 12:06:46 - INFO - __main__ - Step 8784: {'lr': 0.0004963926515714033, 'samples': 421632, 'steps': 8783, 'loss/train': 0.27977967262268066} +07/25/2024 12:06:47 - INFO - __main__ - Step 8785: {'lr': 0.0004963917610930077, 'samples': 421680, 'steps': 8784, 'loss/train': 2.1603446006774902} +07/25/2024 12:06:47 - INFO - __main__ - Step 8786: {'lr': 0.0004963908705055169, 'samples': 421728, 'steps': 8785, 'loss/train': 1.8135309219360352} +07/25/2024 12:06:47 - INFO - __main__ - Step 8787: {'lr': 0.000496389979808931, 'samples': 421776, 'steps': 8786, 'loss/train': 1.9183887243270874} +07/25/2024 12:06:48 - INFO - __main__ - Step 8788: {'lr': 0.0004963890890032505, 'samples': 421824, 'steps': 8787, 'loss/train': 1.5977751016616821} +07/25/2024 12:06:48 - INFO - __main__ - Step 8789: {'lr': 0.0004963881980884759, 'samples': 421872, 'steps': 8788, 'loss/train': 1.7120898962020874} +07/25/2024 12:06:48 - INFO - __main__ - Step 8790: {'lr': 0.0004963873070646074, 'samples': 421920, 'steps': 8789, 'loss/train': 2.083191394805908} +07/25/2024 12:06:48 - INFO - __main__ - Step 8791: {'lr': 0.0004963864159316457, 'samples': 421968, 'steps': 8790, 'loss/train': 1.8954882621765137} +07/25/2024 12:06:49 - INFO - __main__ - Step 8792: {'lr': 0.0004963855246895908, 'samples': 422016, 'steps': 8791, 'loss/train': 1.888128399848938} +07/25/2024 12:06:49 - INFO - __main__ - Step 8793: {'lr': 0.0004963846333384434, 'samples': 422064, 'steps': 8792, 'loss/train': 2.150864362716675} +07/25/2024 12:06:49 - INFO - __main__ - Step 8794: {'lr': 0.0004963837418782038, 'samples': 422112, 'steps': 8793, 'loss/train': 1.960623860359192} +07/25/2024 12:06:49 - INFO - __main__ - Step 8795: {'lr': 0.0004963828503088723, 'samples': 422160, 'steps': 8794, 'loss/train': 4.398273468017578} +07/25/2024 12:06:50 - INFO - __main__ - Step 8796: {'lr': 0.0004963819586304494, 'samples': 422208, 'steps': 8795, 'loss/train': 1.904762864112854} +07/25/2024 12:06:50 - INFO - __main__ - Step 8797: {'lr': 0.0004963810668429355, 'samples': 422256, 'steps': 8796, 'loss/train': 2.1266989707946777} +07/25/2024 12:06:50 - INFO - __main__ - Step 8798: {'lr': 0.000496380174946331, 'samples': 422304, 'steps': 8797, 'loss/train': 1.753923773765564} +07/25/2024 12:06:51 - INFO - __main__ - Step 8799: {'lr': 0.0004963792829406361, 'samples': 422352, 'steps': 8798, 'loss/train': 2.0000112056732178} +07/25/2024 12:06:51 - INFO - __main__ - Step 8800: {'lr': 0.0004963783908258516, 'samples': 422400, 'steps': 8799, 'loss/train': 2.182521343231201} +07/25/2024 12:06:51 - INFO - __main__ - Step 8801: {'lr': 0.0004963774986019775, 'samples': 422448, 'steps': 8800, 'loss/train': 4.453976631164551} +07/25/2024 12:06:51 - INFO - __main__ - Step 8802: {'lr': 0.0004963766062690144, 'samples': 422496, 'steps': 8801, 'loss/train': 1.998836636543274} +07/25/2024 12:06:52 - INFO - __main__ - Step 8803: {'lr': 0.0004963757138269626, 'samples': 422544, 'steps': 8802, 'loss/train': 1.8476300239562988} +07/25/2024 12:06:52 - INFO - __main__ - Step 8804: {'lr': 0.0004963748212758226, 'samples': 422592, 'steps': 8803, 'loss/train': 1.8761401176452637} +07/25/2024 12:06:52 - INFO - __main__ - Step 8805: {'lr': 0.0004963739286155946, 'samples': 422640, 'steps': 8804, 'loss/train': 1.909708023071289} +07/25/2024 12:06:53 - INFO - __main__ - Step 8806: {'lr': 0.0004963730358462792, 'samples': 422688, 'steps': 8805, 'loss/train': 2.0628840923309326} +07/25/2024 12:06:53 - INFO - __main__ - Step 8807: {'lr': 0.0004963721429678768, 'samples': 422736, 'steps': 8806, 'loss/train': 2.2381479740142822} +07/25/2024 12:06:53 - INFO - __main__ - Step 8808: {'lr': 0.0004963712499803876, 'samples': 422784, 'steps': 8807, 'loss/train': 0.34347257018089294} +07/25/2024 12:06:53 - INFO - __main__ - Step 8809: {'lr': 0.0004963703568838122, 'samples': 422832, 'steps': 8808, 'loss/train': 1.737383246421814} +07/25/2024 12:06:54 - INFO - __main__ - Step 8810: {'lr': 0.0004963694636781509, 'samples': 422880, 'steps': 8809, 'loss/train': 2.164241075515747} +07/25/2024 12:06:54 - INFO - __main__ - Step 8811: {'lr': 0.0004963685703634041, 'samples': 422928, 'steps': 8810, 'loss/train': 1.78754723072052} +07/25/2024 12:06:54 - INFO - __main__ - Step 8812: {'lr': 0.0004963676769395722, 'samples': 422976, 'steps': 8811, 'loss/train': 1.7861396074295044} +07/25/2024 12:06:55 - INFO - __main__ - Step 8813: {'lr': 0.0004963667834066556, 'samples': 423024, 'steps': 8812, 'loss/train': 2.0589473247528076} +07/25/2024 12:06:55 - INFO - __main__ - Step 8814: {'lr': 0.0004963658897646546, 'samples': 423072, 'steps': 8813, 'loss/train': 1.986033320426941} +07/25/2024 12:06:55 - INFO - __main__ - Step 8815: {'lr': 0.0004963649960135698, 'samples': 423120, 'steps': 8814, 'loss/train': 2.4159882068634033} +07/25/2024 12:06:55 - INFO - __main__ - Step 8816: {'lr': 0.0004963641021534014, 'samples': 423168, 'steps': 8815, 'loss/train': 2.2162230014801025} +07/25/2024 12:06:56 - INFO - __main__ - Step 8817: {'lr': 0.0004963632081841499, 'samples': 423216, 'steps': 8816, 'loss/train': 1.9388152360916138} +07/25/2024 12:06:56 - INFO - __main__ - Step 8818: {'lr': 0.0004963623141058158, 'samples': 423264, 'steps': 8817, 'loss/train': 2.065281629562378} +07/25/2024 12:06:56 - INFO - __main__ - Step 8819: {'lr': 0.0004963614199183993, 'samples': 423312, 'steps': 8818, 'loss/train': 1.9340685606002808} +07/25/2024 12:06:57 - INFO - __main__ - Step 8820: {'lr': 0.0004963605256219007, 'samples': 423360, 'steps': 8819, 'loss/train': 1.931196689605713} +07/25/2024 12:06:57 - INFO - __main__ - Step 8821: {'lr': 0.0004963596312163207, 'samples': 423408, 'steps': 8820, 'loss/train': 1.8062293529510498} +07/25/2024 12:06:57 - INFO - __main__ - Step 8822: {'lr': 0.0004963587367016595, 'samples': 423456, 'steps': 8821, 'loss/train': 2.1128973960876465} +07/25/2024 12:06:57 - INFO - __main__ - Step 8823: {'lr': 0.0004963578420779177, 'samples': 423504, 'steps': 8822, 'loss/train': 1.92576265335083} +07/25/2024 12:06:58 - INFO - __main__ - Step 8824: {'lr': 0.0004963569473450954, 'samples': 423552, 'steps': 8823, 'loss/train': 1.9983413219451904} +07/25/2024 12:06:58 - INFO - __main__ - Step 8825: {'lr': 0.0004963560525031933, 'samples': 423600, 'steps': 8824, 'loss/train': 1.8870811462402344} +07/25/2024 12:06:58 - INFO - __main__ - Step 8826: {'lr': 0.0004963551575522115, 'samples': 423648, 'steps': 8825, 'loss/train': 1.7291837930679321} +07/25/2024 12:06:59 - INFO - __main__ - Step 8827: {'lr': 0.0004963542624921506, 'samples': 423696, 'steps': 8826, 'loss/train': 2.0820889472961426} +07/25/2024 12:06:59 - INFO - __main__ - Step 8828: {'lr': 0.0004963533673230108, 'samples': 423744, 'steps': 8827, 'loss/train': 1.832537293434143} +07/25/2024 12:06:59 - INFO - __main__ - Step 8829: {'lr': 0.0004963524720447928, 'samples': 423792, 'steps': 8828, 'loss/train': 1.6105308532714844} +07/25/2024 12:06:59 - INFO - __main__ - Step 8830: {'lr': 0.0004963515766574968, 'samples': 423840, 'steps': 8829, 'loss/train': 2.2748963832855225} +07/25/2024 12:07:00 - INFO - __main__ - Step 8831: {'lr': 0.0004963506811611232, 'samples': 423888, 'steps': 8830, 'loss/train': 1.7951823472976685} +07/25/2024 12:07:00 - INFO - __main__ - Step 8832: {'lr': 0.0004963497855556725, 'samples': 423936, 'steps': 8831, 'loss/train': 0.35380038619041443} +07/25/2024 12:07:00 - INFO - __main__ - Step 8833: {'lr': 0.0004963488898411449, 'samples': 423984, 'steps': 8832, 'loss/train': 1.9661076068878174} +07/25/2024 12:07:01 - INFO - __main__ - Step 8834: {'lr': 0.000496347994017541, 'samples': 424032, 'steps': 8833, 'loss/train': 1.5520962476730347} +07/25/2024 12:07:01 - INFO - __main__ - Step 8835: {'lr': 0.000496347098084861, 'samples': 424080, 'steps': 8834, 'loss/train': 1.962259292602539} +07/25/2024 12:07:01 - INFO - __main__ - Step 8836: {'lr': 0.0004963462020431055, 'samples': 424128, 'steps': 8835, 'loss/train': 1.7222336530685425} +07/25/2024 12:07:01 - INFO - __main__ - Step 8837: {'lr': 0.0004963453058922748, 'samples': 424176, 'steps': 8836, 'loss/train': 1.8186290264129639} +07/25/2024 12:07:02 - INFO - __main__ - Step 8838: {'lr': 0.0004963444096323693, 'samples': 424224, 'steps': 8837, 'loss/train': 1.6942188739776611} +07/25/2024 12:07:02 - INFO - __main__ - Step 8839: {'lr': 0.0004963435132633894, 'samples': 424272, 'steps': 8838, 'loss/train': 1.5400390625} +07/25/2024 12:07:02 - INFO - __main__ - Step 8840: {'lr': 0.0004963426167853354, 'samples': 424320, 'steps': 8839, 'loss/train': 1.992006778717041} +07/25/2024 12:07:03 - INFO - __main__ - Step 8841: {'lr': 0.0004963417201982079, 'samples': 424368, 'steps': 8840, 'loss/train': 2.2625606060028076} +07/25/2024 12:07:03 - INFO - __main__ - Step 8842: {'lr': 0.0004963408235020071, 'samples': 424416, 'steps': 8841, 'loss/train': 2.3460586071014404} +07/25/2024 12:07:03 - INFO - __main__ - Step 8843: {'lr': 0.0004963399266967336, 'samples': 424464, 'steps': 8842, 'loss/train': 1.8925114870071411} +07/25/2024 12:07:03 - INFO - __main__ - Step 8844: {'lr': 0.0004963390297823875, 'samples': 424512, 'steps': 8843, 'loss/train': 2.0305557250976562} +07/25/2024 12:07:04 - INFO - __main__ - Step 8845: {'lr': 0.0004963381327589695, 'samples': 424560, 'steps': 8844, 'loss/train': 1.8650521039962769} +07/25/2024 12:07:04 - INFO - __main__ - Step 8846: {'lr': 0.0004963372356264799, 'samples': 424608, 'steps': 8845, 'loss/train': 2.1057300567626953} +07/25/2024 12:07:04 - INFO - __main__ - Step 8847: {'lr': 0.000496336338384919, 'samples': 424656, 'steps': 8846, 'loss/train': 1.6333045959472656} +07/25/2024 12:07:05 - INFO - __main__ - Step 8848: {'lr': 0.0004963354410342874, 'samples': 424704, 'steps': 8847, 'loss/train': 2.179651975631714} +07/25/2024 12:07:05 - INFO - __main__ - Step 8849: {'lr': 0.0004963345435745853, 'samples': 424752, 'steps': 8848, 'loss/train': 1.9082996845245361} +07/25/2024 12:07:05 - INFO - __main__ - Step 8850: {'lr': 0.0004963336460058132, 'samples': 424800, 'steps': 8849, 'loss/train': 1.628095269203186} +07/25/2024 12:07:05 - INFO - __main__ - Step 8851: {'lr': 0.0004963327483279714, 'samples': 424848, 'steps': 8850, 'loss/train': 1.9421418905258179} +07/25/2024 12:07:06 - INFO - __main__ - Step 8852: {'lr': 0.0004963318505410604, 'samples': 424896, 'steps': 8851, 'loss/train': 2.167438268661499} +07/25/2024 12:07:06 - INFO - __main__ - Step 8853: {'lr': 0.0004963309526450806, 'samples': 424944, 'steps': 8852, 'loss/train': 2.2110471725463867} +07/25/2024 12:07:06 - INFO - __main__ - Step 8854: {'lr': 0.0004963300546400322, 'samples': 424992, 'steps': 8853, 'loss/train': 1.9576064348220825} +07/25/2024 12:07:07 - INFO - __main__ - Step 8855: {'lr': 0.0004963291565259159, 'samples': 425040, 'steps': 8854, 'loss/train': 1.8268455266952515} +07/25/2024 12:07:07 - INFO - __main__ - Step 8856: {'lr': 0.0004963282583027318, 'samples': 425088, 'steps': 8855, 'loss/train': 1.7756229639053345} +07/25/2024 12:07:07 - INFO - __main__ - Step 8857: {'lr': 0.0004963273599704806, 'samples': 425136, 'steps': 8856, 'loss/train': 2.080449104309082} +07/25/2024 12:07:07 - INFO - __main__ - Step 8858: {'lr': 0.0004963264615291626, 'samples': 425184, 'steps': 8857, 'loss/train': 2.3175106048583984} +07/25/2024 12:07:08 - INFO - __main__ - Step 8859: {'lr': 0.000496325562978778, 'samples': 425232, 'steps': 8858, 'loss/train': 2.309762477874756} +07/25/2024 12:07:08 - INFO - __main__ - Step 8860: {'lr': 0.0004963246643193273, 'samples': 425280, 'steps': 8859, 'loss/train': 1.8695988655090332} +07/25/2024 12:07:08 - INFO - __main__ - Step 8861: {'lr': 0.0004963237655508111, 'samples': 425328, 'steps': 8860, 'loss/train': 2.1139538288116455} +07/25/2024 12:07:09 - INFO - __main__ - Step 8862: {'lr': 0.0004963228666732296, 'samples': 425376, 'steps': 8861, 'loss/train': 1.8803932666778564} +07/25/2024 12:07:09 - INFO - __main__ - Step 8863: {'lr': 0.0004963219676865832, 'samples': 425424, 'steps': 8862, 'loss/train': 1.95668363571167} +07/25/2024 12:07:09 - INFO - __main__ - Step 8864: {'lr': 0.0004963210685908723, 'samples': 425472, 'steps': 8863, 'loss/train': 2.1321585178375244} +07/25/2024 12:07:09 - INFO - __main__ - Step 8865: {'lr': 0.0004963201693860973, 'samples': 425520, 'steps': 8864, 'loss/train': 2.4979751110076904} +07/25/2024 12:07:10 - INFO - __main__ - Step 8866: {'lr': 0.0004963192700722588, 'samples': 425568, 'steps': 8865, 'loss/train': 1.63153076171875} +07/25/2024 12:07:10 - INFO - __main__ - Step 8867: {'lr': 0.000496318370649357, 'samples': 425616, 'steps': 8866, 'loss/train': 2.013719320297241} +07/25/2024 12:07:10 - INFO - __main__ - Step 8868: {'lr': 0.0004963174711173922, 'samples': 425664, 'steps': 8867, 'loss/train': 2.2722058296203613} +07/25/2024 12:07:10 - INFO - __main__ - Step 8869: {'lr': 0.000496316571476365, 'samples': 425712, 'steps': 8868, 'loss/train': 2.024536371231079} +07/25/2024 12:07:11 - INFO - __main__ - Step 8870: {'lr': 0.0004963156717262757, 'samples': 425760, 'steps': 8869, 'loss/train': 1.8438926935195923} +07/25/2024 12:07:11 - INFO - __main__ - Step 8871: {'lr': 0.0004963147718671247, 'samples': 425808, 'steps': 8870, 'loss/train': 1.9408423900604248} +07/25/2024 12:07:11 - INFO - __main__ - Step 8872: {'lr': 0.0004963138718989125, 'samples': 425856, 'steps': 8871, 'loss/train': 2.6444480419158936} +07/25/2024 12:07:12 - INFO - __main__ - Step 8873: {'lr': 0.0004963129718216394, 'samples': 425904, 'steps': 8872, 'loss/train': 1.9962483644485474} +07/25/2024 12:07:12 - INFO - __main__ - Step 8874: {'lr': 0.0004963120716353057, 'samples': 425952, 'steps': 8873, 'loss/train': 2.1888980865478516} +07/25/2024 12:07:12 - INFO - __main__ - Step 8875: {'lr': 0.000496311171339912, 'samples': 426000, 'steps': 8874, 'loss/train': 1.8271236419677734} +07/25/2024 12:07:12 - INFO - __main__ - Step 8876: {'lr': 0.0004963102709354587, 'samples': 426048, 'steps': 8875, 'loss/train': 1.8911285400390625} +07/25/2024 12:07:13 - INFO - __main__ - Step 8877: {'lr': 0.000496309370421946, 'samples': 426096, 'steps': 8876, 'loss/train': 1.9471491575241089} +07/25/2024 12:07:13 - INFO - __main__ - Step 8878: {'lr': 0.0004963084697993744, 'samples': 426144, 'steps': 8877, 'loss/train': 1.3413870334625244} +07/25/2024 12:07:13 - INFO - __main__ - Step 8879: {'lr': 0.0004963075690677444, 'samples': 426192, 'steps': 8878, 'loss/train': 1.8922836780548096} +07/25/2024 12:07:14 - INFO - __main__ - Step 8880: {'lr': 0.0004963066682270563, 'samples': 426240, 'steps': 8879, 'loss/train': 2.1451168060302734} +07/25/2024 12:07:14 - INFO - __main__ - Step 8881: {'lr': 0.0004963057672773104, 'samples': 426288, 'steps': 8880, 'loss/train': 1.829953670501709} +07/25/2024 12:07:14 - INFO - __main__ - Step 8882: {'lr': 0.0004963048662185073, 'samples': 426336, 'steps': 8881, 'loss/train': 2.0749351978302} +07/25/2024 12:07:14 - INFO - __main__ - Step 8883: {'lr': 0.0004963039650506473, 'samples': 426384, 'steps': 8882, 'loss/train': 2.3004653453826904} +07/25/2024 12:07:15 - INFO - __main__ - Step 8884: {'lr': 0.0004963030637737308, 'samples': 426432, 'steps': 8883, 'loss/train': 2.138352632522583} +07/25/2024 12:07:15 - INFO - __main__ - Step 8885: {'lr': 0.0004963021623877582, 'samples': 426480, 'steps': 8884, 'loss/train': 1.9915471076965332} +07/25/2024 12:07:15 - INFO - __main__ - Step 8886: {'lr': 0.0004963012608927298, 'samples': 426528, 'steps': 8885, 'loss/train': 2.416487455368042} +07/25/2024 12:07:16 - INFO - __main__ - Step 8887: {'lr': 0.0004963003592886463, 'samples': 426576, 'steps': 8886, 'loss/train': 1.8369134664535522} +07/25/2024 12:07:16 - INFO - __main__ - Step 8888: {'lr': 0.0004962994575755078, 'samples': 426624, 'steps': 8887, 'loss/train': 2.609710454940796} +07/25/2024 12:07:16 - INFO - __main__ - Step 8889: {'lr': 0.0004962985557533148, 'samples': 426672, 'steps': 8888, 'loss/train': 1.8114144802093506} +07/25/2024 12:07:16 - INFO - __main__ - Step 8890: {'lr': 0.0004962976538220677, 'samples': 426720, 'steps': 8889, 'loss/train': 1.4638609886169434} +07/25/2024 12:07:17 - INFO - __main__ - Step 8891: {'lr': 0.000496296751781767, 'samples': 426768, 'steps': 8890, 'loss/train': 1.9223450422286987} +07/25/2024 12:07:17 - INFO - __main__ - Step 8892: {'lr': 0.0004962958496324128, 'samples': 426816, 'steps': 8891, 'loss/train': 1.943318486213684} +07/25/2024 12:07:17 - INFO - __main__ - Step 8893: {'lr': 0.0004962949473740058, 'samples': 426864, 'steps': 8892, 'loss/train': 2.048276424407959} +07/25/2024 12:07:18 - INFO - __main__ - Step 8894: {'lr': 0.0004962940450065464, 'samples': 426912, 'steps': 8893, 'loss/train': 2.0249221324920654} +07/25/2024 12:07:18 - INFO - __main__ - Step 8895: {'lr': 0.0004962931425300347, 'samples': 426960, 'steps': 8894, 'loss/train': 1.5969910621643066} +07/25/2024 12:07:18 - INFO - __main__ - Step 8896: {'lr': 0.0004962922399444715, 'samples': 427008, 'steps': 8895, 'loss/train': 1.7436604499816895} +07/25/2024 12:07:18 - INFO - __main__ - Step 8897: {'lr': 0.0004962913372498569, 'samples': 427056, 'steps': 8896, 'loss/train': 1.5263895988464355} +07/25/2024 12:07:19 - INFO - __main__ - Step 8898: {'lr': 0.0004962904344461914, 'samples': 427104, 'steps': 8897, 'loss/train': 1.794862985610962} +07/25/2024 12:07:19 - INFO - __main__ - Step 8899: {'lr': 0.0004962895315334753, 'samples': 427152, 'steps': 8898, 'loss/train': 1.9935272932052612} +07/25/2024 12:07:19 - INFO - __main__ - Step 8900: {'lr': 0.0004962886285117093, 'samples': 427200, 'steps': 8899, 'loss/train': 2.224844455718994} +07/25/2024 12:07:20 - INFO - __main__ - Step 8901: {'lr': 0.0004962877253808934, 'samples': 427248, 'steps': 8900, 'loss/train': 2.322193145751953} +07/25/2024 12:07:20 - INFO - __main__ - Step 8902: {'lr': 0.0004962868221410283, 'samples': 427296, 'steps': 8901, 'loss/train': 2.1641499996185303} +07/25/2024 12:07:20 - INFO - __main__ - Step 8903: {'lr': 0.0004962859187921143, 'samples': 427344, 'steps': 8902, 'loss/train': 1.9540053606033325} +07/25/2024 12:07:20 - INFO - __main__ - Step 8904: {'lr': 0.0004962850153341518, 'samples': 427392, 'steps': 8903, 'loss/train': 2.1586132049560547} +07/25/2024 12:07:21 - INFO - __main__ - Step 8905: {'lr': 0.0004962841117671412, 'samples': 427440, 'steps': 8904, 'loss/train': 2.1778485774993896} +07/25/2024 12:07:21 - INFO - __main__ - Step 8906: {'lr': 0.0004962832080910828, 'samples': 427488, 'steps': 8905, 'loss/train': 2.4365100860595703} +07/25/2024 12:07:21 - INFO - __main__ - Step 8907: {'lr': 0.0004962823043059772, 'samples': 427536, 'steps': 8906, 'loss/train': 1.7134885787963867} +07/25/2024 12:07:22 - INFO - __main__ - Step 8908: {'lr': 0.0004962814004118246, 'samples': 427584, 'steps': 8907, 'loss/train': 2.4388136863708496} +07/25/2024 12:07:22 - INFO - __main__ - Step 8909: {'lr': 0.0004962804964086256, 'samples': 427632, 'steps': 8908, 'loss/train': 1.6959816217422485} +07/25/2024 12:07:22 - INFO - __main__ - Step 8910: {'lr': 0.0004962795922963804, 'samples': 427680, 'steps': 8909, 'loss/train': 1.9758392572402954} +07/25/2024 12:07:22 - INFO - __main__ - Step 8911: {'lr': 0.0004962786880750896, 'samples': 427728, 'steps': 8910, 'loss/train': 2.234808921813965} +07/25/2024 12:07:23 - INFO - __main__ - Step 8912: {'lr': 0.0004962777837447534, 'samples': 427776, 'steps': 8911, 'loss/train': 1.9952280521392822} +07/25/2024 12:07:23 - INFO - __main__ - Step 8913: {'lr': 0.0004962768793053724, 'samples': 427824, 'steps': 8912, 'loss/train': 1.8604151010513306} +07/25/2024 12:07:23 - INFO - __main__ - Step 8914: {'lr': 0.0004962759747569468, 'samples': 427872, 'steps': 8913, 'loss/train': 2.0720736980438232} +07/25/2024 12:07:24 - INFO - __main__ - Step 8915: {'lr': 0.0004962750700994771, 'samples': 427920, 'steps': 8914, 'loss/train': 1.90615975856781} +07/25/2024 12:07:24 - INFO - __main__ - Step 8916: {'lr': 0.0004962741653329638, 'samples': 427968, 'steps': 8915, 'loss/train': 1.5509411096572876} +07/25/2024 12:07:24 - INFO - __main__ - Step 8917: {'lr': 0.0004962732604574071, 'samples': 428016, 'steps': 8916, 'loss/train': 1.9126075506210327} +07/25/2024 12:07:24 - INFO - __main__ - Step 8918: {'lr': 0.0004962723554728076, 'samples': 428064, 'steps': 8917, 'loss/train': 1.9757864475250244} +07/25/2024 12:07:25 - INFO - __main__ - Step 8919: {'lr': 0.0004962714503791656, 'samples': 428112, 'steps': 8918, 'loss/train': 1.5800679922103882} +07/25/2024 12:07:25 - INFO - __main__ - Step 8920: {'lr': 0.0004962705451764814, 'samples': 428160, 'steps': 8919, 'loss/train': 2.061871290206909} +07/25/2024 12:07:25 - INFO - __main__ - Step 8921: {'lr': 0.0004962696398647555, 'samples': 428208, 'steps': 8920, 'loss/train': 2.299915075302124} +07/25/2024 12:07:26 - INFO - __main__ - Step 8922: {'lr': 0.0004962687344439884, 'samples': 428256, 'steps': 8921, 'loss/train': 2.3615424633026123} +07/25/2024 12:07:26 - INFO - __main__ - Step 8923: {'lr': 0.0004962678289141803, 'samples': 428304, 'steps': 8922, 'loss/train': 1.936741590499878} +07/25/2024 12:07:26 - INFO - __main__ - Step 8924: {'lr': 0.0004962669232753318, 'samples': 428352, 'steps': 8923, 'loss/train': 1.9362787008285522} +07/25/2024 12:07:26 - INFO - __main__ - Step 8925: {'lr': 0.0004962660175274431, 'samples': 428400, 'steps': 8924, 'loss/train': 2.0054879188537598} +07/25/2024 12:07:27 - INFO - __main__ - Step 8926: {'lr': 0.0004962651116705148, 'samples': 428448, 'steps': 8925, 'loss/train': 1.727986454963684} +07/25/2024 12:07:27 - INFO - __main__ - Step 8927: {'lr': 0.0004962642057045472, 'samples': 428496, 'steps': 8926, 'loss/train': 1.8149367570877075} +07/25/2024 12:07:27 - INFO - __main__ - Step 8928: {'lr': 0.0004962632996295407, 'samples': 428544, 'steps': 8927, 'loss/train': 2.4335129261016846} +07/25/2024 12:07:28 - INFO - __main__ - Step 8929: {'lr': 0.0004962623934454957, 'samples': 428592, 'steps': 8928, 'loss/train': 1.9822486639022827} +07/25/2024 12:07:28 - INFO - __main__ - Step 8930: {'lr': 0.0004962614871524126, 'samples': 428640, 'steps': 8929, 'loss/train': 1.6209707260131836} +07/25/2024 12:07:28 - INFO - __main__ - Step 8931: {'lr': 0.0004962605807502919, 'samples': 428688, 'steps': 8930, 'loss/train': 1.8239315748214722} +07/25/2024 12:07:28 - INFO - __main__ - Step 8932: {'lr': 0.0004962596742391339, 'samples': 428736, 'steps': 8931, 'loss/train': 0.6117445826530457} +07/25/2024 12:07:29 - INFO - __main__ - Step 8933: {'lr': 0.0004962587676189389, 'samples': 428784, 'steps': 8932, 'loss/train': 1.1944599151611328} +07/25/2024 12:07:29 - INFO - __main__ - Step 8934: {'lr': 0.0004962578608897075, 'samples': 428832, 'steps': 8933, 'loss/train': 1.8225390911102295} +07/25/2024 12:07:29 - INFO - __main__ - Step 8935: {'lr': 0.00049625695405144, 'samples': 428880, 'steps': 8934, 'loss/train': 2.5924360752105713} +07/25/2024 12:07:29 - INFO - __main__ - Step 8936: {'lr': 0.0004962560471041368, 'samples': 428928, 'steps': 8935, 'loss/train': 1.7772034406661987} +07/25/2024 12:07:30 - INFO - __main__ - Step 8937: {'lr': 0.0004962551400477984, 'samples': 428976, 'steps': 8936, 'loss/train': 1.3060667514801025} +07/25/2024 12:07:30 - INFO - __main__ - Step 8938: {'lr': 0.0004962542328824249, 'samples': 429024, 'steps': 8937, 'loss/train': 2.4633681774139404} +07/25/2024 12:07:30 - INFO - __main__ - Step 8939: {'lr': 0.0004962533256080172, 'samples': 429072, 'steps': 8938, 'loss/train': 1.8029370307922363} +07/25/2024 12:07:31 - INFO - __main__ - Step 8940: {'lr': 0.0004962524182245754, 'samples': 429120, 'steps': 8939, 'loss/train': 1.584347128868103} +07/25/2024 12:07:31 - INFO - __main__ - Step 8941: {'lr': 0.0004962515107320998, 'samples': 429168, 'steps': 8940, 'loss/train': 2.1934595108032227} +07/25/2024 12:07:31 - INFO - __main__ - Step 8942: {'lr': 0.000496250603130591, 'samples': 429216, 'steps': 8941, 'loss/train': 2.001330614089966} +07/25/2024 12:07:31 - INFO - __main__ - Step 8943: {'lr': 0.0004962496954200493, 'samples': 429264, 'steps': 8942, 'loss/train': 1.7855170965194702} +07/25/2024 12:07:32 - INFO - __main__ - Step 8944: {'lr': 0.0004962487876004751, 'samples': 429312, 'steps': 8943, 'loss/train': 2.1110663414001465} +07/25/2024 12:07:32 - INFO - __main__ - Step 8945: {'lr': 0.0004962478796718689, 'samples': 429360, 'steps': 8944, 'loss/train': 2.0778441429138184} +07/25/2024 12:07:32 - INFO - __main__ - Step 8946: {'lr': 0.0004962469716342311, 'samples': 429408, 'steps': 8945, 'loss/train': 2.3426177501678467} +07/25/2024 12:07:33 - INFO - __main__ - Step 8947: {'lr': 0.000496246063487562, 'samples': 429456, 'steps': 8946, 'loss/train': 1.8325120210647583} +07/25/2024 12:07:33 - INFO - __main__ - Step 8948: {'lr': 0.000496245155231862, 'samples': 429504, 'steps': 8947, 'loss/train': 1.9368319511413574} +07/25/2024 12:07:33 - INFO - __main__ - Step 8949: {'lr': 0.0004962442468671315, 'samples': 429552, 'steps': 8948, 'loss/train': 1.696283221244812} +07/25/2024 12:07:33 - INFO - __main__ - Step 8950: {'lr': 0.000496243338393371, 'samples': 429600, 'steps': 8949, 'loss/train': 1.7824211120605469} +07/25/2024 12:07:34 - INFO - __main__ - Step 8951: {'lr': 0.0004962424298105808, 'samples': 429648, 'steps': 8950, 'loss/train': 2.170365333557129} +07/25/2024 12:07:34 - INFO - __main__ - Step 8952: {'lr': 0.0004962415211187614, 'samples': 429696, 'steps': 8951, 'loss/train': 1.875885248184204} +07/25/2024 12:07:34 - INFO - __main__ - Step 8953: {'lr': 0.0004962406123179132, 'samples': 429744, 'steps': 8952, 'loss/train': 2.2828872203826904} +07/25/2024 12:07:35 - INFO - __main__ - Step 8954: {'lr': 0.0004962397034080365, 'samples': 429792, 'steps': 8953, 'loss/train': 1.639630913734436} +07/25/2024 12:07:35 - INFO - __main__ - Step 8955: {'lr': 0.0004962387943891318, 'samples': 429840, 'steps': 8954, 'loss/train': 1.748415231704712} +07/25/2024 12:07:35 - INFO - __main__ - Step 8956: {'lr': 0.0004962378852611994, 'samples': 429888, 'steps': 8955, 'loss/train': 0.567937433719635} +07/25/2024 12:07:35 - INFO - __main__ - Step 8957: {'lr': 0.0004962369760242398, 'samples': 429936, 'steps': 8956, 'loss/train': 1.1068122386932373} +07/25/2024 12:07:36 - INFO - __main__ - Step 8958: {'lr': 0.0004962360666782534, 'samples': 429984, 'steps': 8957, 'loss/train': 1.891264796257019} +07/25/2024 12:07:36 - INFO - __main__ - Step 8959: {'lr': 0.0004962351572232405, 'samples': 430032, 'steps': 8958, 'loss/train': 1.9996156692504883} +07/25/2024 12:07:36 - INFO - __main__ - Step 8960: {'lr': 0.0004962342476592017, 'samples': 430080, 'steps': 8959, 'loss/train': 1.6222718954086304} +07/25/2024 12:07:37 - INFO - __main__ - Step 8961: {'lr': 0.0004962333379861371, 'samples': 430128, 'steps': 8960, 'loss/train': 1.9825559854507446} +07/25/2024 12:07:37 - INFO - __main__ - Step 8962: {'lr': 0.0004962324282040473, 'samples': 430176, 'steps': 8961, 'loss/train': 2.2333614826202393} +07/25/2024 12:07:37 - INFO - __main__ - Step 8963: {'lr': 0.0004962315183129328, 'samples': 430224, 'steps': 8962, 'loss/train': 1.6276203393936157} +07/25/2024 12:07:37 - INFO - __main__ - Step 8964: {'lr': 0.0004962306083127938, 'samples': 430272, 'steps': 8963, 'loss/train': 1.7708269357681274} +07/25/2024 12:07:38 - INFO - __main__ - Step 8965: {'lr': 0.0004962296982036308, 'samples': 430320, 'steps': 8964, 'loss/train': 1.4413642883300781} +07/25/2024 12:07:38 - INFO - __main__ - Step 8966: {'lr': 0.0004962287879854442, 'samples': 430368, 'steps': 8965, 'loss/train': 2.0089282989501953} +07/25/2024 12:07:38 - INFO - __main__ - Step 8967: {'lr': 0.0004962278776582344, 'samples': 430416, 'steps': 8966, 'loss/train': 1.9754576683044434} +07/25/2024 12:07:39 - INFO - __main__ - Step 8968: {'lr': 0.0004962269672220018, 'samples': 430464, 'steps': 8967, 'loss/train': 1.5103799104690552} +07/25/2024 12:07:39 - INFO - __main__ - Step 8969: {'lr': 0.0004962260566767467, 'samples': 430512, 'steps': 8968, 'loss/train': 1.389021396636963} +07/25/2024 12:07:39 - INFO - __main__ - Step 8970: {'lr': 0.0004962251460224697, 'samples': 430560, 'steps': 8969, 'loss/train': 2.273611307144165} +07/25/2024 12:07:39 - INFO - __main__ - Step 8971: {'lr': 0.0004962242352591711, 'samples': 430608, 'steps': 8970, 'loss/train': 1.4722251892089844} +07/25/2024 12:07:40 - INFO - __main__ - Step 8972: {'lr': 0.0004962233243868513, 'samples': 430656, 'steps': 8971, 'loss/train': 2.5498263835906982} +07/25/2024 12:07:40 - INFO - __main__ - Step 8973: {'lr': 0.0004962224134055107, 'samples': 430704, 'steps': 8972, 'loss/train': 2.4187939167022705} +07/25/2024 12:07:40 - INFO - __main__ - Step 8974: {'lr': 0.0004962215023151497, 'samples': 430752, 'steps': 8973, 'loss/train': 1.601417064666748} +07/25/2024 12:07:41 - INFO - __main__ - Step 8975: {'lr': 0.0004962205911157688, 'samples': 430800, 'steps': 8974, 'loss/train': 1.6843706369400024} +07/25/2024 12:07:41 - INFO - __main__ - Step 8976: {'lr': 0.0004962196798073682, 'samples': 430848, 'steps': 8975, 'loss/train': 0.953894317150116} +07/25/2024 12:07:41 - INFO - __main__ - Step 8977: {'lr': 0.0004962187683899486, 'samples': 430896, 'steps': 8976, 'loss/train': 2.2530431747436523} +07/25/2024 12:07:41 - INFO - __main__ - Step 8978: {'lr': 0.00049621785686351, 'samples': 430944, 'steps': 8977, 'loss/train': 2.243929147720337} +07/25/2024 12:07:42 - INFO - __main__ - Step 8979: {'lr': 0.0004962169452280531, 'samples': 430992, 'steps': 8978, 'loss/train': 3.1783173084259033} +07/25/2024 12:07:42 - INFO - __main__ - Step 8980: {'lr': 0.0004962160334835784, 'samples': 431040, 'steps': 8979, 'loss/train': 2.284615993499756} +07/25/2024 12:07:42 - INFO - __main__ - Step 8981: {'lr': 0.0004962151216300859, 'samples': 431088, 'steps': 8980, 'loss/train': 1.305092215538025} +07/25/2024 12:07:43 - INFO - __main__ - Step 8982: {'lr': 0.0004962142096675764, 'samples': 431136, 'steps': 8981, 'loss/train': 2.2215969562530518} +07/25/2024 12:07:43 - INFO - __main__ - Step 8983: {'lr': 0.0004962132975960501, 'samples': 431184, 'steps': 8982, 'loss/train': 0.7333295345306396} +07/25/2024 12:07:43 - INFO - __main__ - Step 8984: {'lr': 0.0004962123854155075, 'samples': 431232, 'steps': 8983, 'loss/train': 1.2271822690963745} +07/25/2024 12:07:43 - INFO - __main__ - Step 8985: {'lr': 0.0004962114731259489, 'samples': 431280, 'steps': 8984, 'loss/train': 2.2222862243652344} +07/25/2024 12:07:44 - INFO - __main__ - Step 8986: {'lr': 0.0004962105607273748, 'samples': 431328, 'steps': 8985, 'loss/train': 2.334182024002075} +07/25/2024 12:07:44 - INFO - __main__ - Step 8987: {'lr': 0.0004962096482197855, 'samples': 431376, 'steps': 8986, 'loss/train': 1.7719697952270508} +07/25/2024 12:07:44 - INFO - __main__ - Step 8988: {'lr': 0.0004962087356031815, 'samples': 431424, 'steps': 8987, 'loss/train': 2.126538038253784} +07/25/2024 12:07:45 - INFO - __main__ - Step 8989: {'lr': 0.0004962078228775632, 'samples': 431472, 'steps': 8988, 'loss/train': 2.161078691482544} +07/25/2024 12:07:45 - INFO - __main__ - Step 8990: {'lr': 0.0004962069100429309, 'samples': 431520, 'steps': 8989, 'loss/train': 1.6079028844833374} +07/25/2024 12:07:45 - INFO - __main__ - Step 8991: {'lr': 0.0004962059970992851, 'samples': 431568, 'steps': 8990, 'loss/train': 1.7067137956619263} +07/25/2024 12:07:45 - INFO - __main__ - Step 8992: {'lr': 0.0004962050840466262, 'samples': 431616, 'steps': 8991, 'loss/train': 2.3639698028564453} +07/25/2024 12:07:46 - INFO - __main__ - Step 8993: {'lr': 0.0004962041708849546, 'samples': 431664, 'steps': 8992, 'loss/train': 2.099879026412964} +07/25/2024 12:07:46 - INFO - __main__ - Step 8994: {'lr': 0.0004962032576142707, 'samples': 431712, 'steps': 8993, 'loss/train': 2.520432949066162} +07/25/2024 12:07:46 - INFO - __main__ - Step 8995: {'lr': 0.0004962023442345749, 'samples': 431760, 'steps': 8994, 'loss/train': 2.1314964294433594} +07/25/2024 12:07:47 - INFO - __main__ - Step 8996: {'lr': 0.0004962014307458675, 'samples': 431808, 'steps': 8995, 'loss/train': 2.4587531089782715} +07/25/2024 12:07:47 - INFO - __main__ - Step 8997: {'lr': 0.0004962005171481492, 'samples': 431856, 'steps': 8996, 'loss/train': 1.477249026298523} +07/25/2024 12:07:47 - INFO - __main__ - Step 8998: {'lr': 0.00049619960344142, 'samples': 431904, 'steps': 8997, 'loss/train': 1.2380279302597046} +07/25/2024 12:07:47 - INFO - __main__ - Step 8999: {'lr': 0.0004961986896256807, 'samples': 431952, 'steps': 8998, 'loss/train': 2.0984408855438232} +07/25/2024 12:07:48 - INFO - __main__ - Step 9000: {'lr': 0.0004961977757009314, 'samples': 432000, 'steps': 8999, 'loss/train': 1.5059088468551636} +07/25/2024 12:07:48 - INFO - __main__ - Step 9001: {'lr': 0.0004961968616671726, 'samples': 432048, 'steps': 9000, 'loss/train': 1.892431378364563} +07/25/2024 12:07:48 - INFO - __main__ - Step 9002: {'lr': 0.0004961959475244048, 'samples': 432096, 'steps': 9001, 'loss/train': 1.2934014797210693} +07/25/2024 12:07:48 - INFO - __main__ - Step 9003: {'lr': 0.0004961950332726283, 'samples': 432144, 'steps': 9002, 'loss/train': 2.9859211444854736} +07/25/2024 12:07:49 - INFO - __main__ - Step 9004: {'lr': 0.0004961941189118435, 'samples': 432192, 'steps': 9003, 'loss/train': 2.681957960128784} +07/25/2024 12:07:49 - INFO - __main__ - Step 9005: {'lr': 0.000496193204442051, 'samples': 432240, 'steps': 9004, 'loss/train': 1.4132256507873535} +07/25/2024 12:07:49 - INFO - __main__ - Step 9006: {'lr': 0.0004961922898632509, 'samples': 432288, 'steps': 9005, 'loss/train': 1.6980491876602173} +07/25/2024 12:07:50 - INFO - __main__ - Step 9007: {'lr': 0.0004961913751754438, 'samples': 432336, 'steps': 9006, 'loss/train': 2.349930763244629} +07/25/2024 12:07:50 - INFO - __main__ - Step 9008: {'lr': 0.00049619046037863, 'samples': 432384, 'steps': 9007, 'loss/train': 1.720242977142334} +07/25/2024 12:07:50 - INFO - __main__ - Step 9009: {'lr': 0.00049618954547281, 'samples': 432432, 'steps': 9008, 'loss/train': 2.5831615924835205} +07/25/2024 12:07:50 - INFO - __main__ - Step 9010: {'lr': 0.0004961886304579843, 'samples': 432480, 'steps': 9009, 'loss/train': 2.5703465938568115} +07/25/2024 12:07:51 - INFO - __main__ - Step 9011: {'lr': 0.000496187715334153, 'samples': 432528, 'steps': 9010, 'loss/train': 2.2358009815216064} +07/25/2024 12:07:51 - INFO - __main__ - Step 9012: {'lr': 0.0004961868001013167, 'samples': 432576, 'steps': 9011, 'loss/train': 2.4436800479888916} +07/25/2024 12:07:51 - INFO - __main__ - Step 9013: {'lr': 0.0004961858847594758, 'samples': 432624, 'steps': 9012, 'loss/train': 1.8101108074188232} +07/25/2024 12:07:52 - INFO - __main__ - Step 9014: {'lr': 0.0004961849693086307, 'samples': 432672, 'steps': 9013, 'loss/train': 1.6512782573699951} +07/25/2024 12:07:52 - INFO - __main__ - Step 9015: {'lr': 0.0004961840537487819, 'samples': 432720, 'steps': 9014, 'loss/train': 2.4251646995544434} +07/25/2024 12:07:52 - INFO - __main__ - Step 9016: {'lr': 0.0004961831380799295, 'samples': 432768, 'steps': 9015, 'loss/train': 1.6182159185409546} +07/25/2024 12:07:52 - INFO - __main__ - Step 9017: {'lr': 0.0004961822223020742, 'samples': 432816, 'steps': 9016, 'loss/train': 1.9678946733474731} +07/25/2024 12:07:53 - INFO - __main__ - Step 9018: {'lr': 0.0004961813064152163, 'samples': 432864, 'steps': 9017, 'loss/train': 2.2908546924591064} +07/25/2024 12:07:53 - INFO - __main__ - Step 9019: {'lr': 0.0004961803904193563, 'samples': 432912, 'steps': 9018, 'loss/train': 2.5618865489959717} +07/25/2024 12:07:53 - INFO - __main__ - Step 9020: {'lr': 0.0004961794743144944, 'samples': 432960, 'steps': 9019, 'loss/train': 1.9086581468582153} +07/25/2024 12:07:54 - INFO - __main__ - Step 9021: {'lr': 0.0004961785581006311, 'samples': 433008, 'steps': 9020, 'loss/train': 1.6408647298812866} +07/25/2024 12:07:54 - INFO - __main__ - Step 9022: {'lr': 0.0004961776417777669, 'samples': 433056, 'steps': 9021, 'loss/train': 1.406490445137024} +07/25/2024 12:07:54 - INFO - __main__ - Step 9023: {'lr': 0.0004961767253459022, 'samples': 433104, 'steps': 9022, 'loss/train': 1.940034031867981} +07/25/2024 12:07:54 - INFO - __main__ - Step 9024: {'lr': 0.0004961758088050373, 'samples': 433152, 'steps': 9023, 'loss/train': 1.875769853591919} +07/25/2024 12:07:55 - INFO - __main__ - Step 9025: {'lr': 0.0004961748921551726, 'samples': 433200, 'steps': 9024, 'loss/train': 2.045830726623535} +07/25/2024 12:07:55 - INFO - __main__ - Step 9026: {'lr': 0.0004961739753963086, 'samples': 433248, 'steps': 9025, 'loss/train': 1.9628686904907227} +07/25/2024 12:07:55 - INFO - __main__ - Step 9027: {'lr': 0.0004961730585284456, 'samples': 433296, 'steps': 9026, 'loss/train': 1.72319495677948} +07/25/2024 12:07:56 - INFO - __main__ - Step 9028: {'lr': 0.0004961721415515842, 'samples': 433344, 'steps': 9027, 'loss/train': 1.852109670639038} +07/25/2024 12:07:56 - INFO - __main__ - Step 9029: {'lr': 0.0004961712244657245, 'samples': 433392, 'steps': 9028, 'loss/train': 1.3377269506454468} +07/25/2024 12:07:56 - INFO - __main__ - Step 9030: {'lr': 0.0004961703072708672, 'samples': 433440, 'steps': 9029, 'loss/train': 1.4783834218978882} +07/25/2024 12:07:56 - INFO - __main__ - Step 9031: {'lr': 0.0004961693899670126, 'samples': 433488, 'steps': 9030, 'loss/train': 2.302154541015625} +07/25/2024 12:07:57 - INFO - __main__ - Step 9032: {'lr': 0.0004961684725541611, 'samples': 433536, 'steps': 9031, 'loss/train': 2.1214566230773926} +07/25/2024 12:07:57 - INFO - __main__ - Step 9033: {'lr': 0.000496167555032313, 'samples': 433584, 'steps': 9032, 'loss/train': 2.0995066165924072} +07/25/2024 12:07:57 - INFO - __main__ - Step 9034: {'lr': 0.0004961666374014689, 'samples': 433632, 'steps': 9033, 'loss/train': 2.0190012454986572} +07/25/2024 12:07:58 - INFO - __main__ - Step 9035: {'lr': 0.000496165719661629, 'samples': 433680, 'steps': 9034, 'loss/train': 2.271636486053467} +07/25/2024 12:07:58 - INFO - __main__ - Step 9036: {'lr': 0.0004961648018127939, 'samples': 433728, 'steps': 9035, 'loss/train': 1.7196838855743408} +07/25/2024 12:07:58 - INFO - __main__ - Step 9037: {'lr': 0.0004961638838549639, 'samples': 433776, 'steps': 9036, 'loss/train': 1.7017204761505127} +07/25/2024 12:07:58 - INFO - __main__ - Step 9038: {'lr': 0.0004961629657881394, 'samples': 433824, 'steps': 9037, 'loss/train': 1.7725836038589478} +07/25/2024 12:07:59 - INFO - __main__ - Step 9039: {'lr': 0.0004961620476123208, 'samples': 433872, 'steps': 9038, 'loss/train': 2.213869571685791} +07/25/2024 12:07:59 - INFO - __main__ - Step 9040: {'lr': 0.0004961611293275087, 'samples': 433920, 'steps': 9039, 'loss/train': 2.084958553314209} +07/25/2024 12:07:59 - INFO - __main__ - Step 9041: {'lr': 0.0004961602109337033, 'samples': 433968, 'steps': 9040, 'loss/train': 2.065716505050659} +07/25/2024 12:08:00 - INFO - __main__ - Step 9042: {'lr': 0.0004961592924309049, 'samples': 434016, 'steps': 9041, 'loss/train': 2.5075738430023193} +07/25/2024 12:08:00 - INFO - __main__ - Step 9043: {'lr': 0.0004961583738191142, 'samples': 434064, 'steps': 9042, 'loss/train': 2.479078769683838} +07/25/2024 12:08:00 - INFO - __main__ - Step 9044: {'lr': 0.0004961574550983315, 'samples': 434112, 'steps': 9043, 'loss/train': 1.8348252773284912} +07/25/2024 12:08:00 - INFO - __main__ - Step 9045: {'lr': 0.0004961565362685571, 'samples': 434160, 'steps': 9044, 'loss/train': 2.646843194961548} +07/25/2024 12:08:01 - INFO - __main__ - Step 9046: {'lr': 0.0004961556173297915, 'samples': 434208, 'steps': 9045, 'loss/train': 1.0714280605316162} +07/25/2024 12:08:01 - INFO - __main__ - Step 9047: {'lr': 0.0004961546982820351, 'samples': 434256, 'steps': 9046, 'loss/train': 2.2724521160125732} +07/25/2024 12:08:01 - INFO - __main__ - Step 9048: {'lr': 0.0004961537791252883, 'samples': 434304, 'steps': 9047, 'loss/train': 2.106943368911743} +07/25/2024 12:08:02 - INFO - __main__ - Step 9049: {'lr': 0.0004961528598595515, 'samples': 434352, 'steps': 9048, 'loss/train': 2.033627986907959} +07/25/2024 12:08:02 - INFO - __main__ - Step 9050: {'lr': 0.0004961519404848251, 'samples': 434400, 'steps': 9049, 'loss/train': 2.0973973274230957} +07/25/2024 12:08:02 - INFO - __main__ - Step 9051: {'lr': 0.0004961510210011095, 'samples': 434448, 'steps': 9050, 'loss/train': 1.7493435144424438} +07/25/2024 12:08:02 - INFO - __main__ - Step 9052: {'lr': 0.0004961501014084052, 'samples': 434496, 'steps': 9051, 'loss/train': 1.823591947555542} +07/25/2024 12:08:03 - INFO - __main__ - Step 9053: {'lr': 0.0004961491817067125, 'samples': 434544, 'steps': 9052, 'loss/train': 1.2007975578308105} +07/25/2024 12:08:03 - INFO - __main__ - Step 9054: {'lr': 0.0004961482618960318, 'samples': 434592, 'steps': 9053, 'loss/train': 1.784569501876831} +07/25/2024 12:08:03 - INFO - __main__ - Step 9055: {'lr': 0.0004961473419763636, 'samples': 434640, 'steps': 9054, 'loss/train': 1.9043282270431519} +07/25/2024 12:08:04 - INFO - __main__ - Step 9056: {'lr': 0.0004961464219477082, 'samples': 434688, 'steps': 9055, 'loss/train': 1.9513888359069824} +07/25/2024 12:08:04 - INFO - __main__ - Step 9057: {'lr': 0.0004961455018100661, 'samples': 434736, 'steps': 9056, 'loss/train': 2.095285415649414} +07/25/2024 12:08:04 - INFO - __main__ - Step 9058: {'lr': 0.0004961445815634376, 'samples': 434784, 'steps': 9057, 'loss/train': 2.060720920562744} +07/25/2024 12:08:04 - INFO - __main__ - Step 9059: {'lr': 0.0004961436612078233, 'samples': 434832, 'steps': 9058, 'loss/train': 2.003441572189331} +07/25/2024 12:08:05 - INFO - __main__ - Step 9060: {'lr': 0.0004961427407432234, 'samples': 434880, 'steps': 9059, 'loss/train': 1.843758463859558} +07/25/2024 12:08:05 - INFO - __main__ - Step 9061: {'lr': 0.0004961418201696383, 'samples': 434928, 'steps': 9060, 'loss/train': 0.8667112588882446} +07/25/2024 12:08:05 - INFO - __main__ - Step 9062: {'lr': 0.0004961408994870687, 'samples': 434976, 'steps': 9061, 'loss/train': 1.5640084743499756} +07/25/2024 12:08:06 - INFO - __main__ - Step 9063: {'lr': 0.0004961399786955146, 'samples': 435024, 'steps': 9062, 'loss/train': 2.6574807167053223} +07/25/2024 12:08:06 - INFO - __main__ - Step 9064: {'lr': 0.0004961390577949769, 'samples': 435072, 'steps': 9063, 'loss/train': 2.2015695571899414} +07/25/2024 12:08:06 - INFO - __main__ - Step 9065: {'lr': 0.0004961381367854555, 'samples': 435120, 'steps': 9064, 'loss/train': 2.081960678100586} +07/25/2024 12:08:06 - INFO - __main__ - Step 9066: {'lr': 0.000496137215666951, 'samples': 435168, 'steps': 9065, 'loss/train': 2.1425118446350098} +07/25/2024 12:08:07 - INFO - __main__ - Step 9067: {'lr': 0.0004961362944394639, 'samples': 435216, 'steps': 9066, 'loss/train': 2.297556161880493} +07/25/2024 12:08:07 - INFO - __main__ - Step 9068: {'lr': 0.0004961353731029947, 'samples': 435264, 'steps': 9067, 'loss/train': 1.7295678853988647} +07/25/2024 12:08:07 - INFO - __main__ - Step 9069: {'lr': 0.0004961344516575434, 'samples': 435312, 'steps': 9068, 'loss/train': 2.132732391357422} +07/25/2024 12:08:08 - INFO - __main__ - Step 9070: {'lr': 0.0004961335301031108, 'samples': 435360, 'steps': 9069, 'loss/train': 0.37390366196632385} +07/25/2024 12:08:08 - INFO - __main__ - Step 9071: {'lr': 0.0004961326084396971, 'samples': 435408, 'steps': 9070, 'loss/train': 1.9739089012145996} +07/25/2024 12:08:08 - INFO - __main__ - Step 9072: {'lr': 0.0004961316866673028, 'samples': 435456, 'steps': 9071, 'loss/train': 2.2195796966552734} +07/25/2024 12:08:08 - INFO - __main__ - Step 9073: {'lr': 0.0004961307647859284, 'samples': 435504, 'steps': 9072, 'loss/train': 2.182056427001953} +07/25/2024 12:08:09 - INFO - __main__ - Step 9074: {'lr': 0.0004961298427955741, 'samples': 435552, 'steps': 9073, 'loss/train': 1.4530466794967651} +07/25/2024 12:08:09 - INFO - __main__ - Step 9075: {'lr': 0.0004961289206962404, 'samples': 435600, 'steps': 9074, 'loss/train': 1.6442725658416748} +07/25/2024 12:08:09 - INFO - __main__ - Step 9076: {'lr': 0.0004961279984879277, 'samples': 435648, 'steps': 9075, 'loss/train': 1.6950466632843018} +07/25/2024 12:08:09 - INFO - __main__ - Step 9077: {'lr': 0.0004961270761706364, 'samples': 435696, 'steps': 9076, 'loss/train': 2.1399123668670654} +07/25/2024 12:08:10 - INFO - __main__ - Step 9078: {'lr': 0.0004961261537443669, 'samples': 435744, 'steps': 9077, 'loss/train': 1.7108837366104126} +07/25/2024 12:08:10 - INFO - __main__ - Step 9079: {'lr': 0.0004961252312091197, 'samples': 435792, 'steps': 9078, 'loss/train': 1.6449840068817139} +07/25/2024 12:08:10 - INFO - __main__ - Step 9080: {'lr': 0.0004961243085648951, 'samples': 435840, 'steps': 9079, 'loss/train': 2.128924608230591} +07/25/2024 12:08:11 - INFO - __main__ - Step 9081: {'lr': 0.0004961233858116937, 'samples': 435888, 'steps': 9080, 'loss/train': 2.240863561630249} +07/25/2024 12:08:11 - INFO - __main__ - Step 9082: {'lr': 0.0004961224629495156, 'samples': 435936, 'steps': 9081, 'loss/train': 1.896579623222351} +07/25/2024 12:08:11 - INFO - __main__ - Step 9083: {'lr': 0.0004961215399783614, 'samples': 435984, 'steps': 9082, 'loss/train': 1.9757846593856812} +07/25/2024 12:08:11 - INFO - __main__ - Step 9084: {'lr': 0.0004961206168982316, 'samples': 436032, 'steps': 9083, 'loss/train': 2.1508591175079346} +07/25/2024 12:08:12 - INFO - __main__ - Step 9085: {'lr': 0.0004961196937091263, 'samples': 436080, 'steps': 9084, 'loss/train': 2.564026117324829} +07/25/2024 12:08:12 - INFO - __main__ - Step 9086: {'lr': 0.0004961187704110462, 'samples': 436128, 'steps': 9085, 'loss/train': 1.7315078973770142} +07/25/2024 12:08:12 - INFO - __main__ - Step 9087: {'lr': 0.0004961178470039915, 'samples': 436176, 'steps': 9086, 'loss/train': 1.7649341821670532} +07/25/2024 12:08:13 - INFO - __main__ - Step 9088: {'lr': 0.0004961169234879629, 'samples': 436224, 'steps': 9087, 'loss/train': 2.4946343898773193} +07/25/2024 12:08:13 - INFO - __main__ - Step 9089: {'lr': 0.0004961159998629604, 'samples': 436272, 'steps': 9088, 'loss/train': 2.238006591796875} +07/25/2024 12:08:13 - INFO - __main__ - Step 9090: {'lr': 0.0004961150761289848, 'samples': 436320, 'steps': 9089, 'loss/train': 1.7381259202957153} +07/25/2024 12:08:13 - INFO - __main__ - Step 9091: {'lr': 0.0004961141522860363, 'samples': 436368, 'steps': 9090, 'loss/train': 1.924331545829773} +07/25/2024 12:08:14 - INFO - __main__ - Step 9092: {'lr': 0.0004961132283341154, 'samples': 436416, 'steps': 9091, 'loss/train': 2.0334408283233643} +07/25/2024 12:08:14 - INFO - __main__ - Step 9093: {'lr': 0.0004961123042732223, 'samples': 436464, 'steps': 9092, 'loss/train': 2.069794178009033} +07/25/2024 12:08:14 - INFO - __main__ - Step 9094: {'lr': 0.0004961113801033578, 'samples': 436512, 'steps': 9093, 'loss/train': 0.4485737085342407} +07/25/2024 12:08:15 - INFO - __main__ - Step 9095: {'lr': 0.0004961104558245218, 'samples': 436560, 'steps': 9094, 'loss/train': 2.3174502849578857} +07/25/2024 12:08:15 - INFO - __main__ - Step 9096: {'lr': 0.0004961095314367153, 'samples': 436608, 'steps': 9095, 'loss/train': 2.120549201965332} +07/25/2024 12:08:15 - INFO - __main__ - Step 9097: {'lr': 0.0004961086069399381, 'samples': 436656, 'steps': 9096, 'loss/train': 1.901400089263916} +07/25/2024 12:08:15 - INFO - __main__ - Step 9098: {'lr': 0.0004961076823341911, 'samples': 436704, 'steps': 9097, 'loss/train': 0.9522789120674133} +07/25/2024 12:08:16 - INFO - __main__ - Step 9099: {'lr': 0.0004961067576194744, 'samples': 436752, 'steps': 9098, 'loss/train': 2.363943338394165} +07/25/2024 12:08:16 - INFO - __main__ - Step 9100: {'lr': 0.0004961058327957885, 'samples': 436800, 'steps': 9099, 'loss/train': 0.4828357696533203} +07/25/2024 12:08:16 - INFO - __main__ - Step 9101: {'lr': 0.000496104907863134, 'samples': 436848, 'steps': 9100, 'loss/train': 1.8048309087753296} +07/25/2024 12:08:17 - INFO - __main__ - Step 9102: {'lr': 0.000496103982821511, 'samples': 436896, 'steps': 9101, 'loss/train': 1.9730631113052368} +07/25/2024 12:08:17 - INFO - __main__ - Step 9103: {'lr': 0.0004961030576709201, 'samples': 436944, 'steps': 9102, 'loss/train': 1.965811848640442} +07/25/2024 12:08:17 - INFO - __main__ - Step 9104: {'lr': 0.0004961021324113617, 'samples': 436992, 'steps': 9103, 'loss/train': 1.5647858381271362} +07/25/2024 12:08:17 - INFO - __main__ - Step 9105: {'lr': 0.0004961012070428361, 'samples': 437040, 'steps': 9104, 'loss/train': 2.322587013244629} +07/25/2024 12:08:18 - INFO - __main__ - Step 9106: {'lr': 0.0004961002815653438, 'samples': 437088, 'steps': 9105, 'loss/train': 1.866896629333496} +07/25/2024 12:08:18 - INFO - __main__ - Step 9107: {'lr': 0.0004960993559788852, 'samples': 437136, 'steps': 9106, 'loss/train': 2.362220287322998} +07/25/2024 12:08:18 - INFO - __main__ - Step 9108: {'lr': 0.0004960984302834607, 'samples': 437184, 'steps': 9107, 'loss/train': 1.822358250617981} +07/25/2024 12:08:19 - INFO - __main__ - Step 9109: {'lr': 0.0004960975044790707, 'samples': 437232, 'steps': 9108, 'loss/train': 1.9908230304718018} +07/25/2024 12:08:19 - INFO - __main__ - Step 9110: {'lr': 0.0004960965785657156, 'samples': 437280, 'steps': 9109, 'loss/train': 2.0038814544677734} +07/25/2024 12:08:19 - INFO - __main__ - Step 9111: {'lr': 0.0004960956525433959, 'samples': 437328, 'steps': 9110, 'loss/train': 1.8102471828460693} +07/25/2024 12:08:19 - INFO - __main__ - Step 9112: {'lr': 0.000496094726412112, 'samples': 437376, 'steps': 9111, 'loss/train': 2.60713529586792} +07/25/2024 12:08:20 - INFO - __main__ - Step 9113: {'lr': 0.0004960938001718641, 'samples': 437424, 'steps': 9112, 'loss/train': 1.9238446950912476} +07/25/2024 12:08:20 - INFO - __main__ - Step 9114: {'lr': 0.0004960928738226528, 'samples': 437472, 'steps': 9113, 'loss/train': 2.177399158477783} +07/25/2024 12:08:20 - INFO - __main__ - Step 9115: {'lr': 0.0004960919473644785, 'samples': 437520, 'steps': 9114, 'loss/train': 1.5720137357711792} +07/25/2024 12:08:21 - INFO - __main__ - Step 9116: {'lr': 0.0004960910207973415, 'samples': 437568, 'steps': 9115, 'loss/train': 2.7513527870178223} +07/25/2024 12:08:21 - INFO - __main__ - Step 9117: {'lr': 0.0004960900941212424, 'samples': 437616, 'steps': 9116, 'loss/train': 1.819957971572876} +07/25/2024 12:08:21 - INFO - __main__ - Step 9118: {'lr': 0.0004960891673361815, 'samples': 437664, 'steps': 9117, 'loss/train': 0.4018632769584656} +07/25/2024 12:08:21 - INFO - __main__ - Step 9119: {'lr': 0.0004960882404421591, 'samples': 437712, 'steps': 9118, 'loss/train': 1.9289166927337646} +07/25/2024 12:08:22 - INFO - __main__ - Step 9120: {'lr': 0.0004960873134391759, 'samples': 437760, 'steps': 9119, 'loss/train': 2.4351396560668945} +07/25/2024 12:08:22 - INFO - __main__ - Step 9121: {'lr': 0.000496086386327232, 'samples': 437808, 'steps': 9120, 'loss/train': 1.6656789779663086} +07/25/2024 12:08:22 - INFO - __main__ - Step 9122: {'lr': 0.000496085459106328, 'samples': 437856, 'steps': 9121, 'loss/train': 1.9090512990951538} +07/25/2024 12:08:23 - INFO - __main__ - Step 9123: {'lr': 0.0004960845317764642, 'samples': 437904, 'steps': 9122, 'loss/train': 2.0473220348358154} +07/25/2024 12:08:23 - INFO - __main__ - Step 9124: {'lr': 0.0004960836043376412, 'samples': 437952, 'steps': 9123, 'loss/train': 2.4231979846954346} +07/25/2024 12:08:23 - INFO - __main__ - Step 9125: {'lr': 0.0004960826767898591, 'samples': 438000, 'steps': 9124, 'loss/train': 1.4332163333892822} +07/25/2024 12:08:23 - INFO - __main__ - Step 9126: {'lr': 0.0004960817491331185, 'samples': 438048, 'steps': 9125, 'loss/train': 1.758802056312561} +07/25/2024 12:08:24 - INFO - __main__ - Step 9127: {'lr': 0.0004960808213674198, 'samples': 438096, 'steps': 9126, 'loss/train': 1.6594849824905396} +07/25/2024 12:08:24 - INFO - __main__ - Step 9128: {'lr': 0.0004960798934927635, 'samples': 438144, 'steps': 9127, 'loss/train': 2.4706554412841797} +07/25/2024 12:08:24 - INFO - __main__ - Step 9129: {'lr': 0.0004960789655091499, 'samples': 438192, 'steps': 9128, 'loss/train': 2.227864980697632} +07/25/2024 12:08:25 - INFO - __main__ - Step 9130: {'lr': 0.0004960780374165793, 'samples': 438240, 'steps': 9129, 'loss/train': 1.753676176071167} +07/25/2024 12:08:25 - INFO - __main__ - Step 9131: {'lr': 0.0004960771092150524, 'samples': 438288, 'steps': 9130, 'loss/train': 2.715573787689209} +07/25/2024 12:08:25 - INFO - __main__ - Step 9132: {'lr': 0.0004960761809045695, 'samples': 438336, 'steps': 9131, 'loss/train': 1.2296230792999268} +07/25/2024 12:08:25 - INFO - __main__ - Step 9133: {'lr': 0.0004960752524851308, 'samples': 438384, 'steps': 9132, 'loss/train': 2.3395166397094727} +07/25/2024 12:08:26 - INFO - __main__ - Step 9134: {'lr': 0.0004960743239567369, 'samples': 438432, 'steps': 9133, 'loss/train': 1.914207100868225} +07/25/2024 12:08:26 - INFO - __main__ - Step 9135: {'lr': 0.0004960733953193882, 'samples': 438480, 'steps': 9134, 'loss/train': 1.9499262571334839} +07/25/2024 12:08:26 - INFO - __main__ - Step 9136: {'lr': 0.0004960724665730851, 'samples': 438528, 'steps': 9135, 'loss/train': 2.073148488998413} +07/25/2024 12:08:27 - INFO - __main__ - Step 9137: {'lr': 0.000496071537717828, 'samples': 438576, 'steps': 9136, 'loss/train': 1.7979120016098022} +07/25/2024 12:08:27 - INFO - __main__ - Step 9138: {'lr': 0.0004960706087536173, 'samples': 438624, 'steps': 9137, 'loss/train': 2.441234827041626} +07/25/2024 12:08:27 - INFO - __main__ - Step 9139: {'lr': 0.0004960696796804535, 'samples': 438672, 'steps': 9138, 'loss/train': 1.5430500507354736} +07/25/2024 12:08:27 - INFO - __main__ - Step 9140: {'lr': 0.000496068750498337, 'samples': 438720, 'steps': 9139, 'loss/train': 1.9813430309295654} +07/25/2024 12:08:28 - INFO - __main__ - Step 9141: {'lr': 0.000496067821207268, 'samples': 438768, 'steps': 9140, 'loss/train': 2.1173722743988037} +07/25/2024 12:08:28 - INFO - __main__ - Step 9142: {'lr': 0.0004960668918072471, 'samples': 438816, 'steps': 9141, 'loss/train': 0.4473031461238861} +07/25/2024 12:08:28 - INFO - __main__ - Step 9143: {'lr': 0.0004960659622982748, 'samples': 438864, 'steps': 9142, 'loss/train': 2.318699598312378} +07/25/2024 12:08:29 - INFO - __main__ - Step 9144: {'lr': 0.0004960650326803512, 'samples': 438912, 'steps': 9143, 'loss/train': 2.6848764419555664} +07/25/2024 12:08:29 - INFO - __main__ - Step 9145: {'lr': 0.000496064102953477, 'samples': 438960, 'steps': 9144, 'loss/train': 2.012617588043213} +07/25/2024 12:08:29 - INFO - __main__ - Step 9146: {'lr': 0.0004960631731176525, 'samples': 439008, 'steps': 9145, 'loss/train': 1.930393934249878} +07/25/2024 12:08:29 - INFO - __main__ - Step 9147: {'lr': 0.0004960622431728781, 'samples': 439056, 'steps': 9146, 'loss/train': 2.040889263153076} +07/25/2024 12:08:30 - INFO - __main__ - Step 9148: {'lr': 0.0004960613131191542, 'samples': 439104, 'steps': 9147, 'loss/train': 1.5452146530151367} +07/25/2024 12:08:30 - INFO - __main__ - Step 9149: {'lr': 0.0004960603829564813, 'samples': 439152, 'steps': 9148, 'loss/train': 1.2732897996902466} +07/25/2024 12:08:30 - INFO - __main__ - Step 9150: {'lr': 0.0004960594526848597, 'samples': 439200, 'steps': 9149, 'loss/train': 2.1504640579223633} +07/25/2024 12:08:30 - INFO - __main__ - Step 9151: {'lr': 0.00049605852230429, 'samples': 439248, 'steps': 9150, 'loss/train': 1.9361698627471924} +07/25/2024 12:08:31 - INFO - __main__ - Step 9152: {'lr': 0.0004960575918147724, 'samples': 439296, 'steps': 9151, 'loss/train': 1.9881378412246704} +07/25/2024 12:08:31 - INFO - __main__ - Step 9153: {'lr': 0.0004960566612163073, 'samples': 439344, 'steps': 9152, 'loss/train': 2.0395991802215576} +07/25/2024 12:08:31 - INFO - __main__ - Step 9154: {'lr': 0.0004960557305088953, 'samples': 439392, 'steps': 9153, 'loss/train': 1.7180744409561157} +07/25/2024 12:08:32 - INFO - __main__ - Step 9155: {'lr': 0.0004960547996925368, 'samples': 439440, 'steps': 9154, 'loss/train': 2.6972217559814453} +07/25/2024 12:08:32 - INFO - __main__ - Step 9156: {'lr': 0.000496053868767232, 'samples': 439488, 'steps': 9155, 'loss/train': 1.7623255252838135} +07/25/2024 12:08:32 - INFO - __main__ - Step 9157: {'lr': 0.0004960529377329814, 'samples': 439536, 'steps': 9156, 'loss/train': 2.256970167160034} +07/25/2024 12:08:32 - INFO - __main__ - Step 9158: {'lr': 0.0004960520065897855, 'samples': 439584, 'steps': 9157, 'loss/train': 1.1954649686813354} +07/25/2024 12:08:33 - INFO - __main__ - Step 9159: {'lr': 0.0004960510753376448, 'samples': 439632, 'steps': 9158, 'loss/train': 2.26672101020813} +07/25/2024 12:08:33 - INFO - __main__ - Step 9160: {'lr': 0.0004960501439765595, 'samples': 439680, 'steps': 9159, 'loss/train': 2.3208866119384766} +07/25/2024 12:08:33 - INFO - __main__ - Step 9161: {'lr': 0.00049604921250653, 'samples': 439728, 'steps': 9160, 'loss/train': 1.4483240842819214} +07/25/2024 12:08:34 - INFO - __main__ - Step 9162: {'lr': 0.000496048280927557, 'samples': 439776, 'steps': 9161, 'loss/train': 2.100775957107544} +07/25/2024 12:08:34 - INFO - __main__ - Step 9163: {'lr': 0.0004960473492396405, 'samples': 439824, 'steps': 9162, 'loss/train': 1.705278754234314} +07/25/2024 12:08:34 - INFO - __main__ - Step 9164: {'lr': 0.0004960464174427812, 'samples': 439872, 'steps': 9163, 'loss/train': 1.862337350845337} +07/25/2024 12:08:34 - INFO - __main__ - Step 9165: {'lr': 0.0004960454855369795, 'samples': 439920, 'steps': 9164, 'loss/train': 1.8689593076705933} +07/25/2024 12:08:35 - INFO - __main__ - Step 9166: {'lr': 0.0004960445535222357, 'samples': 439968, 'steps': 9165, 'loss/train': 0.3758673369884491} +07/25/2024 12:08:35 - INFO - __main__ - Step 9167: {'lr': 0.0004960436213985503, 'samples': 440016, 'steps': 9166, 'loss/train': 1.9239031076431274} +07/25/2024 12:08:35 - INFO - __main__ - Step 9168: {'lr': 0.0004960426891659236, 'samples': 440064, 'steps': 9167, 'loss/train': 2.4112038612365723} +07/25/2024 12:08:36 - INFO - __main__ - Step 9169: {'lr': 0.0004960417568243561, 'samples': 440112, 'steps': 9168, 'loss/train': 2.060577630996704} +07/25/2024 12:08:36 - INFO - __main__ - Step 9170: {'lr': 0.0004960408243738483, 'samples': 440160, 'steps': 9169, 'loss/train': 1.145018458366394} +07/25/2024 12:08:36 - INFO - __main__ - Step 9171: {'lr': 0.0004960398918144004, 'samples': 440208, 'steps': 9170, 'loss/train': 1.599772334098816} +07/25/2024 12:08:36 - INFO - __main__ - Step 9172: {'lr': 0.000496038959146013, 'samples': 440256, 'steps': 9171, 'loss/train': 1.2864888906478882} +07/25/2024 12:08:37 - INFO - __main__ - Step 9173: {'lr': 0.0004960380263686864, 'samples': 440304, 'steps': 9172, 'loss/train': 1.3205846548080444} +07/25/2024 12:08:37 - INFO - __main__ - Step 9174: {'lr': 0.0004960370934824212, 'samples': 440352, 'steps': 9173, 'loss/train': 1.9919953346252441} +07/25/2024 12:08:37 - INFO - __main__ - Step 9175: {'lr': 0.0004960361604872175, 'samples': 440400, 'steps': 9174, 'loss/train': 2.2464706897735596} +07/25/2024 12:08:38 - INFO - __main__ - Step 9176: {'lr': 0.0004960352273830759, 'samples': 440448, 'steps': 9175, 'loss/train': 2.0395896434783936} +07/25/2024 12:08:38 - INFO - __main__ - Step 9177: {'lr': 0.0004960342941699969, 'samples': 440496, 'steps': 9176, 'loss/train': 1.9675722122192383} +07/25/2024 12:08:38 - INFO - __main__ - Step 9178: {'lr': 0.0004960333608479808, 'samples': 440544, 'steps': 9177, 'loss/train': 1.642134189605713} +07/25/2024 12:08:38 - INFO - __main__ - Step 9179: {'lr': 0.000496032427417028, 'samples': 440592, 'steps': 9178, 'loss/train': 2.6197025775909424} +07/25/2024 12:08:39 - INFO - __main__ - Step 9180: {'lr': 0.0004960314938771389, 'samples': 440640, 'steps': 9179, 'loss/train': 2.265730381011963} +07/25/2024 12:08:39 - INFO - __main__ - Step 9181: {'lr': 0.0004960305602283139, 'samples': 440688, 'steps': 9180, 'loss/train': 2.162109613418579} +07/25/2024 12:08:39 - INFO - __main__ - Step 9182: {'lr': 0.0004960296264705535, 'samples': 440736, 'steps': 9181, 'loss/train': 1.9027330875396729} +07/25/2024 12:08:40 - INFO - __main__ - Step 9183: {'lr': 0.0004960286926038581, 'samples': 440784, 'steps': 9182, 'loss/train': 1.349648356437683} +07/25/2024 12:08:40 - INFO - __main__ - Step 9184: {'lr': 0.0004960277586282281, 'samples': 440832, 'steps': 9183, 'loss/train': 1.7314728498458862} +07/25/2024 12:08:40 - INFO - __main__ - Step 9185: {'lr': 0.0004960268245436639, 'samples': 440880, 'steps': 9184, 'loss/train': 2.2348854541778564} +07/25/2024 12:08:40 - INFO - __main__ - Step 9186: {'lr': 0.0004960258903501659, 'samples': 440928, 'steps': 9185, 'loss/train': 1.9493972063064575} +07/25/2024 12:08:41 - INFO - __main__ - Step 9187: {'lr': 0.0004960249560477344, 'samples': 440976, 'steps': 9186, 'loss/train': 2.1110830307006836} +07/25/2024 12:08:41 - INFO - __main__ - Step 9188: {'lr': 0.0004960240216363701, 'samples': 441024, 'steps': 9187, 'loss/train': 1.7491028308868408} +07/25/2024 12:08:41 - INFO - __main__ - Step 9189: {'lr': 0.0004960230871160732, 'samples': 441072, 'steps': 9188, 'loss/train': 1.4103446006774902} +07/25/2024 12:08:42 - INFO - __main__ - Step 9190: {'lr': 0.0004960221524868443, 'samples': 441120, 'steps': 9189, 'loss/train': 0.2525912821292877} +07/25/2024 12:08:42 - INFO - __main__ - Step 9191: {'lr': 0.0004960212177486835, 'samples': 441168, 'steps': 9190, 'loss/train': 2.1935489177703857} +07/25/2024 12:08:42 - INFO - __main__ - Step 9192: {'lr': 0.0004960202829015915, 'samples': 441216, 'steps': 9191, 'loss/train': 1.8455159664154053} +07/25/2024 12:08:42 - INFO - __main__ - Step 9193: {'lr': 0.0004960193479455686, 'samples': 441264, 'steps': 9192, 'loss/train': 2.105102777481079} +07/25/2024 12:08:43 - INFO - __main__ - Step 9194: {'lr': 0.0004960184128806152, 'samples': 441312, 'steps': 9193, 'loss/train': 0.23640622198581696} +07/25/2024 12:08:43 - INFO - __main__ - Step 9195: {'lr': 0.0004960174777067318, 'samples': 441360, 'steps': 9194, 'loss/train': 1.4849934577941895} +07/25/2024 12:08:43 - INFO - __main__ - Step 9196: {'lr': 0.0004960165424239187, 'samples': 441408, 'steps': 9195, 'loss/train': 2.0886027812957764} +07/25/2024 12:08:44 - INFO - __main__ - Step 9197: {'lr': 0.0004960156070321763, 'samples': 441456, 'steps': 9196, 'loss/train': 0.9744783639907837} +07/25/2024 12:08:44 - INFO - __main__ - Step 9198: {'lr': 0.0004960146715315052, 'samples': 441504, 'steps': 9197, 'loss/train': 1.7337839603424072} +07/25/2024 12:08:44 - INFO - __main__ - Step 9199: {'lr': 0.0004960137359219057, 'samples': 441552, 'steps': 9198, 'loss/train': 2.5354206562042236} +07/25/2024 12:08:44 - INFO - __main__ - Step 9200: {'lr': 0.0004960128002033782, 'samples': 441600, 'steps': 9199, 'loss/train': 1.2467916011810303} +07/25/2024 12:08:45 - INFO - __main__ - Step 9201: {'lr': 0.000496011864375923, 'samples': 441648, 'steps': 9200, 'loss/train': 1.9795947074890137} +07/25/2024 12:08:45 - INFO - __main__ - Step 9202: {'lr': 0.0004960109284395408, 'samples': 441696, 'steps': 9201, 'loss/train': 1.6831752061843872} +07/25/2024 12:08:45 - INFO - __main__ - Step 9203: {'lr': 0.0004960099923942317, 'samples': 441744, 'steps': 9202, 'loss/train': 2.501093864440918} +07/25/2024 12:08:46 - INFO - __main__ - Step 9204: {'lr': 0.0004960090562399963, 'samples': 441792, 'steps': 9203, 'loss/train': 1.8235082626342773} +07/25/2024 12:08:46 - INFO - __main__ - Step 9205: {'lr': 0.0004960081199768351, 'samples': 441840, 'steps': 9204, 'loss/train': 2.36740779876709} +07/25/2024 12:08:46 - INFO - __main__ - Step 9206: {'lr': 0.0004960071836047484, 'samples': 441888, 'steps': 9205, 'loss/train': 1.9463518857955933} +07/25/2024 12:08:46 - INFO - __main__ - Step 9207: {'lr': 0.0004960062471237365, 'samples': 441936, 'steps': 9206, 'loss/train': 1.7753500938415527} +07/25/2024 12:08:47 - INFO - __main__ - Step 9208: {'lr': 0.0004960053105338, 'samples': 441984, 'steps': 9207, 'loss/train': 2.1409263610839844} +07/25/2024 12:08:47 - INFO - __main__ - Step 9209: {'lr': 0.0004960043738349392, 'samples': 442032, 'steps': 9208, 'loss/train': 1.5606404542922974} +07/25/2024 12:08:47 - INFO - __main__ - Step 9210: {'lr': 0.0004960034370271546, 'samples': 442080, 'steps': 9209, 'loss/train': 1.7634183168411255} +07/25/2024 12:08:48 - INFO - __main__ - Step 9211: {'lr': 0.0004960025001104466, 'samples': 442128, 'steps': 9210, 'loss/train': 1.8436462879180908} +07/25/2024 12:08:48 - INFO - __main__ - Step 9212: {'lr': 0.0004960015630848155, 'samples': 442176, 'steps': 9211, 'loss/train': 2.101996660232544} +07/25/2024 12:08:48 - INFO - __main__ - Step 9213: {'lr': 0.0004960006259502618, 'samples': 442224, 'steps': 9212, 'loss/train': 1.6293699741363525} +07/25/2024 12:08:48 - INFO - __main__ - Step 9214: {'lr': 0.0004959996887067859, 'samples': 442272, 'steps': 9213, 'loss/train': 0.21323062479496002} +07/25/2024 12:08:49 - INFO - __main__ - Step 9215: {'lr': 0.0004959987513543883, 'samples': 442320, 'steps': 9214, 'loss/train': 1.9242526292800903} +07/25/2024 12:08:49 - INFO - __main__ - Step 9216: {'lr': 0.0004959978138930693, 'samples': 442368, 'steps': 9215, 'loss/train': 1.6301790475845337} +07/25/2024 12:08:49 - INFO - __main__ - Step 9217: {'lr': 0.0004959968763228294, 'samples': 442416, 'steps': 9216, 'loss/train': 2.00239634513855} +07/25/2024 12:08:50 - INFO - __main__ - Step 9218: {'lr': 0.000495995938643669, 'samples': 442464, 'steps': 9217, 'loss/train': 0.1744108349084854} +07/25/2024 12:08:50 - INFO - __main__ - Step 9219: {'lr': 0.0004959950008555884, 'samples': 442512, 'steps': 9218, 'loss/train': 1.8337042331695557} +07/25/2024 12:08:50 - INFO - __main__ - Step 9220: {'lr': 0.0004959940629585881, 'samples': 442560, 'steps': 9219, 'loss/train': 1.9595654010772705} +07/25/2024 12:08:50 - INFO - __main__ - Step 9221: {'lr': 0.0004959931249526687, 'samples': 442608, 'steps': 9220, 'loss/train': 0.8766697645187378} +07/25/2024 12:08:51 - INFO - __main__ - Step 9222: {'lr': 0.0004959921868378303, 'samples': 442656, 'steps': 9221, 'loss/train': 1.9864554405212402} +07/25/2024 12:08:51 - INFO - __main__ - Step 9223: {'lr': 0.0004959912486140734, 'samples': 442704, 'steps': 9222, 'loss/train': 1.9558321237564087} +07/25/2024 12:08:51 - INFO - __main__ - Step 9224: {'lr': 0.0004959903102813985, 'samples': 442752, 'steps': 9223, 'loss/train': 1.328876256942749} +07/25/2024 12:08:51 - INFO - __main__ - Step 9225: {'lr': 0.0004959893718398061, 'samples': 442800, 'steps': 9224, 'loss/train': 2.1702849864959717} +07/25/2024 12:08:52 - INFO - __main__ - Step 9226: {'lr': 0.0004959884332892964, 'samples': 442848, 'steps': 9225, 'loss/train': 1.790093183517456} +07/25/2024 12:08:52 - INFO - __main__ - Step 9227: {'lr': 0.0004959874946298699, 'samples': 442896, 'steps': 9226, 'loss/train': 2.7777366638183594} +07/25/2024 12:08:52 - INFO - __main__ - Step 9228: {'lr': 0.0004959865558615271, 'samples': 442944, 'steps': 9227, 'loss/train': 1.8752914667129517} +07/25/2024 12:08:53 - INFO - __main__ - Step 9229: {'lr': 0.0004959856169842683, 'samples': 442992, 'steps': 9228, 'loss/train': 2.0072920322418213} +07/25/2024 12:08:53 - INFO - __main__ - Step 9230: {'lr': 0.000495984677998094, 'samples': 443040, 'steps': 9229, 'loss/train': 1.9296904802322388} +07/25/2024 12:08:53 - INFO - __main__ - Step 9231: {'lr': 0.0004959837389030045, 'samples': 443088, 'steps': 9230, 'loss/train': 1.8017703294754028} +07/25/2024 12:08:53 - INFO - __main__ - Step 9232: {'lr': 0.0004959827996990003, 'samples': 443136, 'steps': 9231, 'loss/train': 1.791445016860962} +07/25/2024 12:08:54 - INFO - __main__ - Step 9233: {'lr': 0.0004959818603860819, 'samples': 443184, 'steps': 9232, 'loss/train': 2.172091007232666} +07/25/2024 12:08:54 - INFO - __main__ - Step 9234: {'lr': 0.0004959809209642496, 'samples': 443232, 'steps': 9233, 'loss/train': 2.388986825942993} +07/25/2024 12:08:54 - INFO - __main__ - Step 9235: {'lr': 0.0004959799814335037, 'samples': 443280, 'steps': 9234, 'loss/train': 1.8799318075180054} +07/25/2024 12:08:55 - INFO - __main__ - Step 9236: {'lr': 0.0004959790417938449, 'samples': 443328, 'steps': 9235, 'loss/train': 1.89616060256958} +07/25/2024 12:08:55 - INFO - __main__ - Step 9237: {'lr': 0.0004959781020452735, 'samples': 443376, 'steps': 9236, 'loss/train': 2.181070327758789} +07/25/2024 12:08:55 - INFO - __main__ - Step 9238: {'lr': 0.0004959771621877898, 'samples': 443424, 'steps': 9237, 'loss/train': 0.19523750245571136} +07/25/2024 12:08:55 - INFO - __main__ - Step 9239: {'lr': 0.0004959762222213944, 'samples': 443472, 'steps': 9238, 'loss/train': 1.9487404823303223} +07/25/2024 12:08:56 - INFO - __main__ - Step 9240: {'lr': 0.0004959752821460875, 'samples': 443520, 'steps': 9239, 'loss/train': 2.4228813648223877} +07/25/2024 12:08:56 - INFO - __main__ - Step 9241: {'lr': 0.0004959743419618697, 'samples': 443568, 'steps': 9240, 'loss/train': 1.3522508144378662} +07/25/2024 12:08:56 - INFO - __main__ - Step 9242: {'lr': 0.0004959734016687414, 'samples': 443616, 'steps': 9241, 'loss/train': 0.20223234593868256} +07/25/2024 12:08:57 - INFO - __main__ - Step 9243: {'lr': 0.0004959724612667028, 'samples': 443664, 'steps': 9242, 'loss/train': 1.0424578189849854} +07/25/2024 12:08:57 - INFO - __main__ - Step 9244: {'lr': 0.0004959715207557546, 'samples': 443712, 'steps': 9243, 'loss/train': 1.5077260732650757} +07/25/2024 12:08:57 - INFO - __main__ - Step 9245: {'lr': 0.0004959705801358971, 'samples': 443760, 'steps': 9244, 'loss/train': 1.0988376140594482} +07/25/2024 12:08:57 - INFO - __main__ - Step 9246: {'lr': 0.0004959696394071308, 'samples': 443808, 'steps': 9245, 'loss/train': 2.3977181911468506} +07/25/2024 12:08:58 - INFO - __main__ - Step 9247: {'lr': 0.0004959686985694559, 'samples': 443856, 'steps': 9246, 'loss/train': 1.5596137046813965} +07/25/2024 12:08:58 - INFO - __main__ - Step 9248: {'lr': 0.000495967757622873, 'samples': 443904, 'steps': 9247, 'loss/train': 1.154457688331604} +07/25/2024 12:08:58 - INFO - __main__ - Step 9249: {'lr': 0.0004959668165673825, 'samples': 443952, 'steps': 9248, 'loss/train': 2.0285491943359375} +07/25/2024 12:08:59 - INFO - __main__ - Step 9250: {'lr': 0.0004959658754029848, 'samples': 444000, 'steps': 9249, 'loss/train': 1.7594101428985596} +07/25/2024 12:08:59 - INFO - __main__ - Step 9251: {'lr': 0.0004959649341296803, 'samples': 444048, 'steps': 9250, 'loss/train': 2.5942962169647217} +07/25/2024 12:08:59 - INFO - __main__ - Step 9252: {'lr': 0.0004959639927474694, 'samples': 444096, 'steps': 9251, 'loss/train': 1.7832281589508057} +07/25/2024 12:08:59 - INFO - __main__ - Step 9253: {'lr': 0.0004959630512563524, 'samples': 444144, 'steps': 9252, 'loss/train': 1.5673410892486572} +07/25/2024 12:09:00 - INFO - __main__ - Step 9254: {'lr': 0.00049596210965633, 'samples': 444192, 'steps': 9253, 'loss/train': 1.9335389137268066} +07/25/2024 12:09:00 - INFO - __main__ - Step 9255: {'lr': 0.0004959611679474025, 'samples': 444240, 'steps': 9254, 'loss/train': 1.5706044435501099} +07/25/2024 12:09:00 - INFO - __main__ - Step 9256: {'lr': 0.0004959602261295702, 'samples': 444288, 'steps': 9255, 'loss/train': 1.6813039779663086} +07/25/2024 12:09:01 - INFO - __main__ - Step 9257: {'lr': 0.0004959592842028336, 'samples': 444336, 'steps': 9256, 'loss/train': 2.270212173461914} +07/25/2024 12:09:01 - INFO - __main__ - Step 9258: {'lr': 0.0004959583421671931, 'samples': 444384, 'steps': 9257, 'loss/train': 2.038755178451538} +07/25/2024 12:09:01 - INFO - __main__ - Step 9259: {'lr': 0.0004959574000226492, 'samples': 444432, 'steps': 9258, 'loss/train': 1.532128095626831} +07/25/2024 12:09:01 - INFO - __main__ - Step 9260: {'lr': 0.0004959564577692023, 'samples': 444480, 'steps': 9259, 'loss/train': 1.5629719495773315} +07/25/2024 12:09:02 - INFO - __main__ - Step 9261: {'lr': 0.0004959555154068528, 'samples': 444528, 'steps': 9260, 'loss/train': 1.5332400798797607} +07/25/2024 12:09:02 - INFO - __main__ - Step 9262: {'lr': 0.0004959545729356009, 'samples': 444576, 'steps': 9261, 'loss/train': 0.22226086258888245} +07/25/2024 12:09:02 - INFO - __main__ - Step 9263: {'lr': 0.0004959536303554474, 'samples': 444624, 'steps': 9262, 'loss/train': 1.89055335521698} +07/25/2024 12:09:03 - INFO - __main__ - Step 9264: {'lr': 0.0004959526876663924, 'samples': 444672, 'steps': 9263, 'loss/train': 1.1675814390182495} +07/25/2024 12:09:03 - INFO - __main__ - Step 9265: {'lr': 0.0004959517448684366, 'samples': 444720, 'steps': 9264, 'loss/train': 2.3423619270324707} +07/25/2024 12:09:03 - INFO - __main__ - Step 9266: {'lr': 0.00049595080196158, 'samples': 444768, 'steps': 9265, 'loss/train': 0.282061904668808} +07/25/2024 12:09:03 - INFO - __main__ - Step 9267: {'lr': 0.0004959498589458234, 'samples': 444816, 'steps': 9266, 'loss/train': 1.637372374534607} +07/25/2024 12:09:04 - INFO - __main__ - Step 9268: {'lr': 0.0004959489158211672, 'samples': 444864, 'steps': 9267, 'loss/train': 1.8538440465927124} +07/25/2024 12:09:04 - INFO - __main__ - Step 9269: {'lr': 0.0004959479725876116, 'samples': 444912, 'steps': 9268, 'loss/train': 0.8589989542961121} +07/25/2024 12:09:04 - INFO - __main__ - Step 9270: {'lr': 0.0004959470292451571, 'samples': 444960, 'steps': 9269, 'loss/train': 1.811686396598816} +07/25/2024 12:09:05 - INFO - __main__ - Step 9271: {'lr': 0.0004959460857938043, 'samples': 445008, 'steps': 9270, 'loss/train': 2.797513961791992} +07/25/2024 12:09:05 - INFO - __main__ - Step 9272: {'lr': 0.0004959451422335533, 'samples': 445056, 'steps': 9271, 'loss/train': 2.0244507789611816} +07/25/2024 12:09:05 - INFO - __main__ - Step 9273: {'lr': 0.0004959441985644047, 'samples': 445104, 'steps': 9272, 'loss/train': 1.8060928583145142} +07/25/2024 12:09:05 - INFO - __main__ - Step 9274: {'lr': 0.000495943254786359, 'samples': 445152, 'steps': 9273, 'loss/train': 1.821858525276184} +07/25/2024 12:09:06 - INFO - __main__ - Step 9275: {'lr': 0.0004959423108994164, 'samples': 445200, 'steps': 9274, 'loss/train': 2.635812520980835} +07/25/2024 12:09:06 - INFO - __main__ - Step 9276: {'lr': 0.0004959413669035776, 'samples': 445248, 'steps': 9275, 'loss/train': 1.9175584316253662} +07/25/2024 12:09:06 - INFO - __main__ - Step 9277: {'lr': 0.0004959404227988426, 'samples': 445296, 'steps': 9276, 'loss/train': 1.947139024734497} +07/25/2024 12:09:07 - INFO - __main__ - Step 9278: {'lr': 0.0004959394785852123, 'samples': 445344, 'steps': 9277, 'loss/train': 1.8323736190795898} +07/25/2024 12:09:07 - INFO - __main__ - Step 9279: {'lr': 0.0004959385342626868, 'samples': 445392, 'steps': 9278, 'loss/train': 1.7083790302276611} +07/25/2024 12:09:07 - INFO - __main__ - Step 9280: {'lr': 0.0004959375898312667, 'samples': 445440, 'steps': 9279, 'loss/train': 1.4676473140716553} +07/25/2024 12:09:07 - INFO - __main__ - Step 9281: {'lr': 0.0004959366452909522, 'samples': 445488, 'steps': 9280, 'loss/train': 1.9633588790893555} +07/25/2024 12:09:08 - INFO - __main__ - Step 9282: {'lr': 0.0004959357006417439, 'samples': 445536, 'steps': 9281, 'loss/train': 1.7134833335876465} +07/25/2024 12:09:08 - INFO - __main__ - Step 9283: {'lr': 0.0004959347558836421, 'samples': 445584, 'steps': 9282, 'loss/train': 1.8677966594696045} +07/25/2024 12:09:08 - INFO - __main__ - Step 9284: {'lr': 0.0004959338110166473, 'samples': 445632, 'steps': 9283, 'loss/train': 1.7668815851211548} +07/25/2024 12:09:09 - INFO - __main__ - Step 9285: {'lr': 0.0004959328660407601, 'samples': 445680, 'steps': 9284, 'loss/train': 1.2364184856414795} +07/25/2024 12:09:09 - INFO - __main__ - Step 9286: {'lr': 0.0004959319209559805, 'samples': 445728, 'steps': 9285, 'loss/train': 0.20349019765853882} +07/25/2024 12:09:09 - INFO - __main__ - Step 9287: {'lr': 0.0004959309757623092, 'samples': 445776, 'steps': 9286, 'loss/train': 1.8582357168197632} +07/25/2024 12:09:09 - INFO - __main__ - Step 9288: {'lr': 0.0004959300304597465, 'samples': 445824, 'steps': 9287, 'loss/train': 1.6159896850585938} +07/25/2024 12:09:10 - INFO - __main__ - Step 9289: {'lr': 0.0004959290850482929, 'samples': 445872, 'steps': 9288, 'loss/train': 1.5595940351486206} +07/25/2024 12:09:10 - INFO - __main__ - Step 9290: {'lr': 0.0004959281395279488, 'samples': 445920, 'steps': 9289, 'loss/train': 0.1771787405014038} +07/25/2024 12:09:10 - INFO - __main__ - Step 9291: {'lr': 0.0004959271938987146, 'samples': 445968, 'steps': 9290, 'loss/train': 1.8605085611343384} +07/25/2024 12:09:11 - INFO - __main__ - Step 9292: {'lr': 0.0004959262481605909, 'samples': 446016, 'steps': 9291, 'loss/train': 1.6902005672454834} +07/25/2024 12:09:11 - INFO - __main__ - Step 9293: {'lr': 0.0004959253023135776, 'samples': 446064, 'steps': 9292, 'loss/train': 0.8234474062919617} +07/25/2024 12:09:11 - INFO - __main__ - Step 9294: {'lr': 0.0004959243563576757, 'samples': 446112, 'steps': 9293, 'loss/train': 1.5260201692581177} +07/25/2024 12:09:11 - INFO - __main__ - Step 9295: {'lr': 0.0004959234102928855, 'samples': 446160, 'steps': 9294, 'loss/train': 2.0269742012023926} +07/25/2024 12:09:12 - INFO - __main__ - Step 9296: {'lr': 0.000495922464119207, 'samples': 446208, 'steps': 9295, 'loss/train': 2.0955779552459717} +07/25/2024 12:09:12 - INFO - __main__ - Step 9297: {'lr': 0.0004959215178366412, 'samples': 446256, 'steps': 9296, 'loss/train': 1.9109621047973633} +07/25/2024 12:09:12 - INFO - __main__ - Step 9298: {'lr': 0.0004959205714451881, 'samples': 446304, 'steps': 9297, 'loss/train': 2.8416545391082764} +07/25/2024 12:09:12 - INFO - __main__ - Step 9299: {'lr': 0.0004959196249448483, 'samples': 446352, 'steps': 9298, 'loss/train': 1.4845329523086548} +07/25/2024 12:09:13 - INFO - __main__ - Step 9300: {'lr': 0.0004959186783356221, 'samples': 446400, 'steps': 9299, 'loss/train': 1.8106684684753418} +07/25/2024 12:09:13 - INFO - __main__ - Step 9301: {'lr': 0.0004959177316175101, 'samples': 446448, 'steps': 9300, 'loss/train': 2.13502836227417} +07/25/2024 12:09:13 - INFO - __main__ - Step 9302: {'lr': 0.0004959167847905126, 'samples': 446496, 'steps': 9301, 'loss/train': 2.4661545753479004} +07/25/2024 12:09:14 - INFO - __main__ - Step 9303: {'lr': 0.0004959158378546301, 'samples': 446544, 'steps': 9302, 'loss/train': 2.438048839569092} +07/25/2024 12:09:14 - INFO - __main__ - Step 9304: {'lr': 0.0004959148908098628, 'samples': 446592, 'steps': 9303, 'loss/train': 1.5853029489517212} +07/25/2024 12:09:14 - INFO - __main__ - Step 9305: {'lr': 0.0004959139436562114, 'samples': 446640, 'steps': 9304, 'loss/train': 1.1912726163864136} +07/25/2024 12:09:14 - INFO - __main__ - Step 9306: {'lr': 0.0004959129963936761, 'samples': 446688, 'steps': 9305, 'loss/train': 1.9608054161071777} +07/25/2024 12:09:15 - INFO - __main__ - Step 9307: {'lr': 0.0004959120490222574, 'samples': 446736, 'steps': 9306, 'loss/train': 2.361665964126587} +07/25/2024 12:09:15 - INFO - __main__ - Step 9308: {'lr': 0.0004959111015419559, 'samples': 446784, 'steps': 9307, 'loss/train': 1.6364482641220093} +07/25/2024 12:09:15 - INFO - __main__ - Step 9309: {'lr': 0.0004959101539527718, 'samples': 446832, 'steps': 9308, 'loss/train': 1.5282307863235474} +07/25/2024 12:09:16 - INFO - __main__ - Step 9310: {'lr': 0.0004959092062547055, 'samples': 446880, 'steps': 9309, 'loss/train': 0.22807163000106812} +07/25/2024 12:09:16 - INFO - __main__ - Step 9311: {'lr': 0.0004959082584477574, 'samples': 446928, 'steps': 9310, 'loss/train': 1.8910882472991943} +07/25/2024 12:09:16 - INFO - __main__ - Step 9312: {'lr': 0.0004959073105319283, 'samples': 446976, 'steps': 9311, 'loss/train': 1.590570092201233} +07/25/2024 12:09:16 - INFO - __main__ - Step 9313: {'lr': 0.0004959063625072181, 'samples': 447024, 'steps': 9312, 'loss/train': 1.966407060623169} +07/25/2024 12:09:17 - INFO - __main__ - Step 9314: {'lr': 0.0004959054143736275, 'samples': 447072, 'steps': 9313, 'loss/train': 0.14549857378005981} +07/25/2024 12:09:17 - INFO - __main__ - Step 9315: {'lr': 0.0004959044661311569, 'samples': 447120, 'steps': 9314, 'loss/train': 1.5803059339523315} +07/25/2024 12:09:17 - INFO - __main__ - Step 9316: {'lr': 0.0004959035177798067, 'samples': 447168, 'steps': 9315, 'loss/train': 2.0174312591552734} +07/25/2024 12:09:18 - INFO - __main__ - Step 9317: {'lr': 0.0004959025693195774, 'samples': 447216, 'steps': 9316, 'loss/train': 0.7143731117248535} +07/25/2024 12:09:18 - INFO - __main__ - Step 9318: {'lr': 0.0004959016207504691, 'samples': 447264, 'steps': 9317, 'loss/train': 1.8724950551986694} +07/25/2024 12:09:18 - INFO - __main__ - Step 9319: {'lr': 0.0004959006720724826, 'samples': 447312, 'steps': 9318, 'loss/train': 1.7808961868286133} +07/25/2024 12:09:18 - INFO - __main__ - Step 9320: {'lr': 0.0004958997232856182, 'samples': 447360, 'steps': 9319, 'loss/train': 1.317222237586975} +07/25/2024 12:09:19 - INFO - __main__ - Step 9321: {'lr': 0.0004958987743898763, 'samples': 447408, 'steps': 9320, 'loss/train': 1.7508903741836548} +07/25/2024 12:09:19 - INFO - __main__ - Step 9322: {'lr': 0.0004958978253852572, 'samples': 447456, 'steps': 9321, 'loss/train': 2.2388689517974854} +07/25/2024 12:09:19 - INFO - __main__ - Step 9323: {'lr': 0.0004958968762717615, 'samples': 447504, 'steps': 9322, 'loss/train': 0.9972813725471497} +07/25/2024 12:09:20 - INFO - __main__ - Step 9324: {'lr': 0.0004958959270493896, 'samples': 447552, 'steps': 9323, 'loss/train': 2.1790554523468018} +07/25/2024 12:09:20 - INFO - __main__ - Step 9325: {'lr': 0.0004958949777181418, 'samples': 447600, 'steps': 9324, 'loss/train': 1.7292439937591553} +07/25/2024 12:09:20 - INFO - __main__ - Step 9326: {'lr': 0.0004958940282780186, 'samples': 447648, 'steps': 9325, 'loss/train': 1.9051260948181152} +07/25/2024 12:09:20 - INFO - __main__ - Step 9327: {'lr': 0.0004958930787290204, 'samples': 447696, 'steps': 9326, 'loss/train': 1.2015438079833984} +07/25/2024 12:09:21 - INFO - __main__ - Step 9328: {'lr': 0.0004958921290711477, 'samples': 447744, 'steps': 9327, 'loss/train': 1.7562592029571533} +07/25/2024 12:09:21 - INFO - __main__ - Step 9329: {'lr': 0.0004958911793044008, 'samples': 447792, 'steps': 9328, 'loss/train': 2.1167309284210205} +07/25/2024 12:09:21 - INFO - __main__ - Step 9330: {'lr': 0.0004958902294287803, 'samples': 447840, 'steps': 9329, 'loss/train': 1.8667900562286377} +07/25/2024 12:09:22 - INFO - __main__ - Step 9331: {'lr': 0.0004958892794442863, 'samples': 447888, 'steps': 9330, 'loss/train': 1.9334566593170166} +07/25/2024 12:09:22 - INFO - __main__ - Step 9332: {'lr': 0.0004958883293509196, 'samples': 447936, 'steps': 9331, 'loss/train': 1.793734073638916} +07/25/2024 12:09:22 - INFO - __main__ - Step 9333: {'lr': 0.0004958873791486803, 'samples': 447984, 'steps': 9332, 'loss/train': 1.1258735656738281} +07/25/2024 12:09:22 - INFO - __main__ - Step 9334: {'lr': 0.0004958864288375689, 'samples': 448032, 'steps': 9333, 'loss/train': 0.6053615808486938} +07/25/2024 12:09:23 - INFO - __main__ - Step 9335: {'lr': 0.000495885478417586, 'samples': 448080, 'steps': 9334, 'loss/train': 1.50688636302948} +07/25/2024 12:09:23 - INFO - __main__ - Step 9336: {'lr': 0.0004958845278887319, 'samples': 448128, 'steps': 9335, 'loss/train': 2.625988245010376} +07/25/2024 12:09:23 - INFO - __main__ - Step 9337: {'lr': 0.000495883577251007, 'samples': 448176, 'steps': 9336, 'loss/train': 2.1403965950012207} +07/25/2024 12:09:24 - INFO - __main__ - Step 9338: {'lr': 0.0004958826265044117, 'samples': 448224, 'steps': 9337, 'loss/train': 0.852744996547699} +07/25/2024 12:09:24 - INFO - __main__ - Step 9339: {'lr': 0.0004958816756489466, 'samples': 448272, 'steps': 9338, 'loss/train': 1.905159831047058} +07/25/2024 12:09:24 - INFO - __main__ - Step 9340: {'lr': 0.0004958807246846119, 'samples': 448320, 'steps': 9339, 'loss/train': 2.183929204940796} +07/25/2024 12:09:24 - INFO - __main__ - Step 9341: {'lr': 0.0004958797736114081, 'samples': 448368, 'steps': 9340, 'loss/train': 0.9049922823905945} +07/25/2024 12:09:25 - INFO - __main__ - Step 9342: {'lr': 0.0004958788224293356, 'samples': 448416, 'steps': 9341, 'loss/train': 1.8833122253417969} +07/25/2024 12:09:25 - INFO - __main__ - Step 9343: {'lr': 0.0004958778711383949, 'samples': 448464, 'steps': 9342, 'loss/train': 2.555873394012451} +07/25/2024 12:09:25 - INFO - __main__ - Step 9344: {'lr': 0.0004958769197385863, 'samples': 448512, 'steps': 9343, 'loss/train': 1.8269457817077637} +07/25/2024 12:09:26 - INFO - __main__ - Step 9345: {'lr': 0.0004958759682299103, 'samples': 448560, 'steps': 9344, 'loss/train': 1.7054210901260376} +07/25/2024 12:09:26 - INFO - __main__ - Step 9346: {'lr': 0.0004958750166123673, 'samples': 448608, 'steps': 9345, 'loss/train': 1.824919581413269} +07/25/2024 12:09:26 - INFO - __main__ - Step 9347: {'lr': 0.0004958740648859579, 'samples': 448656, 'steps': 9346, 'loss/train': 1.422105073928833} +07/25/2024 12:09:26 - INFO - __main__ - Step 9348: {'lr': 0.0004958731130506822, 'samples': 448704, 'steps': 9347, 'loss/train': 2.228545665740967} +07/25/2024 12:09:27 - INFO - __main__ - Step 9349: {'lr': 0.0004958721611065407, 'samples': 448752, 'steps': 9348, 'loss/train': 1.7393910884857178} +07/25/2024 12:09:27 - INFO - __main__ - Step 9350: {'lr': 0.0004958712090535341, 'samples': 448800, 'steps': 9349, 'loss/train': 2.7649524211883545} +07/25/2024 12:09:27 - INFO - __main__ - Step 9351: {'lr': 0.0004958702568916624, 'samples': 448848, 'steps': 9350, 'loss/train': 1.9432979822158813} +07/25/2024 12:09:28 - INFO - __main__ - Step 9352: {'lr': 0.0004958693046209264, 'samples': 448896, 'steps': 9351, 'loss/train': 1.7830144166946411} +07/25/2024 12:09:28 - INFO - __main__ - Step 9353: {'lr': 0.0004958683522413263, 'samples': 448944, 'steps': 9352, 'loss/train': 1.9393755197525024} +07/25/2024 12:09:28 - INFO - __main__ - Step 9354: {'lr': 0.0004958673997528626, 'samples': 448992, 'steps': 9353, 'loss/train': 2.067319869995117} +07/25/2024 12:09:28 - INFO - __main__ - Step 9355: {'lr': 0.0004958664471555358, 'samples': 449040, 'steps': 9354, 'loss/train': 2.110936164855957} +07/25/2024 12:09:29 - INFO - __main__ - Step 9356: {'lr': 0.000495865494449346, 'samples': 449088, 'steps': 9355, 'loss/train': 2.3040688037872314} +07/25/2024 12:09:29 - INFO - __main__ - Step 9357: {'lr': 0.0004958645416342941, 'samples': 449136, 'steps': 9356, 'loss/train': 1.3896878957748413} +07/25/2024 12:09:29 - INFO - __main__ - Step 9358: {'lr': 0.0004958635887103801, 'samples': 449184, 'steps': 9357, 'loss/train': 0.9285328984260559} +07/25/2024 12:09:30 - INFO - __main__ - Step 9359: {'lr': 0.0004958626356776045, 'samples': 449232, 'steps': 9358, 'loss/train': 1.7922682762145996} +07/25/2024 12:09:30 - INFO - __main__ - Step 9360: {'lr': 0.0004958616825359681, 'samples': 449280, 'steps': 9359, 'loss/train': 1.8906440734863281} +07/25/2024 12:09:30 - INFO - __main__ - Step 9361: {'lr': 0.0004958607292854709, 'samples': 449328, 'steps': 9360, 'loss/train': 2.028689384460449} +07/25/2024 12:09:30 - INFO - __main__ - Step 9362: {'lr': 0.0004958597759261134, 'samples': 449376, 'steps': 9361, 'loss/train': 2.479982614517212} +07/25/2024 12:09:31 - INFO - __main__ - Step 9363: {'lr': 0.0004958588224578961, 'samples': 449424, 'steps': 9362, 'loss/train': 2.2552311420440674} +07/25/2024 12:09:31 - INFO - __main__ - Step 9364: {'lr': 0.0004958578688808194, 'samples': 449472, 'steps': 9363, 'loss/train': 2.226222038269043} +07/25/2024 12:09:31 - INFO - __main__ - Step 9365: {'lr': 0.0004958569151948838, 'samples': 449520, 'steps': 9364, 'loss/train': 0.8334150314331055} +07/25/2024 12:09:31 - INFO - __main__ - Step 9366: {'lr': 0.0004958559614000895, 'samples': 449568, 'steps': 9365, 'loss/train': 2.3782801628112793} +07/25/2024 12:09:32 - INFO - __main__ - Step 9367: {'lr': 0.0004958550074964373, 'samples': 449616, 'steps': 9366, 'loss/train': 2.1372017860412598} +07/25/2024 12:09:32 - INFO - __main__ - Step 9368: {'lr': 0.0004958540534839272, 'samples': 449664, 'steps': 9367, 'loss/train': 3.019420623779297} +07/25/2024 12:09:32 - INFO - __main__ - Step 9369: {'lr': 0.0004958530993625599, 'samples': 449712, 'steps': 9368, 'loss/train': 1.3547704219818115} +07/25/2024 12:09:33 - INFO - __main__ - Step 9370: {'lr': 0.0004958521451323357, 'samples': 449760, 'steps': 9369, 'loss/train': 2.0602927207946777} +07/25/2024 12:09:33 - INFO - __main__ - Step 9371: {'lr': 0.0004958511907932551, 'samples': 449808, 'steps': 9370, 'loss/train': 1.2268345355987549} +07/25/2024 12:09:33 - INFO - __main__ - Step 9372: {'lr': 0.0004958502363453185, 'samples': 449856, 'steps': 9371, 'loss/train': 2.287095308303833} +07/25/2024 12:09:33 - INFO - __main__ - Step 9373: {'lr': 0.0004958492817885261, 'samples': 449904, 'steps': 9372, 'loss/train': 1.9129287004470825} +07/25/2024 12:09:34 - INFO - __main__ - Step 9374: {'lr': 0.0004958483271228787, 'samples': 449952, 'steps': 9373, 'loss/train': 5.550621509552002} +07/25/2024 12:09:34 - INFO - __main__ - Step 9375: {'lr': 0.0004958473723483765, 'samples': 450000, 'steps': 9374, 'loss/train': 1.8867683410644531} +07/25/2024 12:09:34 - INFO - __main__ - Step 9376: {'lr': 0.0004958464174650201, 'samples': 450048, 'steps': 9375, 'loss/train': 1.9626187086105347} +07/25/2024 12:09:35 - INFO - __main__ - Step 9377: {'lr': 0.0004958454624728097, 'samples': 450096, 'steps': 9376, 'loss/train': 2.1275765895843506} +07/25/2024 12:09:35 - INFO - __main__ - Step 9378: {'lr': 0.0004958445073717458, 'samples': 450144, 'steps': 9377, 'loss/train': 2.604121208190918} +07/25/2024 12:09:35 - INFO - __main__ - Step 9379: {'lr': 0.0004958435521618289, 'samples': 450192, 'steps': 9378, 'loss/train': 2.1960067749023438} +07/25/2024 12:09:35 - INFO - __main__ - Step 9380: {'lr': 0.0004958425968430592, 'samples': 450240, 'steps': 9379, 'loss/train': 1.7048077583312988} +07/25/2024 12:09:36 - INFO - __main__ - Step 9381: {'lr': 0.0004958416414154374, 'samples': 450288, 'steps': 9380, 'loss/train': 1.604193925857544} +07/25/2024 12:09:36 - INFO - __main__ - Step 9382: {'lr': 0.0004958406858789639, 'samples': 450336, 'steps': 9381, 'loss/train': 0.9385942220687866} +07/25/2024 12:09:36 - INFO - __main__ - Step 9383: {'lr': 0.0004958397302336389, 'samples': 450384, 'steps': 9382, 'loss/train': 2.380828619003296} +07/25/2024 12:09:37 - INFO - __main__ - Step 9384: {'lr': 0.0004958387744794631, 'samples': 450432, 'steps': 9383, 'loss/train': 1.7600855827331543} +07/25/2024 12:09:37 - INFO - __main__ - Step 9385: {'lr': 0.0004958378186164366, 'samples': 450480, 'steps': 9384, 'loss/train': 2.58146595954895} +07/25/2024 12:09:37 - INFO - __main__ - Step 9386: {'lr': 0.0004958368626445601, 'samples': 450528, 'steps': 9385, 'loss/train': 2.126692533493042} +07/25/2024 12:09:37 - INFO - __main__ - Step 9387: {'lr': 0.0004958359065638339, 'samples': 450576, 'steps': 9386, 'loss/train': 2.180220127105713} +07/25/2024 12:09:38 - INFO - __main__ - Step 9388: {'lr': 0.0004958349503742584, 'samples': 450624, 'steps': 9387, 'loss/train': 2.072544574737549} +07/25/2024 12:09:38 - INFO - __main__ - Step 9389: {'lr': 0.0004958339940758342, 'samples': 450672, 'steps': 9388, 'loss/train': 0.5766874551773071} +07/25/2024 12:09:38 - INFO - __main__ - Step 9390: {'lr': 0.0004958330376685615, 'samples': 450720, 'steps': 9389, 'loss/train': 2.483621120452881} +07/25/2024 12:09:39 - INFO - __main__ - Step 9391: {'lr': 0.0004958320811524409, 'samples': 450768, 'steps': 9390, 'loss/train': 1.402299404144287} +07/25/2024 12:09:39 - INFO - __main__ - Step 9392: {'lr': 0.0004958311245274726, 'samples': 450816, 'steps': 9391, 'loss/train': 3.302762746810913} +07/25/2024 12:09:39 - INFO - __main__ - Step 9393: {'lr': 0.0004958301677936573, 'samples': 450864, 'steps': 9392, 'loss/train': 0.7783350944519043} +07/25/2024 12:09:39 - INFO - __main__ - Step 9394: {'lr': 0.0004958292109509952, 'samples': 450912, 'steps': 9393, 'loss/train': 1.7083958387374878} +07/25/2024 12:09:40 - INFO - __main__ - Step 9395: {'lr': 0.0004958282539994869, 'samples': 450960, 'steps': 9394, 'loss/train': 1.5172568559646606} +07/25/2024 12:09:40 - INFO - __main__ - Step 9396: {'lr': 0.0004958272969391327, 'samples': 451008, 'steps': 9395, 'loss/train': 2.218412160873413} +07/25/2024 12:09:40 - INFO - __main__ - Step 9397: {'lr': 0.000495826339769933, 'samples': 451056, 'steps': 9396, 'loss/train': 1.7068259716033936} +07/25/2024 12:09:41 - INFO - __main__ - Step 9398: {'lr': 0.0004958253824918884, 'samples': 451104, 'steps': 9397, 'loss/train': 5.3710246086120605} +07/25/2024 12:09:41 - INFO - __main__ - Step 9399: {'lr': 0.0004958244251049991, 'samples': 451152, 'steps': 9398, 'loss/train': 1.7096918821334839} +07/25/2024 12:09:41 - INFO - __main__ - Step 9400: {'lr': 0.0004958234676092657, 'samples': 451200, 'steps': 9399, 'loss/train': 2.17216157913208} +07/25/2024 12:09:41 - INFO - __main__ - Step 9401: {'lr': 0.0004958225100046886, 'samples': 451248, 'steps': 9400, 'loss/train': 2.0514538288116455} +07/25/2024 12:09:42 - INFO - __main__ - Step 9402: {'lr': 0.000495821552291268, 'samples': 451296, 'steps': 9401, 'loss/train': 2.259000062942505} +07/25/2024 12:09:42 - INFO - __main__ - Step 9403: {'lr': 0.0004958205944690047, 'samples': 451344, 'steps': 9402, 'loss/train': 1.840285062789917} +07/25/2024 12:09:42 - INFO - __main__ - Step 9404: {'lr': 0.0004958196365378989, 'samples': 451392, 'steps': 9403, 'loss/train': 1.9789806604385376} +07/25/2024 12:09:43 - INFO - __main__ - Step 9405: {'lr': 0.0004958186784979509, 'samples': 451440, 'steps': 9404, 'loss/train': 2.3308968544006348} +07/25/2024 12:09:43 - INFO - __main__ - Step 9406: {'lr': 0.0004958177203491614, 'samples': 451488, 'steps': 9405, 'loss/train': 0.6177344918251038} +07/25/2024 12:09:43 - INFO - __main__ - Step 9407: {'lr': 0.0004958167620915307, 'samples': 451536, 'steps': 9406, 'loss/train': 2.021764039993286} +07/25/2024 12:09:43 - INFO - __main__ - Step 9408: {'lr': 0.0004958158037250592, 'samples': 451584, 'steps': 9407, 'loss/train': 1.8712249994277954} +07/25/2024 12:09:44 - INFO - __main__ - Step 9409: {'lr': 0.0004958148452497474, 'samples': 451632, 'steps': 9408, 'loss/train': 2.1466386318206787} +07/25/2024 12:09:44 - INFO - __main__ - Step 9410: {'lr': 0.0004958138866655956, 'samples': 451680, 'steps': 9409, 'loss/train': 2.0211875438690186} +07/25/2024 12:09:44 - INFO - __main__ - Step 9411: {'lr': 0.0004958129279726043, 'samples': 451728, 'steps': 9410, 'loss/train': 1.9801645278930664} +07/25/2024 12:09:45 - INFO - __main__ - Step 9412: {'lr': 0.000495811969170774, 'samples': 451776, 'steps': 9411, 'loss/train': 2.0598456859588623} +07/25/2024 12:09:45 - INFO - __main__ - Step 9413: {'lr': 0.000495811010260105, 'samples': 451824, 'steps': 9412, 'loss/train': 0.8944133520126343} +07/25/2024 12:09:45 - INFO - __main__ - Step 9414: {'lr': 0.0004958100512405978, 'samples': 451872, 'steps': 9413, 'loss/train': 1.7313967943191528} +07/25/2024 12:09:45 - INFO - __main__ - Step 9415: {'lr': 0.0004958090921122527, 'samples': 451920, 'steps': 9414, 'loss/train': 2.003472328186035} +07/25/2024 12:09:46 - INFO - __main__ - Step 9416: {'lr': 0.0004958081328750704, 'samples': 451968, 'steps': 9415, 'loss/train': 2.44291090965271} +07/25/2024 12:09:46 - INFO - __main__ - Step 9417: {'lr': 0.000495807173529051, 'samples': 452016, 'steps': 9416, 'loss/train': 2.2276363372802734} +07/25/2024 12:09:46 - INFO - __main__ - Step 9418: {'lr': 0.000495806214074195, 'samples': 452064, 'steps': 9417, 'loss/train': 1.3098297119140625} +07/25/2024 12:09:47 - INFO - __main__ - Step 9419: {'lr': 0.000495805254510503, 'samples': 452112, 'steps': 9418, 'loss/train': 1.558548092842102} +07/25/2024 12:09:47 - INFO - __main__ - Step 9420: {'lr': 0.0004958042948379754, 'samples': 452160, 'steps': 9419, 'loss/train': 1.8751940727233887} +07/25/2024 12:09:47 - INFO - __main__ - Step 9421: {'lr': 0.0004958033350566125, 'samples': 452208, 'steps': 9420, 'loss/train': 1.737663745880127} +07/25/2024 12:09:47 - INFO - __main__ - Step 9422: {'lr': 0.0004958023751664147, 'samples': 452256, 'steps': 9421, 'loss/train': 4.4541168212890625} +07/25/2024 12:09:48 - INFO - __main__ - Step 9423: {'lr': 0.0004958014151673825, 'samples': 452304, 'steps': 9422, 'loss/train': 2.099142074584961} +07/25/2024 12:09:48 - INFO - __main__ - Step 9424: {'lr': 0.0004958004550595163, 'samples': 452352, 'steps': 9423, 'loss/train': 1.937382698059082} +07/25/2024 12:09:48 - INFO - __main__ - Step 9425: {'lr': 0.0004957994948428167, 'samples': 452400, 'steps': 9424, 'loss/train': 1.8899569511413574} +07/25/2024 12:09:49 - INFO - __main__ - Step 9426: {'lr': 0.0004957985345172838, 'samples': 452448, 'steps': 9425, 'loss/train': 2.0437076091766357} +07/25/2024 12:09:49 - INFO - __main__ - Step 9427: {'lr': 0.0004957975740829183, 'samples': 452496, 'steps': 9426, 'loss/train': 2.0767812728881836} +07/25/2024 12:09:49 - INFO - __main__ - Step 9428: {'lr': 0.0004957966135397205, 'samples': 452544, 'steps': 9427, 'loss/train': 2.0372543334960938} +07/25/2024 12:09:49 - INFO - __main__ - Step 9429: {'lr': 0.0004957956528876908, 'samples': 452592, 'steps': 9428, 'loss/train': 2.264528751373291} +07/25/2024 12:09:50 - INFO - __main__ - Step 9430: {'lr': 0.0004957946921268297, 'samples': 452640, 'steps': 9429, 'loss/train': 0.3265307545661926} +07/25/2024 12:09:50 - INFO - __main__ - Step 9431: {'lr': 0.0004957937312571376, 'samples': 452688, 'steps': 9430, 'loss/train': 2.2481689453125} +07/25/2024 12:09:50 - INFO - __main__ - Step 9432: {'lr': 0.000495792770278615, 'samples': 452736, 'steps': 9431, 'loss/train': 2.10805606842041} +07/25/2024 12:09:50 - INFO - __main__ - Step 9433: {'lr': 0.0004957918091912622, 'samples': 452784, 'steps': 9432, 'loss/train': 1.9699342250823975} +07/25/2024 12:09:51 - INFO - __main__ - Step 9434: {'lr': 0.0004957908479950796, 'samples': 452832, 'steps': 9433, 'loss/train': 1.7402770519256592} +07/25/2024 12:09:51 - INFO - __main__ - Step 9435: {'lr': 0.0004957898866900678, 'samples': 452880, 'steps': 9434, 'loss/train': 0.9839634895324707} +07/25/2024 12:09:51 - INFO - __main__ - Step 9436: {'lr': 0.000495788925276227, 'samples': 452928, 'steps': 9435, 'loss/train': 1.0549945831298828} +07/25/2024 12:09:52 - INFO - __main__ - Step 9437: {'lr': 0.0004957879637535578, 'samples': 452976, 'steps': 9436, 'loss/train': 1.0087734460830688} +07/25/2024 12:09:52 - INFO - __main__ - Step 9438: {'lr': 0.0004957870021220606, 'samples': 453024, 'steps': 9437, 'loss/train': 1.7939209938049316} +07/25/2024 12:09:52 - INFO - __main__ - Step 9439: {'lr': 0.0004957860403817358, 'samples': 453072, 'steps': 9438, 'loss/train': 1.427046298980713} +07/25/2024 12:09:52 - INFO - __main__ - Step 9440: {'lr': 0.000495785078532584, 'samples': 453120, 'steps': 9439, 'loss/train': 1.9548509120941162} +07/25/2024 12:09:53 - INFO - __main__ - Step 9441: {'lr': 0.0004957841165746052, 'samples': 453168, 'steps': 9440, 'loss/train': 2.027686595916748} +07/25/2024 12:09:53 - INFO - __main__ - Step 9442: {'lr': 0.0004957831545078002, 'samples': 453216, 'steps': 9441, 'loss/train': 1.3841756582260132} +07/25/2024 12:09:53 - INFO - __main__ - Step 9443: {'lr': 0.0004957821923321692, 'samples': 453264, 'steps': 9442, 'loss/train': 1.7822840213775635} +07/25/2024 12:09:54 - INFO - __main__ - Step 9444: {'lr': 0.0004957812300477128, 'samples': 453312, 'steps': 9443, 'loss/train': 1.428523063659668} +07/25/2024 12:09:54 - INFO - __main__ - Step 9445: {'lr': 0.0004957802676544314, 'samples': 453360, 'steps': 9444, 'loss/train': 2.1746304035186768} +07/25/2024 12:09:54 - INFO - __main__ - Step 9446: {'lr': 0.0004957793051523253, 'samples': 453408, 'steps': 9445, 'loss/train': 1.8655651807785034} +07/25/2024 12:09:54 - INFO - __main__ - Step 9447: {'lr': 0.0004957783425413951, 'samples': 453456, 'steps': 9446, 'loss/train': 1.8318616151809692} +07/25/2024 12:09:55 - INFO - __main__ - Step 9448: {'lr': 0.000495777379821641, 'samples': 453504, 'steps': 9447, 'loss/train': 2.6377007961273193} +07/25/2024 12:09:55 - INFO - __main__ - Step 9449: {'lr': 0.0004957764169930637, 'samples': 453552, 'steps': 9448, 'loss/train': 1.5126569271087646} +07/25/2024 12:09:55 - INFO - __main__ - Step 9450: {'lr': 0.0004957754540556635, 'samples': 453600, 'steps': 9449, 'loss/train': 2.1098685264587402} +07/25/2024 12:09:56 - INFO - __main__ - Step 9451: {'lr': 0.0004957744910094407, 'samples': 453648, 'steps': 9450, 'loss/train': 2.479363203048706} +07/25/2024 12:09:56 - INFO - __main__ - Step 9452: {'lr': 0.000495773527854396, 'samples': 453696, 'steps': 9451, 'loss/train': 1.829223871231079} +07/25/2024 12:09:56 - INFO - __main__ - Step 9453: {'lr': 0.0004957725645905295, 'samples': 453744, 'steps': 9452, 'loss/train': 2.0775043964385986} +07/25/2024 12:09:56 - INFO - __main__ - Step 9454: {'lr': 0.0004957716012178419, 'samples': 453792, 'steps': 9453, 'loss/train': 0.3933413624763489} +07/25/2024 12:09:57 - INFO - __main__ - Step 9455: {'lr': 0.0004957706377363334, 'samples': 453840, 'steps': 9454, 'loss/train': 1.7214285135269165} +07/25/2024 12:09:57 - INFO - __main__ - Step 9456: {'lr': 0.0004957696741460047, 'samples': 453888, 'steps': 9455, 'loss/train': 2.05952787399292} +07/25/2024 12:09:57 - INFO - __main__ - Step 9457: {'lr': 0.000495768710446856, 'samples': 453936, 'steps': 9456, 'loss/train': 2.293614625930786} +07/25/2024 12:09:58 - INFO - __main__ - Step 9458: {'lr': 0.0004957677466388878, 'samples': 453984, 'steps': 9457, 'loss/train': 2.2523715496063232} +07/25/2024 12:09:58 - INFO - __main__ - Step 9459: {'lr': 0.0004957667827221006, 'samples': 454032, 'steps': 9458, 'loss/train': 1.2807238101959229} +07/25/2024 12:09:58 - INFO - __main__ - Step 9460: {'lr': 0.0004957658186964947, 'samples': 454080, 'steps': 9459, 'loss/train': 1.5838496685028076} +07/25/2024 12:09:58 - INFO - __main__ - Step 9461: {'lr': 0.0004957648545620705, 'samples': 454128, 'steps': 9460, 'loss/train': 1.5599172115325928} +07/25/2024 12:09:59 - INFO - __main__ - Step 9462: {'lr': 0.0004957638903188286, 'samples': 454176, 'steps': 9461, 'loss/train': 1.9628396034240723} +07/25/2024 12:09:59 - INFO - __main__ - Step 9463: {'lr': 0.0004957629259667694, 'samples': 454224, 'steps': 9462, 'loss/train': 1.6510748863220215} +07/25/2024 12:09:59 - INFO - __main__ - Step 9464: {'lr': 0.0004957619615058932, 'samples': 454272, 'steps': 9463, 'loss/train': 1.6695489883422852} +07/25/2024 12:10:00 - INFO - __main__ - Step 9465: {'lr': 0.0004957609969362005, 'samples': 454320, 'steps': 9464, 'loss/train': 2.6679799556732178} +07/25/2024 12:10:00 - INFO - __main__ - Step 9466: {'lr': 0.0004957600322576916, 'samples': 454368, 'steps': 9465, 'loss/train': 1.7117867469787598} +07/25/2024 12:10:00 - INFO - __main__ - Step 9467: {'lr': 0.0004957590674703671, 'samples': 454416, 'steps': 9466, 'loss/train': 0.9552178978919983} +07/25/2024 12:10:00 - INFO - __main__ - Step 9468: {'lr': 0.0004957581025742274, 'samples': 454464, 'steps': 9467, 'loss/train': 1.8232618570327759} +07/25/2024 12:10:01 - INFO - __main__ - Step 9469: {'lr': 0.000495757137569273, 'samples': 454512, 'steps': 9468, 'loss/train': 1.8320950269699097} +07/25/2024 12:10:01 - INFO - __main__ - Step 9470: {'lr': 0.0004957561724555041, 'samples': 454560, 'steps': 9469, 'loss/train': 2.415015935897827} +07/25/2024 12:10:01 - INFO - __main__ - Step 9471: {'lr': 0.0004957552072329213, 'samples': 454608, 'steps': 9470, 'loss/train': 2.2553718090057373} +07/25/2024 12:10:02 - INFO - __main__ - Step 9472: {'lr': 0.000495754241901525, 'samples': 454656, 'steps': 9471, 'loss/train': 1.9351507425308228} +07/25/2024 12:10:02 - INFO - __main__ - Step 9473: {'lr': 0.0004957532764613155, 'samples': 454704, 'steps': 9472, 'loss/train': 1.661370038986206} +07/25/2024 12:10:02 - INFO - __main__ - Step 9474: {'lr': 0.0004957523109122934, 'samples': 454752, 'steps': 9473, 'loss/train': 2.166264533996582} +07/25/2024 12:10:02 - INFO - __main__ - Step 9475: {'lr': 0.000495751345254459, 'samples': 454800, 'steps': 9474, 'loss/train': 2.3650567531585693} +07/25/2024 12:10:03 - INFO - __main__ - Step 9476: {'lr': 0.0004957503794878129, 'samples': 454848, 'steps': 9475, 'loss/train': 2.339761734008789} +07/25/2024 12:10:03 - INFO - __main__ - Step 9477: {'lr': 0.0004957494136123554, 'samples': 454896, 'steps': 9476, 'loss/train': 1.8073781728744507} +07/25/2024 12:10:03 - INFO - __main__ - Step 9478: {'lr': 0.0004957484476280869, 'samples': 454944, 'steps': 9477, 'loss/train': 0.365135133266449} +07/25/2024 12:10:04 - INFO - __main__ - Step 9479: {'lr': 0.0004957474815350079, 'samples': 454992, 'steps': 9478, 'loss/train': 1.3484108448028564} +07/25/2024 12:10:04 - INFO - __main__ - Step 9480: {'lr': 0.0004957465153331188, 'samples': 455040, 'steps': 9479, 'loss/train': 2.369847297668457} +07/25/2024 12:10:04 - INFO - __main__ - Step 9481: {'lr': 0.00049574554902242, 'samples': 455088, 'steps': 9480, 'loss/train': 1.685644268989563} +07/25/2024 12:10:04 - INFO - __main__ - Step 9482: {'lr': 0.000495744582602912, 'samples': 455136, 'steps': 9481, 'loss/train': 2.0320639610290527} +07/25/2024 12:10:05 - INFO - __main__ - Step 9483: {'lr': 0.0004957436160745952, 'samples': 455184, 'steps': 9482, 'loss/train': 1.9739761352539062} +07/25/2024 12:10:05 - INFO - __main__ - Step 9484: {'lr': 0.0004957426494374699, 'samples': 455232, 'steps': 9483, 'loss/train': 2.7382595539093018} +07/25/2024 12:10:05 - INFO - __main__ - Step 9485: {'lr': 0.0004957416826915367, 'samples': 455280, 'steps': 9484, 'loss/train': 1.6732850074768066} +07/25/2024 12:10:06 - INFO - __main__ - Step 9486: {'lr': 0.000495740715836796, 'samples': 455328, 'steps': 9485, 'loss/train': 1.9176647663116455} +07/25/2024 12:10:06 - INFO - __main__ - Step 9487: {'lr': 0.0004957397488732483, 'samples': 455376, 'steps': 9486, 'loss/train': 1.4824072122573853} +07/25/2024 12:10:06 - INFO - __main__ - Step 9488: {'lr': 0.0004957387818008938, 'samples': 455424, 'steps': 9487, 'loss/train': 1.316235899925232} +07/25/2024 12:10:06 - INFO - __main__ - Step 9489: {'lr': 0.000495737814619733, 'samples': 455472, 'steps': 9488, 'loss/train': 2.3516769409179688} +07/25/2024 12:10:07 - INFO - __main__ - Step 9490: {'lr': 0.0004957368473297664, 'samples': 455520, 'steps': 9489, 'loss/train': 1.8660426139831543} +07/25/2024 12:10:07 - INFO - __main__ - Step 9491: {'lr': 0.0004957358799309945, 'samples': 455568, 'steps': 9490, 'loss/train': 0.21019645035266876} +07/25/2024 12:10:07 - INFO - __main__ - Step 9492: {'lr': 0.0004957349124234176, 'samples': 455616, 'steps': 9491, 'loss/train': 1.9058785438537598} +07/25/2024 12:10:08 - INFO - __main__ - Step 9493: {'lr': 0.0004957339448070361, 'samples': 455664, 'steps': 9492, 'loss/train': 1.6563026905059814} +07/25/2024 12:10:08 - INFO - __main__ - Step 9494: {'lr': 0.0004957329770818506, 'samples': 455712, 'steps': 9493, 'loss/train': 2.5999157428741455} +07/25/2024 12:10:08 - INFO - __main__ - Step 9495: {'lr': 0.0004957320092478614, 'samples': 455760, 'steps': 9494, 'loss/train': 2.0171220302581787} +07/25/2024 12:10:08 - INFO - __main__ - Step 9496: {'lr': 0.0004957310413050689, 'samples': 455808, 'steps': 9495, 'loss/train': 1.20278000831604} +07/25/2024 12:10:09 - INFO - __main__ - Step 9497: {'lr': 0.0004957300732534736, 'samples': 455856, 'steps': 9496, 'loss/train': 1.6363760232925415} +07/25/2024 12:10:09 - INFO - __main__ - Step 9498: {'lr': 0.0004957291050930759, 'samples': 455904, 'steps': 9497, 'loss/train': 2.202735424041748} +07/25/2024 12:10:09 - INFO - __main__ - Step 9499: {'lr': 0.0004957281368238763, 'samples': 455952, 'steps': 9498, 'loss/train': 2.105135440826416} +07/25/2024 12:10:09 - INFO - __main__ - Step 9500: {'lr': 0.000495727168445875, 'samples': 456000, 'steps': 9499, 'loss/train': 2.0372397899627686} +07/25/2024 12:10:10 - INFO - __main__ - Step 9501: {'lr': 0.0004957261999590728, 'samples': 456048, 'steps': 9500, 'loss/train': 1.6483687162399292} +07/25/2024 12:10:10 - INFO - __main__ - Step 9502: {'lr': 0.0004957252313634699, 'samples': 456096, 'steps': 9501, 'loss/train': 0.2969144284725189} +07/25/2024 12:10:10 - INFO - __main__ - Step 9503: {'lr': 0.0004957242626590666, 'samples': 456144, 'steps': 9502, 'loss/train': 2.114901304244995} +07/25/2024 12:10:11 - INFO - __main__ - Step 9504: {'lr': 0.0004957232938458635, 'samples': 456192, 'steps': 9503, 'loss/train': 1.9749038219451904} +07/25/2024 12:10:11 - INFO - __main__ - Step 9505: {'lr': 0.0004957223249238611, 'samples': 456240, 'steps': 9504, 'loss/train': 1.8773894309997559} +07/25/2024 12:10:11 - INFO - __main__ - Step 9506: {'lr': 0.0004957213558930598, 'samples': 456288, 'steps': 9505, 'loss/train': 2.272397994995117} +07/25/2024 12:10:11 - INFO - __main__ - Step 9507: {'lr': 0.0004957203867534598, 'samples': 456336, 'steps': 9506, 'loss/train': 0.4947589337825775} +07/25/2024 12:10:12 - INFO - __main__ - Step 9508: {'lr': 0.0004957194175050619, 'samples': 456384, 'steps': 9507, 'loss/train': 1.884015440940857} +07/25/2024 12:10:12 - INFO - __main__ - Step 9509: {'lr': 0.0004957184481478662, 'samples': 456432, 'steps': 9508, 'loss/train': 1.4937893152236938} +07/25/2024 12:10:12 - INFO - __main__ - Step 9510: {'lr': 0.0004957174786818732, 'samples': 456480, 'steps': 9509, 'loss/train': 2.5540363788604736} +07/25/2024 12:10:13 - INFO - __main__ - Step 9511: {'lr': 0.0004957165091070835, 'samples': 456528, 'steps': 9510, 'loss/train': 1.8019061088562012} +07/25/2024 12:10:13 - INFO - __main__ - Step 9512: {'lr': 0.0004957155394234974, 'samples': 456576, 'steps': 9511, 'loss/train': 2.6821775436401367} +07/25/2024 12:10:13 - INFO - __main__ - Step 9513: {'lr': 0.0004957145696311152, 'samples': 456624, 'steps': 9512, 'loss/train': 1.7488901615142822} +07/25/2024 12:10:13 - INFO - __main__ - Step 9514: {'lr': 0.0004957135997299376, 'samples': 456672, 'steps': 9513, 'loss/train': 2.790544271469116} +07/25/2024 12:10:14 - INFO - __main__ - Step 9515: {'lr': 0.000495712629719965, 'samples': 456720, 'steps': 9514, 'loss/train': 0.17653648555278778} +07/25/2024 12:10:14 - INFO - __main__ - Step 9516: {'lr': 0.0004957116596011976, 'samples': 456768, 'steps': 9515, 'loss/train': 1.8105872869491577} +07/25/2024 12:10:14 - INFO - __main__ - Step 9517: {'lr': 0.000495710689373636, 'samples': 456816, 'steps': 9516, 'loss/train': 1.3984469175338745} +07/25/2024 12:10:15 - INFO - __main__ - Step 9518: {'lr': 0.0004957097190372806, 'samples': 456864, 'steps': 9517, 'loss/train': 1.4500527381896973} +07/25/2024 12:10:15 - INFO - __main__ - Step 9519: {'lr': 0.0004957087485921317, 'samples': 456912, 'steps': 9518, 'loss/train': 1.1350377798080444} +07/25/2024 12:10:15 - INFO - __main__ - Step 9520: {'lr': 0.00049570777803819, 'samples': 456960, 'steps': 9519, 'loss/train': 1.200137972831726} +07/25/2024 12:10:15 - INFO - __main__ - Step 9521: {'lr': 0.0004957068073754557, 'samples': 457008, 'steps': 9520, 'loss/train': 2.240831136703491} +07/25/2024 12:10:16 - INFO - __main__ - Step 9522: {'lr': 0.0004957058366039293, 'samples': 457056, 'steps': 9521, 'loss/train': 1.9253969192504883} +07/25/2024 12:10:16 - INFO - __main__ - Step 9523: {'lr': 0.0004957048657236113, 'samples': 457104, 'steps': 9522, 'loss/train': 2.7682602405548096} +07/25/2024 12:10:16 - INFO - __main__ - Step 9524: {'lr': 0.0004957038947345021, 'samples': 457152, 'steps': 9523, 'loss/train': 2.1233232021331787} +07/25/2024 12:10:17 - INFO - __main__ - Step 9525: {'lr': 0.000495702923636602, 'samples': 457200, 'steps': 9524, 'loss/train': 1.7031999826431274} +07/25/2024 12:10:17 - INFO - __main__ - Step 9526: {'lr': 0.0004957019524299116, 'samples': 457248, 'steps': 9525, 'loss/train': 0.312850683927536} +07/25/2024 12:10:17 - INFO - __main__ - Step 9527: {'lr': 0.0004957009811144312, 'samples': 457296, 'steps': 9526, 'loss/train': 2.0187582969665527} +07/25/2024 12:10:17 - INFO - __main__ - Step 9528: {'lr': 0.0004957000096901614, 'samples': 457344, 'steps': 9527, 'loss/train': 1.9165509939193726} +07/25/2024 12:10:18 - INFO - __main__ - Step 9529: {'lr': 0.0004956990381571024, 'samples': 457392, 'steps': 9528, 'loss/train': 1.9057855606079102} +07/25/2024 12:10:18 - INFO - __main__ - Step 9530: {'lr': 0.0004956980665152548, 'samples': 457440, 'steps': 9529, 'loss/train': 1.6479401588439941} +07/25/2024 12:10:18 - INFO - __main__ - Step 9531: {'lr': 0.000495697094764619, 'samples': 457488, 'steps': 9530, 'loss/train': 0.6655592918395996} +07/25/2024 12:10:19 - INFO - __main__ - Step 9532: {'lr': 0.0004956961229051954, 'samples': 457536, 'steps': 9531, 'loss/train': 1.667885422706604} +07/25/2024 12:10:19 - INFO - __main__ - Step 9533: {'lr': 0.0004956951509369845, 'samples': 457584, 'steps': 9532, 'loss/train': 1.857228398323059} +07/25/2024 12:10:19 - INFO - __main__ - Step 9534: {'lr': 0.0004956941788599865, 'samples': 457632, 'steps': 9533, 'loss/train': 2.1517257690429688} +07/25/2024 12:10:19 - INFO - __main__ - Step 9535: {'lr': 0.0004956932066742021, 'samples': 457680, 'steps': 9534, 'loss/train': 1.9684643745422363} +07/25/2024 12:10:20 - INFO - __main__ - Step 9536: {'lr': 0.0004956922343796316, 'samples': 457728, 'steps': 9535, 'loss/train': 2.1567466259002686} +07/25/2024 12:10:20 - INFO - __main__ - Step 9537: {'lr': 0.0004956912619762756, 'samples': 457776, 'steps': 9536, 'loss/train': 2.063171625137329} +07/25/2024 12:10:20 - INFO - __main__ - Step 9538: {'lr': 0.0004956902894641342, 'samples': 457824, 'steps': 9537, 'loss/train': 2.3528246879577637} +07/25/2024 12:10:21 - INFO - __main__ - Step 9539: {'lr': 0.0004956893168432081, 'samples': 457872, 'steps': 9538, 'loss/train': 0.17405366897583008} +07/25/2024 12:10:21 - INFO - __main__ - Step 9540: {'lr': 0.0004956883441134978, 'samples': 457920, 'steps': 9539, 'loss/train': 1.8798670768737793} +07/25/2024 12:10:21 - INFO - __main__ - Step 9541: {'lr': 0.0004956873712750034, 'samples': 457968, 'steps': 9540, 'loss/train': 2.042482852935791} +07/25/2024 12:10:21 - INFO - __main__ - Step 9542: {'lr': 0.0004956863983277256, 'samples': 458016, 'steps': 9541, 'loss/train': 0.1644950956106186} +07/25/2024 12:10:22 - INFO - __main__ - Step 9543: {'lr': 0.0004956854252716647, 'samples': 458064, 'steps': 9542, 'loss/train': 2.1537880897521973} +07/25/2024 12:10:22 - INFO - __main__ - Step 9544: {'lr': 0.0004956844521068212, 'samples': 458112, 'steps': 9543, 'loss/train': 1.0619518756866455} +07/25/2024 12:10:22 - INFO - __main__ - Step 9545: {'lr': 0.0004956834788331956, 'samples': 458160, 'steps': 9544, 'loss/train': 2.221383571624756} +07/25/2024 12:10:23 - INFO - __main__ - Step 9546: {'lr': 0.0004956825054507881, 'samples': 458208, 'steps': 9545, 'loss/train': 2.4383537769317627} +07/25/2024 12:10:23 - INFO - __main__ - Step 9547: {'lr': 0.0004956815319595993, 'samples': 458256, 'steps': 9546, 'loss/train': 2.299696445465088} +07/25/2024 12:10:23 - INFO - __main__ - Step 9548: {'lr': 0.0004956805583596296, 'samples': 458304, 'steps': 9547, 'loss/train': 2.0552847385406494} +07/25/2024 12:10:23 - INFO - __main__ - Step 9549: {'lr': 0.0004956795846508794, 'samples': 458352, 'steps': 9548, 'loss/train': 1.9184637069702148} +07/25/2024 12:10:24 - INFO - __main__ - Step 9550: {'lr': 0.0004956786108333492, 'samples': 458400, 'steps': 9549, 'loss/train': 0.721714198589325} +07/25/2024 12:10:24 - INFO - __main__ - Step 9551: {'lr': 0.0004956776369070395, 'samples': 458448, 'steps': 9550, 'loss/train': 1.8898037672042847} +07/25/2024 12:10:24 - INFO - __main__ - Step 9552: {'lr': 0.0004956766628719505, 'samples': 458496, 'steps': 9551, 'loss/train': 1.2933213710784912} +07/25/2024 12:10:25 - INFO - __main__ - Step 9553: {'lr': 0.0004956756887280828, 'samples': 458544, 'steps': 9552, 'loss/train': 1.9804046154022217} +07/25/2024 12:10:25 - INFO - __main__ - Step 9554: {'lr': 0.0004956747144754368, 'samples': 458592, 'steps': 9553, 'loss/train': 2.6749489307403564} +07/25/2024 12:10:25 - INFO - __main__ - Step 9555: {'lr': 0.0004956737401140129, 'samples': 458640, 'steps': 9554, 'loss/train': 2.378305435180664} +07/25/2024 12:10:25 - INFO - __main__ - Step 9556: {'lr': 0.0004956727656438114, 'samples': 458688, 'steps': 9555, 'loss/train': 2.5604970455169678} +07/25/2024 12:10:26 - INFO - __main__ - Step 9557: {'lr': 0.0004956717910648332, 'samples': 458736, 'steps': 9556, 'loss/train': 2.022226095199585} +07/25/2024 12:10:26 - INFO - __main__ - Step 9558: {'lr': 0.0004956708163770783, 'samples': 458784, 'steps': 9557, 'loss/train': 1.873953938484192} +07/25/2024 12:10:26 - INFO - __main__ - Step 9559: {'lr': 0.000495669841580547, 'samples': 458832, 'steps': 9558, 'loss/train': 2.0267486572265625} +07/25/2024 12:10:27 - INFO - __main__ - Step 9560: {'lr': 0.0004956688666752403, 'samples': 458880, 'steps': 9559, 'loss/train': 2.1668357849121094} +07/25/2024 12:10:27 - INFO - __main__ - Step 9561: {'lr': 0.0004956678916611582, 'samples': 458928, 'steps': 9560, 'loss/train': 2.8421051502227783} +07/25/2024 12:10:27 - INFO - __main__ - Step 9562: {'lr': 0.0004956669165383012, 'samples': 458976, 'steps': 9561, 'loss/train': 1.9905240535736084} +07/25/2024 12:10:27 - INFO - __main__ - Step 9563: {'lr': 0.0004956659413066697, 'samples': 459024, 'steps': 9562, 'loss/train': 0.14287737011909485} +07/25/2024 12:10:28 - INFO - __main__ - Step 9564: {'lr': 0.0004956649659662644, 'samples': 459072, 'steps': 9563, 'loss/train': 1.5553745031356812} +07/25/2024 12:10:28 - INFO - __main__ - Step 9565: {'lr': 0.0004956639905170855, 'samples': 459120, 'steps': 9564, 'loss/train': 2.1396262645721436} +07/25/2024 12:10:28 - INFO - __main__ - Step 9566: {'lr': 0.0004956630149591334, 'samples': 459168, 'steps': 9565, 'loss/train': 1.7326838970184326} +07/25/2024 12:10:28 - INFO - __main__ - Step 9567: {'lr': 0.0004956620392924085, 'samples': 459216, 'steps': 9566, 'loss/train': 2.4298312664031982} +07/25/2024 12:10:29 - INFO - __main__ - Step 9568: {'lr': 0.0004956610635169115, 'samples': 459264, 'steps': 9567, 'loss/train': 1.5467153787612915} +07/25/2024 12:10:29 - INFO - __main__ - Step 9569: {'lr': 0.0004956600876326426, 'samples': 459312, 'steps': 9568, 'loss/train': 1.9855836629867554} +07/25/2024 12:10:29 - INFO - __main__ - Step 9570: {'lr': 0.0004956591116396023, 'samples': 459360, 'steps': 9569, 'loss/train': 1.613855004310608} +07/25/2024 12:10:30 - INFO - __main__ - Step 9571: {'lr': 0.000495658135537791, 'samples': 459408, 'steps': 9570, 'loss/train': 1.8128067255020142} +07/25/2024 12:10:30 - INFO - __main__ - Step 9572: {'lr': 0.0004956571593272092, 'samples': 459456, 'steps': 9571, 'loss/train': 1.8950642347335815} +07/25/2024 12:10:30 - INFO - __main__ - Step 9573: {'lr': 0.0004956561830078573, 'samples': 459504, 'steps': 9572, 'loss/train': 1.8120981454849243} +07/25/2024 12:10:30 - INFO - __main__ - Step 9574: {'lr': 0.0004956552065797357, 'samples': 459552, 'steps': 9573, 'loss/train': 0.25353679060935974} +07/25/2024 12:10:31 - INFO - __main__ - Step 9575: {'lr': 0.0004956542300428448, 'samples': 459600, 'steps': 9574, 'loss/train': 1.4044476747512817} +07/25/2024 12:10:31 - INFO - __main__ - Step 9576: {'lr': 0.0004956532533971853, 'samples': 459648, 'steps': 9575, 'loss/train': 2.1428558826446533} +07/25/2024 12:10:31 - INFO - __main__ - Step 9577: {'lr': 0.0004956522766427572, 'samples': 459696, 'steps': 9576, 'loss/train': 1.870579481124878} +07/25/2024 12:10:32 - INFO - __main__ - Step 9578: {'lr': 0.0004956512997795613, 'samples': 459744, 'steps': 9577, 'loss/train': 2.176872968673706} +07/25/2024 12:10:32 - INFO - __main__ - Step 9579: {'lr': 0.0004956503228075977, 'samples': 459792, 'steps': 9578, 'loss/train': 2.4507715702056885} +07/25/2024 12:10:32 - INFO - __main__ - Step 9580: {'lr': 0.0004956493457268672, 'samples': 459840, 'steps': 9579, 'loss/train': 2.382950782775879} +07/25/2024 12:10:32 - INFO - __main__ - Step 9581: {'lr': 0.00049564836853737, 'samples': 459888, 'steps': 9580, 'loss/train': 2.852064847946167} +07/25/2024 12:10:33 - INFO - __main__ - Step 9582: {'lr': 0.0004956473912391065, 'samples': 459936, 'steps': 9581, 'loss/train': 2.387958526611328} +07/25/2024 12:10:33 - INFO - __main__ - Step 9583: {'lr': 0.0004956464138320774, 'samples': 459984, 'steps': 9582, 'loss/train': 1.8361375331878662} +07/25/2024 12:10:33 - INFO - __main__ - Step 9584: {'lr': 0.0004956454363162828, 'samples': 460032, 'steps': 9583, 'loss/train': 1.7488075494766235} +07/25/2024 12:10:34 - INFO - __main__ - Step 9585: {'lr': 0.0004956444586917233, 'samples': 460080, 'steps': 9584, 'loss/train': 2.4221272468566895} +07/25/2024 12:10:34 - INFO - __main__ - Step 9586: {'lr': 0.0004956434809583994, 'samples': 460128, 'steps': 9585, 'loss/train': 2.0321342945098877} +07/25/2024 12:10:34 - INFO - __main__ - Step 9587: {'lr': 0.0004956425031163114, 'samples': 460176, 'steps': 9586, 'loss/train': 0.19859342277050018} +07/25/2024 12:10:34 - INFO - __main__ - Step 9588: {'lr': 0.0004956415251654598, 'samples': 460224, 'steps': 9587, 'loss/train': 1.8871574401855469} +07/25/2024 12:10:35 - INFO - __main__ - Step 9589: {'lr': 0.000495640547105845, 'samples': 460272, 'steps': 9588, 'loss/train': 1.864166498184204} +07/25/2024 12:10:35 - INFO - __main__ - Step 9590: {'lr': 0.0004956395689374674, 'samples': 460320, 'steps': 9589, 'loss/train': 1.8350321054458618} +07/25/2024 12:10:35 - INFO - __main__ - Step 9591: {'lr': 0.0004956385906603276, 'samples': 460368, 'steps': 9590, 'loss/train': 1.6054952144622803} +07/25/2024 12:10:36 - INFO - __main__ - Step 9592: {'lr': 0.0004956376122744258, 'samples': 460416, 'steps': 9591, 'loss/train': 1.7081612348556519} +07/25/2024 12:10:36 - INFO - __main__ - Step 9593: {'lr': 0.0004956366337797625, 'samples': 460464, 'steps': 9592, 'loss/train': 1.7983689308166504} +07/25/2024 12:10:36 - INFO - __main__ - Step 9594: {'lr': 0.0004956356551763383, 'samples': 460512, 'steps': 9593, 'loss/train': 1.0635536909103394} +07/25/2024 12:10:36 - INFO - __main__ - Step 9595: {'lr': 0.0004956346764641535, 'samples': 460560, 'steps': 9594, 'loss/train': 1.2939265966415405} +07/25/2024 12:10:37 - INFO - __main__ - Step 9596: {'lr': 0.0004956336976432085, 'samples': 460608, 'steps': 9595, 'loss/train': 1.920461893081665} +07/25/2024 12:10:37 - INFO - __main__ - Step 9597: {'lr': 0.0004956327187135039, 'samples': 460656, 'steps': 9596, 'loss/train': 2.084829807281494} +07/25/2024 12:10:37 - INFO - __main__ - Step 9598: {'lr': 0.0004956317396750399, 'samples': 460704, 'steps': 9597, 'loss/train': 1.7189782857894897} +07/25/2024 12:10:38 - INFO - __main__ - Step 9599: {'lr': 0.0004956307605278171, 'samples': 460752, 'steps': 9598, 'loss/train': 2.1052660942077637} +07/25/2024 12:10:38 - INFO - __main__ - Step 9600: {'lr': 0.0004956297812718358, 'samples': 460800, 'steps': 9599, 'loss/train': 2.18941593170166} +07/25/2024 12:10:38 - INFO - __main__ - Step 9601: {'lr': 0.0004956288019070966, 'samples': 460848, 'steps': 9600, 'loss/train': 1.8082395792007446} +07/25/2024 12:10:38 - INFO - __main__ - Step 9602: {'lr': 0.0004956278224335999, 'samples': 460896, 'steps': 9601, 'loss/train': 1.9671138525009155} +07/25/2024 12:10:39 - INFO - __main__ - Step 9603: {'lr': 0.000495626842851346, 'samples': 460944, 'steps': 9602, 'loss/train': 1.9170801639556885} +07/25/2024 12:10:39 - INFO - __main__ - Step 9604: {'lr': 0.0004956258631603354, 'samples': 460992, 'steps': 9603, 'loss/train': 1.8476781845092773} +07/25/2024 12:10:39 - INFO - __main__ - Step 9605: {'lr': 0.0004956248833605686, 'samples': 461040, 'steps': 9604, 'loss/train': 2.2126173973083496} +07/25/2024 12:10:40 - INFO - __main__ - Step 9606: {'lr': 0.0004956239034520459, 'samples': 461088, 'steps': 9605, 'loss/train': 1.8433825969696045} +07/25/2024 12:10:40 - INFO - __main__ - Step 9607: {'lr': 0.0004956229234347679, 'samples': 461136, 'steps': 9606, 'loss/train': 1.8229089975357056} +07/25/2024 12:10:40 - INFO - __main__ - Step 9608: {'lr': 0.0004956219433087349, 'samples': 461184, 'steps': 9607, 'loss/train': 1.761691689491272} +07/25/2024 12:10:40 - INFO - __main__ - Step 9609: {'lr': 0.0004956209630739474, 'samples': 461232, 'steps': 9608, 'loss/train': 3.420314311981201} +07/25/2024 12:10:41 - INFO - __main__ - Step 9610: {'lr': 0.0004956199827304058, 'samples': 461280, 'steps': 9609, 'loss/train': 2.3237996101379395} +07/25/2024 12:10:41 - INFO - __main__ - Step 9611: {'lr': 0.0004956190022781106, 'samples': 461328, 'steps': 9610, 'loss/train': 0.15550395846366882} +07/25/2024 12:10:41 - INFO - __main__ - Step 9612: {'lr': 0.0004956180217170621, 'samples': 461376, 'steps': 9611, 'loss/train': 2.023716449737549} +07/25/2024 12:10:42 - INFO - __main__ - Step 9613: {'lr': 0.000495617041047261, 'samples': 461424, 'steps': 9612, 'loss/train': 2.0579001903533936} +07/25/2024 12:10:42 - INFO - __main__ - Step 9614: {'lr': 0.0004956160602687074, 'samples': 461472, 'steps': 9613, 'loss/train': 1.9013862609863281} +07/25/2024 12:10:42 - INFO - __main__ - Step 9615: {'lr': 0.000495615079381402, 'samples': 461520, 'steps': 9614, 'loss/train': 2.0190751552581787} +07/25/2024 12:10:42 - INFO - __main__ - Step 9616: {'lr': 0.000495614098385345, 'samples': 461568, 'steps': 9615, 'loss/train': 1.914891242980957} +07/25/2024 12:10:43 - INFO - __main__ - Step 9617: {'lr': 0.0004956131172805371, 'samples': 461616, 'steps': 9616, 'loss/train': 1.7727993726730347} +07/25/2024 12:10:43 - INFO - __main__ - Step 9618: {'lr': 0.0004956121360669785, 'samples': 461664, 'steps': 9617, 'loss/train': 2.2395153045654297} +07/25/2024 12:10:43 - INFO - __main__ - Step 9619: {'lr': 0.0004956111547446697, 'samples': 461712, 'steps': 9618, 'loss/train': 1.0849605798721313} +07/25/2024 12:10:44 - INFO - __main__ - Step 9620: {'lr': 0.0004956101733136112, 'samples': 461760, 'steps': 9619, 'loss/train': 1.7937612533569336} +07/25/2024 12:10:44 - INFO - __main__ - Step 9621: {'lr': 0.0004956091917738035, 'samples': 461808, 'steps': 9620, 'loss/train': 1.44037926197052} +07/25/2024 12:10:44 - INFO - __main__ - Step 9622: {'lr': 0.0004956082101252468, 'samples': 461856, 'steps': 9621, 'loss/train': 1.8924381732940674} +07/25/2024 12:10:44 - INFO - __main__ - Step 9623: {'lr': 0.0004956072283679416, 'samples': 461904, 'steps': 9622, 'loss/train': 2.1590664386749268} +07/25/2024 12:10:45 - INFO - __main__ - Step 9624: {'lr': 0.0004956062465018887, 'samples': 461952, 'steps': 9623, 'loss/train': 1.6633425951004028} +07/25/2024 12:10:45 - INFO - __main__ - Step 9625: {'lr': 0.0004956052645270879, 'samples': 462000, 'steps': 9624, 'loss/train': 2.4197897911071777} +07/25/2024 12:10:45 - INFO - __main__ - Step 9626: {'lr': 0.0004956042824435402, 'samples': 462048, 'steps': 9625, 'loss/train': 1.7105839252471924} +07/25/2024 12:10:45 - INFO - __main__ - Step 9627: {'lr': 0.0004956033002512457, 'samples': 462096, 'steps': 9626, 'loss/train': 1.8906915187835693} +07/25/2024 12:10:46 - INFO - __main__ - Step 9628: {'lr': 0.0004956023179502049, 'samples': 462144, 'steps': 9627, 'loss/train': 2.0705783367156982} +07/25/2024 12:10:46 - INFO - __main__ - Step 9629: {'lr': 0.0004956013355404184, 'samples': 462192, 'steps': 9628, 'loss/train': 2.3757925033569336} +07/25/2024 12:10:46 - INFO - __main__ - Step 9630: {'lr': 0.0004956003530218865, 'samples': 462240, 'steps': 9629, 'loss/train': 1.9886587858200073} +07/25/2024 12:10:47 - INFO - __main__ - Step 9631: {'lr': 0.0004955993703946095, 'samples': 462288, 'steps': 9630, 'loss/train': 2.3770594596862793} +07/25/2024 12:10:47 - INFO - __main__ - Step 9632: {'lr': 0.0004955983876585882, 'samples': 462336, 'steps': 9631, 'loss/train': 2.3237249851226807} +07/25/2024 12:10:47 - INFO - __main__ - Step 9633: {'lr': 0.0004955974048138225, 'samples': 462384, 'steps': 9632, 'loss/train': 1.2695448398590088} +07/25/2024 12:10:47 - INFO - __main__ - Step 9634: {'lr': 0.0004955964218603134, 'samples': 462432, 'steps': 9633, 'loss/train': 1.6624830961227417} +07/25/2024 12:10:48 - INFO - __main__ - Step 9635: {'lr': 0.000495595438798061, 'samples': 462480, 'steps': 9634, 'loss/train': 0.26932185888290405} +07/25/2024 12:10:48 - INFO - __main__ - Step 9636: {'lr': 0.0004955944556270658, 'samples': 462528, 'steps': 9635, 'loss/train': 2.212667942047119} +07/25/2024 12:10:48 - INFO - __main__ - Step 9637: {'lr': 0.0004955934723473284, 'samples': 462576, 'steps': 9636, 'loss/train': 1.9266830682754517} +07/25/2024 12:10:49 - INFO - __main__ - Step 9638: {'lr': 0.000495592488958849, 'samples': 462624, 'steps': 9637, 'loss/train': 1.8855345249176025} +07/25/2024 12:10:49 - INFO - __main__ - Step 9639: {'lr': 0.0004955915054616282, 'samples': 462672, 'steps': 9638, 'loss/train': 2.1803457736968994} +07/25/2024 12:10:49 - INFO - __main__ - Step 9640: {'lr': 0.0004955905218556662, 'samples': 462720, 'steps': 9639, 'loss/train': 1.7791850566864014} +07/25/2024 12:10:49 - INFO - __main__ - Step 9641: {'lr': 0.0004955895381409636, 'samples': 462768, 'steps': 9640, 'loss/train': 2.912029266357422} +07/25/2024 12:10:50 - INFO - __main__ - Step 9642: {'lr': 0.000495588554317521, 'samples': 462816, 'steps': 9641, 'loss/train': 1.727980136871338} +07/25/2024 12:10:50 - INFO - __main__ - Step 9643: {'lr': 0.0004955875703853385, 'samples': 462864, 'steps': 9642, 'loss/train': 2.2749245166778564} +07/25/2024 12:10:50 - INFO - __main__ - Step 9644: {'lr': 0.0004955865863444168, 'samples': 462912, 'steps': 9643, 'loss/train': 1.3045055866241455} +07/25/2024 12:10:51 - INFO - __main__ - Step 9645: {'lr': 0.0004955856021947562, 'samples': 462960, 'steps': 9644, 'loss/train': 1.1528183221817017} +07/25/2024 12:10:51 - INFO - __main__ - Step 9646: {'lr': 0.0004955846179363573, 'samples': 463008, 'steps': 9645, 'loss/train': 1.9744192361831665} +07/25/2024 12:10:51 - INFO - __main__ - Step 9647: {'lr': 0.0004955836335692203, 'samples': 463056, 'steps': 9646, 'loss/train': 0.8054771423339844} +07/25/2024 12:10:51 - INFO - __main__ - Step 9648: {'lr': 0.0004955826490933457, 'samples': 463104, 'steps': 9647, 'loss/train': 1.874333381652832} +07/25/2024 12:10:52 - INFO - __main__ - Step 9649: {'lr': 0.0004955816645087341, 'samples': 463152, 'steps': 9648, 'loss/train': 2.339240550994873} +07/25/2024 12:10:52 - INFO - __main__ - Step 9650: {'lr': 0.0004955806798153858, 'samples': 463200, 'steps': 9649, 'loss/train': 1.9110639095306396} +07/25/2024 12:10:52 - INFO - __main__ - Step 9651: {'lr': 0.0004955796950133013, 'samples': 463248, 'steps': 9650, 'loss/train': 2.036162853240967} +07/25/2024 12:10:53 - INFO - __main__ - Step 9652: {'lr': 0.0004955787101024808, 'samples': 463296, 'steps': 9651, 'loss/train': 2.0427019596099854} +07/25/2024 12:10:53 - INFO - __main__ - Step 9653: {'lr': 0.0004955777250829251, 'samples': 463344, 'steps': 9652, 'loss/train': 1.8368297815322876} +07/25/2024 12:10:53 - INFO - __main__ - Step 9654: {'lr': 0.0004955767399546344, 'samples': 463392, 'steps': 9653, 'loss/train': 2.4954583644866943} +07/25/2024 12:10:53 - INFO - __main__ - Step 9655: {'lr': 0.0004955757547176092, 'samples': 463440, 'steps': 9654, 'loss/train': 2.3059003353118896} +07/25/2024 12:10:54 - INFO - __main__ - Step 9656: {'lr': 0.00049557476937185, 'samples': 463488, 'steps': 9655, 'loss/train': 2.109301805496216} +07/25/2024 12:10:54 - INFO - __main__ - Step 9657: {'lr': 0.000495573783917357, 'samples': 463536, 'steps': 9656, 'loss/train': 2.1279659271240234} +07/25/2024 12:10:54 - INFO - __main__ - Step 9658: {'lr': 0.000495572798354131, 'samples': 463584, 'steps': 9657, 'loss/train': 1.4561996459960938} +07/25/2024 12:10:55 - INFO - __main__ - Step 9659: {'lr': 0.0004955718126821722, 'samples': 463632, 'steps': 9658, 'loss/train': 0.2494916468858719} +07/25/2024 12:10:55 - INFO - __main__ - Step 9660: {'lr': 0.0004955708269014811, 'samples': 463680, 'steps': 9659, 'loss/train': 2.479156494140625} +07/25/2024 12:10:55 - INFO - __main__ - Step 9661: {'lr': 0.000495569841012058, 'samples': 463728, 'steps': 9660, 'loss/train': 1.1926096677780151} +07/25/2024 12:10:55 - INFO - __main__ - Step 9662: {'lr': 0.0004955688550139036, 'samples': 463776, 'steps': 9661, 'loss/train': 1.7925262451171875} +07/25/2024 12:10:56 - INFO - __main__ - Step 9663: {'lr': 0.0004955678689070181, 'samples': 463824, 'steps': 9662, 'loss/train': 2.076803684234619} +07/25/2024 12:10:56 - INFO - __main__ - Step 9664: {'lr': 0.0004955668826914021, 'samples': 463872, 'steps': 9663, 'loss/train': 1.7700467109680176} +07/25/2024 12:10:56 - INFO - __main__ - Step 9665: {'lr': 0.0004955658963670559, 'samples': 463920, 'steps': 9664, 'loss/train': 1.8634454011917114} +07/25/2024 12:10:57 - INFO - __main__ - Step 9666: {'lr': 0.0004955649099339799, 'samples': 463968, 'steps': 9665, 'loss/train': 1.9519762992858887} +07/25/2024 12:10:57 - INFO - __main__ - Step 9667: {'lr': 0.0004955639233921749, 'samples': 464016, 'steps': 9666, 'loss/train': 2.6525375843048096} +07/25/2024 12:10:57 - INFO - __main__ - Step 9668: {'lr': 0.0004955629367416409, 'samples': 464064, 'steps': 9667, 'loss/train': 1.4430015087127686} +07/25/2024 12:10:57 - INFO - __main__ - Step 9669: {'lr': 0.0004955619499823785, 'samples': 464112, 'steps': 9668, 'loss/train': 1.942671537399292} +07/25/2024 12:10:58 - INFO - __main__ - Step 9670: {'lr': 0.0004955609631143882, 'samples': 464160, 'steps': 9669, 'loss/train': 1.9545015096664429} +07/25/2024 12:10:58 - INFO - __main__ - Step 9671: {'lr': 0.0004955599761376704, 'samples': 464208, 'steps': 9670, 'loss/train': 1.720353364944458} +07/25/2024 12:10:58 - INFO - __main__ - Step 9672: {'lr': 0.0004955589890522255, 'samples': 464256, 'steps': 9671, 'loss/train': 1.983979344367981} +07/25/2024 12:10:59 - INFO - __main__ - Step 9673: {'lr': 0.000495558001858054, 'samples': 464304, 'steps': 9672, 'loss/train': 1.628653645515442} +07/25/2024 12:10:59 - INFO - __main__ - Step 9674: {'lr': 0.0004955570145551562, 'samples': 464352, 'steps': 9673, 'loss/train': 1.7837671041488647} +07/25/2024 12:10:59 - INFO - __main__ - Step 9675: {'lr': 0.0004955560271435327, 'samples': 464400, 'steps': 9674, 'loss/train': 2.2247331142425537} +07/25/2024 12:10:59 - INFO - __main__ - Step 9676: {'lr': 0.0004955550396231839, 'samples': 464448, 'steps': 9675, 'loss/train': 2.107405424118042} +07/25/2024 12:11:00 - INFO - __main__ - Step 9677: {'lr': 0.0004955540519941102, 'samples': 464496, 'steps': 9676, 'loss/train': 1.9570640325546265} +07/25/2024 12:11:00 - INFO - __main__ - Step 9678: {'lr': 0.000495553064256312, 'samples': 464544, 'steps': 9677, 'loss/train': 2.3433752059936523} +07/25/2024 12:11:00 - INFO - __main__ - Step 9679: {'lr': 0.0004955520764097898, 'samples': 464592, 'steps': 9678, 'loss/train': 2.1499485969543457} +07/25/2024 12:11:01 - INFO - __main__ - Step 9680: {'lr': 0.000495551088454544, 'samples': 464640, 'steps': 9679, 'loss/train': 2.259878158569336} +07/25/2024 12:11:01 - INFO - __main__ - Step 9681: {'lr': 0.0004955501003905752, 'samples': 464688, 'steps': 9680, 'loss/train': 2.2285919189453125} +07/25/2024 12:11:01 - INFO - __main__ - Step 9682: {'lr': 0.0004955491122178835, 'samples': 464736, 'steps': 9681, 'loss/train': 1.9110727310180664} +07/25/2024 12:11:01 - INFO - __main__ - Step 9683: {'lr': 0.0004955481239364695, 'samples': 464784, 'steps': 9682, 'loss/train': 0.7037660479545593} +07/25/2024 12:11:02 - INFO - __main__ - Step 9684: {'lr': 0.0004955471355463339, 'samples': 464832, 'steps': 9683, 'loss/train': 2.4138782024383545} +07/25/2024 12:11:02 - INFO - __main__ - Step 9685: {'lr': 0.0004955461470474768, 'samples': 464880, 'steps': 9684, 'loss/train': 2.2639002799987793} +07/25/2024 12:11:02 - INFO - __main__ - Step 9686: {'lr': 0.0004955451584398987, 'samples': 464928, 'steps': 9685, 'loss/train': 1.9137319326400757} +07/25/2024 12:11:03 - INFO - __main__ - Step 9687: {'lr': 0.0004955441697236002, 'samples': 464976, 'steps': 9686, 'loss/train': 1.7800018787384033} +07/25/2024 12:11:03 - INFO - __main__ - Step 9688: {'lr': 0.0004955431808985815, 'samples': 465024, 'steps': 9687, 'loss/train': 2.1323792934417725} +07/25/2024 12:11:03 - INFO - __main__ - Step 9689: {'lr': 0.0004955421919648432, 'samples': 465072, 'steps': 9688, 'loss/train': 2.28446888923645} +07/25/2024 12:11:03 - INFO - __main__ - Step 9690: {'lr': 0.0004955412029223857, 'samples': 465120, 'steps': 9689, 'loss/train': 2.2907559871673584} +07/25/2024 12:11:04 - INFO - __main__ - Step 9691: {'lr': 0.0004955402137712094, 'samples': 465168, 'steps': 9690, 'loss/train': 2.562654733657837} +07/25/2024 12:11:04 - INFO - __main__ - Step 9692: {'lr': 0.0004955392245113148, 'samples': 465216, 'steps': 9691, 'loss/train': 1.2209588289260864} +07/25/2024 12:11:04 - INFO - __main__ - Step 9693: {'lr': 0.0004955382351427023, 'samples': 465264, 'steps': 9692, 'loss/train': 1.7762895822525024} +07/25/2024 12:11:05 - INFO - __main__ - Step 9694: {'lr': 0.0004955372456653724, 'samples': 465312, 'steps': 9693, 'loss/train': 1.8040742874145508} +07/25/2024 12:11:05 - INFO - __main__ - Step 9695: {'lr': 0.0004955362560793255, 'samples': 465360, 'steps': 9694, 'loss/train': 0.9400464296340942} +07/25/2024 12:11:05 - INFO - __main__ - Step 9696: {'lr': 0.0004955352663845619, 'samples': 465408, 'steps': 9695, 'loss/train': 1.9895422458648682} +07/25/2024 12:11:05 - INFO - __main__ - Step 9697: {'lr': 0.0004955342765810823, 'samples': 465456, 'steps': 9696, 'loss/train': 2.2131125926971436} +07/25/2024 12:11:06 - INFO - __main__ - Step 9698: {'lr': 0.000495533286668887, 'samples': 465504, 'steps': 9697, 'loss/train': 1.8847888708114624} +07/25/2024 12:11:06 - INFO - __main__ - Step 9699: {'lr': 0.0004955322966479764, 'samples': 465552, 'steps': 9698, 'loss/train': 1.5983576774597168} +07/25/2024 12:11:06 - INFO - __main__ - Step 9700: {'lr': 0.000495531306518351, 'samples': 465600, 'steps': 9699, 'loss/train': 1.681602120399475} +07/25/2024 12:11:06 - INFO - __main__ - Step 9701: {'lr': 0.0004955303162800111, 'samples': 465648, 'steps': 9700, 'loss/train': 2.1969029903411865} +07/25/2024 12:11:07 - INFO - __main__ - Step 9702: {'lr': 0.0004955293259329574, 'samples': 465696, 'steps': 9701, 'loss/train': 1.6857531070709229} +07/25/2024 12:11:07 - INFO - __main__ - Step 9703: {'lr': 0.0004955283354771902, 'samples': 465744, 'steps': 9702, 'loss/train': 2.07940411567688} +07/25/2024 12:11:07 - INFO - __main__ - Step 9704: {'lr': 0.0004955273449127098, 'samples': 465792, 'steps': 9703, 'loss/train': 2.245269536972046} +07/25/2024 12:11:08 - INFO - __main__ - Step 9705: {'lr': 0.0004955263542395168, 'samples': 465840, 'steps': 9704, 'loss/train': 1.6766623258590698} +07/25/2024 12:11:08 - INFO - __main__ - Step 9706: {'lr': 0.0004955253634576117, 'samples': 465888, 'steps': 9705, 'loss/train': 1.8710005283355713} +07/25/2024 12:11:08 - INFO - __main__ - Step 9707: {'lr': 0.0004955243725669947, 'samples': 465936, 'steps': 9706, 'loss/train': 1.7760928869247437} +07/25/2024 12:11:08 - INFO - __main__ - Step 9708: {'lr': 0.0004955233815676666, 'samples': 465984, 'steps': 9707, 'loss/train': 2.3810782432556152} +07/25/2024 12:11:09 - INFO - __main__ - Step 9709: {'lr': 0.0004955223904596274, 'samples': 466032, 'steps': 9708, 'loss/train': 1.9740827083587646} +07/25/2024 12:11:09 - INFO - __main__ - Step 9710: {'lr': 0.000495521399242878, 'samples': 466080, 'steps': 9709, 'loss/train': 2.0967116355895996} +07/25/2024 12:11:09 - INFO - __main__ - Step 9711: {'lr': 0.0004955204079174184, 'samples': 466128, 'steps': 9710, 'loss/train': 0.9343461394309998} +07/25/2024 12:11:10 - INFO - __main__ - Step 9712: {'lr': 0.0004955194164832493, 'samples': 466176, 'steps': 9711, 'loss/train': 1.8910088539123535} +07/25/2024 12:11:10 - INFO - __main__ - Step 9713: {'lr': 0.0004955184249403711, 'samples': 466224, 'steps': 9712, 'loss/train': 2.361048936843872} +07/25/2024 12:11:10 - INFO - __main__ - Step 9714: {'lr': 0.0004955174332887843, 'samples': 466272, 'steps': 9713, 'loss/train': 2.0535433292388916} +07/25/2024 12:11:10 - INFO - __main__ - Step 9715: {'lr': 0.0004955164415284892, 'samples': 466320, 'steps': 9714, 'loss/train': 2.427238941192627} +07/25/2024 12:11:11 - INFO - __main__ - Step 9716: {'lr': 0.0004955154496594863, 'samples': 466368, 'steps': 9715, 'loss/train': 1.801060438156128} +07/25/2024 12:11:11 - INFO - __main__ - Step 9717: {'lr': 0.0004955144576817759, 'samples': 466416, 'steps': 9716, 'loss/train': 1.8203932046890259} +07/25/2024 12:11:11 - INFO - __main__ - Step 9718: {'lr': 0.0004955134655953589, 'samples': 466464, 'steps': 9717, 'loss/train': 2.616899251937866} +07/25/2024 12:11:12 - INFO - __main__ - Step 9719: {'lr': 0.0004955124734002351, 'samples': 466512, 'steps': 9718, 'loss/train': 2.1081795692443848} +07/25/2024 12:11:12 - INFO - __main__ - Step 9720: {'lr': 0.0004955114810964054, 'samples': 466560, 'steps': 9719, 'loss/train': 1.5339804887771606} +07/25/2024 12:11:12 - INFO - __main__ - Step 9721: {'lr': 0.0004955104886838701, 'samples': 466608, 'steps': 9720, 'loss/train': 1.673685908317566} +07/25/2024 12:11:12 - INFO - __main__ - Step 9722: {'lr': 0.0004955094961626295, 'samples': 466656, 'steps': 9721, 'loss/train': 1.9258956909179688} +07/25/2024 12:11:13 - INFO - __main__ - Step 9723: {'lr': 0.0004955085035326843, 'samples': 466704, 'steps': 9722, 'loss/train': 2.3283462524414062} +07/25/2024 12:11:13 - INFO - __main__ - Step 9724: {'lr': 0.0004955075107940347, 'samples': 466752, 'steps': 9723, 'loss/train': 2.137106418609619} +07/25/2024 12:11:13 - INFO - __main__ - Step 9725: {'lr': 0.0004955065179466815, 'samples': 466800, 'steps': 9724, 'loss/train': 2.387021780014038} +07/25/2024 12:11:14 - INFO - __main__ - Step 9726: {'lr': 0.0004955055249906247, 'samples': 466848, 'steps': 9725, 'loss/train': 1.8670343160629272} +07/25/2024 12:11:14 - INFO - __main__ - Step 9727: {'lr': 0.000495504531925865, 'samples': 466896, 'steps': 9726, 'loss/train': 1.569427728652954} +07/25/2024 12:11:14 - INFO - __main__ - Step 9728: {'lr': 0.0004955035387524027, 'samples': 466944, 'steps': 9727, 'loss/train': 2.0778729915618896} +07/25/2024 12:11:14 - INFO - __main__ - Step 9729: {'lr': 0.0004955025454702383, 'samples': 466992, 'steps': 9728, 'loss/train': 1.938651204109192} +07/25/2024 12:11:15 - INFO - __main__ - Step 9730: {'lr': 0.0004955015520793723, 'samples': 467040, 'steps': 9729, 'loss/train': 2.1538078784942627} +07/25/2024 12:11:15 - INFO - __main__ - Step 9731: {'lr': 0.0004955005585798051, 'samples': 467088, 'steps': 9730, 'loss/train': 2.2560269832611084} +07/25/2024 12:11:15 - INFO - __main__ - Step 9732: {'lr': 0.0004954995649715372, 'samples': 467136, 'steps': 9731, 'loss/train': 2.5172364711761475} +07/25/2024 12:11:16 - INFO - __main__ - Step 9733: {'lr': 0.0004954985712545689, 'samples': 467184, 'steps': 9732, 'loss/train': 1.9731740951538086} +07/25/2024 12:11:16 - INFO - __main__ - Step 9734: {'lr': 0.0004954975774289007, 'samples': 467232, 'steps': 9733, 'loss/train': 1.8678423166275024} +07/25/2024 12:11:16 - INFO - __main__ - Step 9735: {'lr': 0.0004954965834945331, 'samples': 467280, 'steps': 9734, 'loss/train': 2.6807546615600586} +07/25/2024 12:11:16 - INFO - __main__ - Step 9736: {'lr': 0.0004954955894514665, 'samples': 467328, 'steps': 9735, 'loss/train': 2.148266315460205} +07/25/2024 12:11:17 - INFO - __main__ - Step 9737: {'lr': 0.0004954945952997012, 'samples': 467376, 'steps': 9736, 'loss/train': 0.9914339780807495} +07/25/2024 12:11:17 - INFO - __main__ - Step 9738: {'lr': 0.0004954936010392379, 'samples': 467424, 'steps': 9737, 'loss/train': 1.798278570175171} +07/25/2024 12:11:17 - INFO - __main__ - Step 9739: {'lr': 0.0004954926066700769, 'samples': 467472, 'steps': 9738, 'loss/train': 1.9482929706573486} +07/25/2024 12:11:18 - INFO - __main__ - Step 9740: {'lr': 0.0004954916121922186, 'samples': 467520, 'steps': 9739, 'loss/train': 1.9613783359527588} +07/25/2024 12:11:18 - INFO - __main__ - Step 9741: {'lr': 0.0004954906176056635, 'samples': 467568, 'steps': 9740, 'loss/train': 2.118100166320801} +07/25/2024 12:11:18 - INFO - __main__ - Step 9742: {'lr': 0.0004954896229104119, 'samples': 467616, 'steps': 9741, 'loss/train': 2.0944933891296387} +07/25/2024 12:11:18 - INFO - __main__ - Step 9743: {'lr': 0.0004954886281064647, 'samples': 467664, 'steps': 9742, 'loss/train': 2.1054916381835938} +07/25/2024 12:11:19 - INFO - __main__ - Step 9744: {'lr': 0.0004954876331938218, 'samples': 467712, 'steps': 9743, 'loss/train': 2.1035594940185547} +07/25/2024 12:11:19 - INFO - __main__ - Step 9745: {'lr': 0.0004954866381724839, 'samples': 467760, 'steps': 9744, 'loss/train': 1.8242990970611572} +07/25/2024 12:11:19 - INFO - __main__ - Step 9746: {'lr': 0.0004954856430424514, 'samples': 467808, 'steps': 9745, 'loss/train': 2.0509326457977295} +07/25/2024 12:11:20 - INFO - __main__ - Step 9747: {'lr': 0.0004954846478037248, 'samples': 467856, 'steps': 9746, 'loss/train': 1.7824931144714355} +07/25/2024 12:11:20 - INFO - __main__ - Step 9748: {'lr': 0.0004954836524563043, 'samples': 467904, 'steps': 9747, 'loss/train': 2.6125502586364746} +07/25/2024 12:11:20 - INFO - __main__ - Step 9749: {'lr': 0.0004954826570001906, 'samples': 467952, 'steps': 9748, 'loss/train': 2.577782392501831} +07/25/2024 12:11:20 - INFO - __main__ - Step 9750: {'lr': 0.0004954816614353841, 'samples': 468000, 'steps': 9749, 'loss/train': 2.0687406063079834} +07/25/2024 12:11:21 - INFO - __main__ - Step 9751: {'lr': 0.0004954806657618851, 'samples': 468048, 'steps': 9750, 'loss/train': 2.234142780303955} +07/25/2024 12:11:21 - INFO - __main__ - Step 9752: {'lr': 0.0004954796699796943, 'samples': 468096, 'steps': 9751, 'loss/train': 1.802199363708496} +07/25/2024 12:11:21 - DEBUG - datasets.packaged_modules.json.json - Batch of 11335148 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:11:21 - DEBUG - datasets.packaged_modules.json.json - Batch of 11335148 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:11:21 - INFO - __main__ - Step 9753: {'lr': 0.0004954786740888118, 'samples': 468144, 'steps': 9752, 'loss/train': 2.210143804550171} +07/25/2024 12:11:22 - INFO - __main__ - Step 9754: {'lr': 0.0004954776780892384, 'samples': 468192, 'steps': 9753, 'loss/train': 1.7418129444122314} +07/25/2024 12:11:22 - INFO - __main__ - Step 9755: {'lr': 0.0004954766819809742, 'samples': 468240, 'steps': 9754, 'loss/train': 2.1576738357543945} +07/25/2024 12:11:22 - INFO - __main__ - Step 9756: {'lr': 0.0004954756857640199, 'samples': 468288, 'steps': 9755, 'loss/train': 2.382948875427246} +07/25/2024 12:11:22 - INFO - __main__ - Step 9757: {'lr': 0.0004954746894383758, 'samples': 468336, 'steps': 9756, 'loss/train': 2.0645925998687744} +07/25/2024 12:11:23 - INFO - __main__ - Step 9758: {'lr': 0.0004954736930040425, 'samples': 468384, 'steps': 9757, 'loss/train': 2.1005425453186035} +07/25/2024 12:11:23 - INFO - __main__ - Step 9759: {'lr': 0.0004954726964610202, 'samples': 468432, 'steps': 9758, 'loss/train': 2.112316846847534} +07/25/2024 12:11:23 - INFO - __main__ - Step 9760: {'lr': 0.0004954716998093095, 'samples': 468480, 'steps': 9759, 'loss/train': 2.277247905731201} +07/25/2024 12:11:24 - INFO - __main__ - Step 9761: {'lr': 0.0004954707030489108, 'samples': 468528, 'steps': 9760, 'loss/train': 1.320465326309204} +07/25/2024 12:11:24 - INFO - __main__ - Step 9762: {'lr': 0.0004954697061798246, 'samples': 468576, 'steps': 9761, 'loss/train': 1.9758919477462769} +07/25/2024 12:11:24 - INFO - __main__ - Step 9763: {'lr': 0.0004954687092020512, 'samples': 468624, 'steps': 9762, 'loss/train': 1.6027858257293701} +07/25/2024 12:11:24 - INFO - __main__ - Step 9764: {'lr': 0.0004954677121155912, 'samples': 468672, 'steps': 9763, 'loss/train': 1.5875288248062134} +07/25/2024 12:11:25 - INFO - __main__ - Step 9765: {'lr': 0.000495466714920445, 'samples': 468720, 'steps': 9764, 'loss/train': 1.8215649127960205} +07/25/2024 12:11:25 - INFO - __main__ - Step 9766: {'lr': 0.000495465717616613, 'samples': 468768, 'steps': 9765, 'loss/train': 1.9958953857421875} +07/25/2024 12:11:25 - INFO - __main__ - Step 9767: {'lr': 0.0004954647202040956, 'samples': 468816, 'steps': 9766, 'loss/train': 2.002734422683716} +07/25/2024 12:11:26 - INFO - __main__ - Step 9768: {'lr': 0.0004954637226828933, 'samples': 468864, 'steps': 9767, 'loss/train': 2.4693996906280518} +07/25/2024 12:11:26 - INFO - __main__ - Step 9769: {'lr': 0.0004954627250530066, 'samples': 468912, 'steps': 9768, 'loss/train': 1.8900662660598755} +07/25/2024 12:11:26 - INFO - __main__ - Step 9770: {'lr': 0.0004954617273144359, 'samples': 468960, 'steps': 9769, 'loss/train': 2.261046886444092} +07/25/2024 12:11:26 - INFO - __main__ - Step 9771: {'lr': 0.0004954607294671816, 'samples': 469008, 'steps': 9770, 'loss/train': 2.0652194023132324} +07/25/2024 12:11:27 - INFO - __main__ - Step 9772: {'lr': 0.0004954597315112441, 'samples': 469056, 'steps': 9771, 'loss/train': 1.9234076738357544} +07/25/2024 12:11:27 - INFO - __main__ - Step 9773: {'lr': 0.0004954587334466239, 'samples': 469104, 'steps': 9772, 'loss/train': 2.490741014480591} +07/25/2024 12:11:27 - INFO - __main__ - Step 9774: {'lr': 0.0004954577352733216, 'samples': 469152, 'steps': 9773, 'loss/train': 2.216019868850708} +07/25/2024 12:11:28 - INFO - __main__ - Step 9775: {'lr': 0.0004954567369913374, 'samples': 469200, 'steps': 9774, 'loss/train': 2.2214975357055664} +07/25/2024 12:11:28 - INFO - __main__ - Step 9776: {'lr': 0.0004954557386006718, 'samples': 469248, 'steps': 9775, 'loss/train': 2.200087070465088} +07/25/2024 12:11:28 - INFO - __main__ - Step 9777: {'lr': 0.0004954547401013254, 'samples': 469296, 'steps': 9776, 'loss/train': 2.206472635269165} +07/25/2024 12:11:28 - INFO - __main__ - Step 9778: {'lr': 0.0004954537414932984, 'samples': 469344, 'steps': 9777, 'loss/train': 2.503997325897217} +07/25/2024 12:11:29 - INFO - __main__ - Step 9779: {'lr': 0.0004954527427765914, 'samples': 469392, 'steps': 9778, 'loss/train': 1.9266761541366577} +07/25/2024 12:11:29 - INFO - __main__ - Step 9780: {'lr': 0.0004954517439512048, 'samples': 469440, 'steps': 9779, 'loss/train': 1.7589668035507202} +07/25/2024 12:11:29 - INFO - __main__ - Step 9781: {'lr': 0.000495450745017139, 'samples': 469488, 'steps': 9780, 'loss/train': 2.4774928092956543} +07/25/2024 12:11:29 - INFO - __main__ - Step 9782: {'lr': 0.0004954497459743945, 'samples': 469536, 'steps': 9781, 'loss/train': 3.1235172748565674} +07/25/2024 12:11:30 - INFO - __main__ - Step 9783: {'lr': 0.0004954487468229718, 'samples': 469584, 'steps': 9782, 'loss/train': 2.4265615940093994} +07/25/2024 12:11:30 - INFO - __main__ - Step 9784: {'lr': 0.0004954477475628713, 'samples': 469632, 'steps': 9783, 'loss/train': 2.1767280101776123} +07/25/2024 12:11:30 - INFO - __main__ - Step 9785: {'lr': 0.0004954467481940934, 'samples': 469680, 'steps': 9784, 'loss/train': 2.3078765869140625} +07/25/2024 12:11:31 - INFO - __main__ - Step 9786: {'lr': 0.0004954457487166383, 'samples': 469728, 'steps': 9785, 'loss/train': 1.8282382488250732} +07/25/2024 12:11:31 - INFO - __main__ - Step 9787: {'lr': 0.000495444749130507, 'samples': 469776, 'steps': 9786, 'loss/train': 2.5328738689422607} +07/25/2024 12:11:31 - INFO - __main__ - Step 9788: {'lr': 0.0004954437494356996, 'samples': 469824, 'steps': 9787, 'loss/train': 1.8538055419921875} +07/25/2024 12:11:31 - INFO - __main__ - Step 9789: {'lr': 0.0004954427496322165, 'samples': 469872, 'steps': 9788, 'loss/train': 1.8979249000549316} +07/25/2024 12:11:32 - INFO - __main__ - Step 9790: {'lr': 0.0004954417497200583, 'samples': 469920, 'steps': 9789, 'loss/train': 2.0874924659729004} +07/25/2024 12:11:32 - INFO - __main__ - Step 9791: {'lr': 0.0004954407496992252, 'samples': 469968, 'steps': 9790, 'loss/train': 1.836219310760498} +07/25/2024 12:11:32 - INFO - __main__ - Step 9792: {'lr': 0.000495439749569718, 'samples': 470016, 'steps': 9791, 'loss/train': 2.424621343612671} +07/25/2024 12:11:33 - INFO - __main__ - Step 9793: {'lr': 0.0004954387493315369, 'samples': 470064, 'steps': 9792, 'loss/train': 1.649219036102295} +07/25/2024 12:11:33 - INFO - __main__ - Step 9794: {'lr': 0.0004954377489846824, 'samples': 470112, 'steps': 9793, 'loss/train': 1.9052298069000244} +07/25/2024 12:11:33 - INFO - __main__ - Step 9795: {'lr': 0.000495436748529155, 'samples': 470160, 'steps': 9794, 'loss/train': 1.6254645586013794} +07/25/2024 12:11:33 - INFO - __main__ - Step 9796: {'lr': 0.000495435747964955, 'samples': 470208, 'steps': 9795, 'loss/train': 0.2585812509059906} +07/25/2024 12:11:34 - INFO - __main__ - Step 9797: {'lr': 0.0004954347472920829, 'samples': 470256, 'steps': 9796, 'loss/train': 1.7521898746490479} +07/25/2024 12:11:34 - INFO - __main__ - Step 9798: {'lr': 0.0004954337465105393, 'samples': 470304, 'steps': 9797, 'loss/train': 2.3988256454467773} +07/25/2024 12:11:34 - INFO - __main__ - Step 9799: {'lr': 0.0004954327456203245, 'samples': 470352, 'steps': 9798, 'loss/train': 1.761135458946228} +07/25/2024 12:11:35 - INFO - __main__ - Step 9800: {'lr': 0.0004954317446214388, 'samples': 470400, 'steps': 9799, 'loss/train': 2.292921543121338} +07/25/2024 12:11:35 - INFO - __main__ - Step 9801: {'lr': 0.000495430743513883, 'samples': 470448, 'steps': 9800, 'loss/train': 2.0486810207366943} +07/25/2024 12:11:35 - DEBUG - datasets.packaged_modules.json.json - Batch of 10486391 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:11:35 - INFO - __main__ - Step 9802: {'lr': 0.0004954297422976572, 'samples': 470496, 'steps': 9801, 'loss/train': 2.050183057785034} +07/25/2024 12:11:35 - INFO - __main__ - Step 9803: {'lr': 0.000495428740972762, 'samples': 470544, 'steps': 9802, 'loss/train': 2.1050009727478027} +07/25/2024 12:11:36 - INFO - __main__ - Step 9804: {'lr': 0.0004954277395391979, 'samples': 470592, 'steps': 9803, 'loss/train': 2.211676597595215} +07/25/2024 12:11:36 - INFO - __main__ - Step 9805: {'lr': 0.0004954267379969652, 'samples': 470640, 'steps': 9804, 'loss/train': 1.7954756021499634} +07/25/2024 12:11:36 - INFO - __main__ - Step 9806: {'lr': 0.0004954257363460644, 'samples': 470688, 'steps': 9805, 'loss/train': 3.9219672679901123} +07/25/2024 12:11:37 - INFO - __main__ - Step 9807: {'lr': 0.0004954247345864961, 'samples': 470736, 'steps': 9806, 'loss/train': 1.9540032148361206} +07/25/2024 12:11:37 - INFO - __main__ - Step 9808: {'lr': 0.0004954237327182605, 'samples': 470784, 'steps': 9807, 'loss/train': 1.9979010820388794} +07/25/2024 12:11:37 - INFO - __main__ - Step 9809: {'lr': 0.0004954227307413581, 'samples': 470832, 'steps': 9808, 'loss/train': 2.0761754512786865} +07/25/2024 12:11:37 - INFO - __main__ - Step 9810: {'lr': 0.0004954217286557895, 'samples': 470880, 'steps': 9809, 'loss/train': 2.293506622314453} +07/25/2024 12:11:38 - INFO - __main__ - Step 9811: {'lr': 0.000495420726461555, 'samples': 470928, 'steps': 9810, 'loss/train': 2.3710289001464844} +07/25/2024 12:11:38 - INFO - __main__ - Step 9812: {'lr': 0.000495419724158655, 'samples': 470976, 'steps': 9811, 'loss/train': 0.7449370622634888} +07/25/2024 12:11:38 - INFO - __main__ - Step 9813: {'lr': 0.0004954187217470901, 'samples': 471024, 'steps': 9812, 'loss/train': 2.418696403503418} +07/25/2024 12:11:39 - INFO - __main__ - Step 9814: {'lr': 0.0004954177192268606, 'samples': 471072, 'steps': 9813, 'loss/train': 2.2186176776885986} +07/25/2024 12:11:39 - INFO - __main__ - Step 9815: {'lr': 0.0004954167165979671, 'samples': 471120, 'steps': 9814, 'loss/train': 1.2060327529907227} +07/25/2024 12:11:39 - INFO - __main__ - Step 9816: {'lr': 0.0004954157138604099, 'samples': 471168, 'steps': 9815, 'loss/train': 1.780017614364624} +07/25/2024 12:11:39 - INFO - __main__ - Step 9817: {'lr': 0.0004954147110141896, 'samples': 471216, 'steps': 9816, 'loss/train': 1.3963041305541992} +07/25/2024 12:11:40 - INFO - __main__ - Step 9818: {'lr': 0.0004954137080593064, 'samples': 471264, 'steps': 9817, 'loss/train': 1.8464378118515015} +07/25/2024 12:11:40 - INFO - __main__ - Step 9819: {'lr': 0.000495412704995761, 'samples': 471312, 'steps': 9818, 'loss/train': 1.9781945943832397} +07/25/2024 12:11:40 - INFO - __main__ - Step 9820: {'lr': 0.0004954117018235537, 'samples': 471360, 'steps': 9819, 'loss/train': 0.703033447265625} +07/25/2024 12:11:41 - INFO - __main__ - Step 9821: {'lr': 0.0004954106985426849, 'samples': 471408, 'steps': 9820, 'loss/train': 2.377955675125122} +07/25/2024 12:11:41 - INFO - __main__ - Step 9822: {'lr': 0.0004954096951531551, 'samples': 471456, 'steps': 9821, 'loss/train': 2.2490854263305664} +07/25/2024 12:11:41 - INFO - __main__ - Step 9823: {'lr': 0.000495408691654965, 'samples': 471504, 'steps': 9822, 'loss/train': 2.2707712650299072} +07/25/2024 12:11:41 - INFO - __main__ - Step 9824: {'lr': 0.0004954076880481146, 'samples': 471552, 'steps': 9823, 'loss/train': 1.9035060405731201} +07/25/2024 12:11:42 - INFO - __main__ - Step 9825: {'lr': 0.0004954066843326046, 'samples': 471600, 'steps': 9824, 'loss/train': 1.3987094163894653} +07/25/2024 12:11:42 - INFO - __main__ - Step 9826: {'lr': 0.0004954056805084354, 'samples': 471648, 'steps': 9825, 'loss/train': 2.801875591278076} +07/25/2024 12:11:42 - INFO - __main__ - Step 9827: {'lr': 0.0004954046765756073, 'samples': 471696, 'steps': 9826, 'loss/train': 2.262542486190796} +07/25/2024 12:11:43 - INFO - __main__ - Step 9828: {'lr': 0.0004954036725341211, 'samples': 471744, 'steps': 9827, 'loss/train': 1.554627537727356} +07/25/2024 12:11:43 - INFO - __main__ - Step 9829: {'lr': 0.000495402668383977, 'samples': 471792, 'steps': 9828, 'loss/train': 1.7271065711975098} +07/25/2024 12:11:43 - INFO - __main__ - Step 9830: {'lr': 0.0004954016641251754, 'samples': 471840, 'steps': 9829, 'loss/train': 4.134598731994629} +07/25/2024 12:11:43 - INFO - __main__ - Step 9831: {'lr': 0.0004954006597577168, 'samples': 471888, 'steps': 9830, 'loss/train': 1.7511718273162842} +07/25/2024 12:11:44 - INFO - __main__ - Step 9832: {'lr': 0.0004953996552816016, 'samples': 471936, 'steps': 9831, 'loss/train': 2.303227186203003} +07/25/2024 12:11:44 - INFO - __main__ - Step 9833: {'lr': 0.0004953986506968306, 'samples': 471984, 'steps': 9832, 'loss/train': 1.607843041419983} +07/25/2024 12:11:44 - INFO - __main__ - Step 9834: {'lr': 0.0004953976460034037, 'samples': 472032, 'steps': 9833, 'loss/train': 3.144477128982544} +07/25/2024 12:11:45 - INFO - __main__ - Step 9835: {'lr': 0.0004953966412013217, 'samples': 472080, 'steps': 9834, 'loss/train': 2.5846736431121826} +07/25/2024 12:11:45 - INFO - __main__ - Step 9836: {'lr': 0.0004953956362905848, 'samples': 472128, 'steps': 9835, 'loss/train': 1.6999553442001343} +07/25/2024 12:11:45 - INFO - __main__ - Step 9837: {'lr': 0.0004953946312711938, 'samples': 472176, 'steps': 9836, 'loss/train': 2.108152151107788} +07/25/2024 12:11:45 - INFO - __main__ - Step 9838: {'lr': 0.0004953936261431487, 'samples': 472224, 'steps': 9837, 'loss/train': 2.0965094566345215} +07/25/2024 12:11:46 - INFO - __main__ - Step 9839: {'lr': 0.0004953926209064505, 'samples': 472272, 'steps': 9838, 'loss/train': 1.9491851329803467} +07/25/2024 12:11:46 - INFO - __main__ - Step 9840: {'lr': 0.000495391615561099, 'samples': 472320, 'steps': 9839, 'loss/train': 1.9068299531936646} +07/25/2024 12:11:46 - INFO - __main__ - Step 9841: {'lr': 0.0004953906101070951, 'samples': 472368, 'steps': 9840, 'loss/train': 1.91497004032135} +07/25/2024 12:11:47 - INFO - __main__ - Step 9842: {'lr': 0.0004953896045444392, 'samples': 472416, 'steps': 9841, 'loss/train': 1.481073021888733} +07/25/2024 12:11:47 - INFO - __main__ - Step 9843: {'lr': 0.0004953885988731315, 'samples': 472464, 'steps': 9842, 'loss/train': 2.1200637817382812} +07/25/2024 12:11:47 - INFO - __main__ - Step 9844: {'lr': 0.0004953875930931727, 'samples': 472512, 'steps': 9843, 'loss/train': 1.4799284934997559} +07/25/2024 12:11:47 - INFO - __main__ - Step 9845: {'lr': 0.0004953865872045631, 'samples': 472560, 'steps': 9844, 'loss/train': 2.1631650924682617} +07/25/2024 12:11:48 - INFO - __main__ - Step 9846: {'lr': 0.0004953855812073033, 'samples': 472608, 'steps': 9845, 'loss/train': 2.2817158699035645} +07/25/2024 12:11:48 - INFO - __main__ - Step 9847: {'lr': 0.0004953845751013936, 'samples': 472656, 'steps': 9846, 'loss/train': 2.4118504524230957} +07/25/2024 12:11:48 - INFO - __main__ - Step 9848: {'lr': 0.0004953835688868343, 'samples': 472704, 'steps': 9847, 'loss/train': 2.5342259407043457} +07/25/2024 12:11:48 - DEBUG - datasets.packaged_modules.json.json - Batch of 10991053 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:11:49 - INFO - __main__ - Step 9849: {'lr': 0.0004953825625636263, 'samples': 472752, 'steps': 9848, 'loss/train': 1.4093166589736938} +07/25/2024 12:11:49 - INFO - __main__ - Step 9850: {'lr': 0.0004953815561317696, 'samples': 472800, 'steps': 9849, 'loss/train': 2.2449352741241455} +07/25/2024 12:11:49 - INFO - __main__ - Step 9851: {'lr': 0.0004953805495912649, 'samples': 472848, 'steps': 9850, 'loss/train': 2.376260280609131} +07/25/2024 12:11:49 - INFO - __main__ - Step 9852: {'lr': 0.0004953795429421125, 'samples': 472896, 'steps': 9851, 'loss/train': 2.3382508754730225} +07/25/2024 12:11:50 - INFO - __main__ - Step 9853: {'lr': 0.000495378536184313, 'samples': 472944, 'steps': 9852, 'loss/train': 1.6476885080337524} +07/25/2024 12:11:50 - DEBUG - datasets.packaged_modules.json.json - Batch of 10510740 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:11:50 - DEBUG - datasets.packaged_modules.json.json - Batch of 10510740 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:11:50 - INFO - __main__ - Step 9854: {'lr': 0.0004953775293178666, 'samples': 472992, 'steps': 9853, 'loss/train': 2.6871824264526367} +07/25/2024 12:11:50 - INFO - __main__ - Step 9855: {'lr': 0.0004953765223427741, 'samples': 473040, 'steps': 9854, 'loss/train': 1.622288465499878} +07/25/2024 12:11:51 - INFO - __main__ - Step 9856: {'lr': 0.0004953755152590357, 'samples': 473088, 'steps': 9855, 'loss/train': 1.4976918697357178} +07/25/2024 12:11:51 - INFO - __main__ - Step 9857: {'lr': 0.0004953745080666518, 'samples': 473136, 'steps': 9856, 'loss/train': 1.7109293937683105} +07/25/2024 12:11:51 - INFO - __main__ - Step 9858: {'lr': 0.000495373500765623, 'samples': 473184, 'steps': 9857, 'loss/train': 1.4035166501998901} +07/25/2024 12:11:51 - INFO - __main__ - Step 9859: {'lr': 0.0004953724933559498, 'samples': 473232, 'steps': 9858, 'loss/train': 1.9155018329620361} +07/25/2024 12:11:52 - INFO - __main__ - Step 9860: {'lr': 0.0004953714858376324, 'samples': 473280, 'steps': 9859, 'loss/train': 2.131350517272949} +07/25/2024 12:11:52 - INFO - __main__ - Step 9861: {'lr': 0.0004953704782106714, 'samples': 473328, 'steps': 9860, 'loss/train': 2.031562566757202} +07/25/2024 12:11:52 - INFO - __main__ - Step 9862: {'lr': 0.0004953694704750673, 'samples': 473376, 'steps': 9861, 'loss/train': 1.5281685590744019} +07/25/2024 12:11:52 - INFO - __main__ - Step 9863: {'lr': 0.0004953684626308204, 'samples': 473424, 'steps': 9862, 'loss/train': 2.1522488594055176} +07/25/2024 12:11:53 - INFO - __main__ - Step 9864: {'lr': 0.0004953674546779313, 'samples': 473472, 'steps': 9863, 'loss/train': 2.0263545513153076} +07/25/2024 12:11:53 - INFO - __main__ - Step 9865: {'lr': 0.0004953664466164003, 'samples': 473520, 'steps': 9864, 'loss/train': 1.5679677724838257} +07/25/2024 12:11:53 - INFO - __main__ - Step 9866: {'lr': 0.0004953654384462279, 'samples': 473568, 'steps': 9865, 'loss/train': 2.3359460830688477} +07/25/2024 12:11:54 - INFO - __main__ - Step 9867: {'lr': 0.0004953644301674146, 'samples': 473616, 'steps': 9866, 'loss/train': 2.3522212505340576} +07/25/2024 12:11:54 - INFO - __main__ - Step 9868: {'lr': 0.0004953634217799608, 'samples': 473664, 'steps': 9867, 'loss/train': 1.944246530532837} +07/25/2024 12:11:54 - INFO - __main__ - Step 9869: {'lr': 0.0004953624132838669, 'samples': 473712, 'steps': 9868, 'loss/train': 2.246891498565674} +07/25/2024 12:11:54 - INFO - __main__ - Step 9870: {'lr': 0.0004953614046791335, 'samples': 473760, 'steps': 9869, 'loss/train': 2.1385042667388916} +07/25/2024 12:11:55 - INFO - __main__ - Step 9871: {'lr': 0.0004953603959657608, 'samples': 473808, 'steps': 9870, 'loss/train': 2.1418464183807373} +07/25/2024 12:11:55 - DEBUG - datasets.packaged_modules.json.json - Batch of 10504920 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:11:55 - INFO - __main__ - Step 9872: {'lr': 0.0004953593871437495, 'samples': 473856, 'steps': 9871, 'loss/train': 1.9975314140319824} +07/25/2024 12:11:55 - INFO - __main__ - Step 9873: {'lr': 0.0004953583782130999, 'samples': 473904, 'steps': 9872, 'loss/train': 1.644331455230713} +07/25/2024 12:11:56 - INFO - __main__ - Step 9874: {'lr': 0.0004953573691738125, 'samples': 473952, 'steps': 9873, 'loss/train': 1.9079875946044922} +07/25/2024 12:11:56 - INFO - __main__ - Step 9875: {'lr': 0.0004953563600258876, 'samples': 474000, 'steps': 9874, 'loss/train': 2.158536672592163} +07/25/2024 12:11:56 - INFO - __main__ - Step 9876: {'lr': 0.0004953553507693261, 'samples': 474048, 'steps': 9875, 'loss/train': 1.9547282457351685} +07/25/2024 12:11:56 - INFO - __main__ - Step 9877: {'lr': 0.0004953543414041278, 'samples': 474096, 'steps': 9876, 'loss/train': 2.345489740371704} +07/25/2024 12:11:57 - INFO - __main__ - Step 9878: {'lr': 0.0004953533319302936, 'samples': 474144, 'steps': 9877, 'loss/train': 1.8661675453186035} +07/25/2024 12:11:57 - INFO - __main__ - Step 9879: {'lr': 0.0004953523223478239, 'samples': 474192, 'steps': 9878, 'loss/train': 1.4174458980560303} +07/25/2024 12:11:57 - INFO - __main__ - Step 9880: {'lr': 0.000495351312656719, 'samples': 474240, 'steps': 9879, 'loss/train': 1.7333465814590454} +07/25/2024 12:11:58 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488778 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:11:58 - INFO - __main__ - Step 9881: {'lr': 0.0004953503028569794, 'samples': 474288, 'steps': 9880, 'loss/train': 2.0389139652252197} +07/25/2024 12:11:58 - INFO - __main__ - Step 9882: {'lr': 0.0004953492929486055, 'samples': 474336, 'steps': 9881, 'loss/train': 1.1642704010009766} +07/25/2024 12:11:58 - INFO - __main__ - Step 9883: {'lr': 0.0004953482829315979, 'samples': 474384, 'steps': 9882, 'loss/train': 1.8245162963867188} +07/25/2024 12:11:58 - INFO - __main__ - Step 9884: {'lr': 0.0004953472728059569, 'samples': 474432, 'steps': 9883, 'loss/train': 1.9973955154418945} +07/25/2024 12:11:59 - INFO - __main__ - Step 9885: {'lr': 0.0004953462625716831, 'samples': 474480, 'steps': 9884, 'loss/train': 1.3530302047729492} +07/25/2024 12:11:59 - INFO - __main__ - Step 9886: {'lr': 0.0004953452522287767, 'samples': 474528, 'steps': 9885, 'loss/train': 1.9265888929367065} +07/25/2024 12:11:59 - INFO - __main__ - Step 9887: {'lr': 0.0004953442417772384, 'samples': 474576, 'steps': 9886, 'loss/train': 2.0858755111694336} +07/25/2024 12:12:00 - INFO - __main__ - Step 9888: {'lr': 0.0004953432312170686, 'samples': 474624, 'steps': 9887, 'loss/train': 2.109705924987793} +07/25/2024 12:12:00 - INFO - __main__ - Step 9889: {'lr': 0.0004953422205482675, 'samples': 474672, 'steps': 9888, 'loss/train': 2.320852279663086} +07/25/2024 12:12:00 - INFO - __main__ - Step 9890: {'lr': 0.0004953412097708358, 'samples': 474720, 'steps': 9889, 'loss/train': 2.024834156036377} +07/25/2024 12:12:00 - INFO - __main__ - Step 9891: {'lr': 0.000495340198884774, 'samples': 474768, 'steps': 9890, 'loss/train': 0.576071560382843} +07/25/2024 12:12:01 - INFO - __main__ - Step 9892: {'lr': 0.0004953391878900823, 'samples': 474816, 'steps': 9891, 'loss/train': 2.031486749649048} +07/25/2024 12:12:01 - INFO - __main__ - Step 9893: {'lr': 0.0004953381767867615, 'samples': 474864, 'steps': 9892, 'loss/train': 2.1962950229644775} +07/25/2024 12:12:01 - INFO - __main__ - Step 9894: {'lr': 0.0004953371655748117, 'samples': 474912, 'steps': 9893, 'loss/train': 2.7614240646362305} +07/25/2024 12:12:02 - INFO - __main__ - Step 9895: {'lr': 0.0004953361542542334, 'samples': 474960, 'steps': 9894, 'loss/train': 2.028986692428589} +07/25/2024 12:12:02 - INFO - __main__ - Step 9896: {'lr': 0.0004953351428250272, 'samples': 475008, 'steps': 9895, 'loss/train': 1.9542683362960815} +07/25/2024 12:12:02 - INFO - __main__ - Step 9897: {'lr': 0.0004953341312871934, 'samples': 475056, 'steps': 9896, 'loss/train': 1.651660442352295} +07/25/2024 12:12:02 - INFO - __main__ - Step 9898: {'lr': 0.0004953331196407326, 'samples': 475104, 'steps': 9897, 'loss/train': 1.7692241668701172} +07/25/2024 12:12:03 - INFO - __main__ - Step 9899: {'lr': 0.0004953321078856451, 'samples': 475152, 'steps': 9898, 'loss/train': 2.075016736984253} +07/25/2024 12:12:03 - INFO - __main__ - Step 9900: {'lr': 0.0004953310960219315, 'samples': 475200, 'steps': 9899, 'loss/train': 2.238151788711548} +07/25/2024 12:12:03 - INFO - __main__ - Step 9901: {'lr': 0.0004953300840495921, 'samples': 475248, 'steps': 9900, 'loss/train': 2.0864241123199463} +07/25/2024 12:12:04 - INFO - __main__ - Step 9902: {'lr': 0.0004953290719686274, 'samples': 475296, 'steps': 9901, 'loss/train': 2.016275644302368} +07/25/2024 12:12:04 - DEBUG - datasets.packaged_modules.json.json - Batch of 10504939 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:12:04 - INFO - __main__ - Step 9903: {'lr': 0.0004953280597790379, 'samples': 475344, 'steps': 9902, 'loss/train': 0.6958889365196228} +07/25/2024 12:12:04 - INFO - __main__ - Step 9904: {'lr': 0.0004953270474808241, 'samples': 475392, 'steps': 9903, 'loss/train': 1.98786199092865} +07/25/2024 12:12:04 - INFO - __main__ - Step 9905: {'lr': 0.0004953260350739863, 'samples': 475440, 'steps': 9904, 'loss/train': 2.940763235092163} +07/25/2024 12:12:05 - INFO - __main__ - Step 9906: {'lr': 0.0004953250225585249, 'samples': 475488, 'steps': 9905, 'loss/train': 1.100536584854126} +07/25/2024 12:12:05 - INFO - __main__ - Step 9907: {'lr': 0.0004953240099344406, 'samples': 475536, 'steps': 9906, 'loss/train': 2.205853223800659} +07/25/2024 12:12:05 - INFO - __main__ - Step 9908: {'lr': 0.0004953229972017336, 'samples': 475584, 'steps': 9907, 'loss/train': 1.8632115125656128} +07/25/2024 12:12:06 - INFO - __main__ - Step 9909: {'lr': 0.0004953219843604045, 'samples': 475632, 'steps': 9908, 'loss/train': 1.681586503982544} +07/25/2024 12:12:06 - INFO - __main__ - Step 9910: {'lr': 0.0004953209714104536, 'samples': 475680, 'steps': 9909, 'loss/train': 2.1624703407287598} +07/25/2024 12:12:06 - INFO - __main__ - Step 9911: {'lr': 0.0004953199583518816, 'samples': 475728, 'steps': 9910, 'loss/train': 1.8840579986572266} +07/25/2024 12:12:06 - INFO - __main__ - Step 9912: {'lr': 0.0004953189451846887, 'samples': 475776, 'steps': 9911, 'loss/train': 1.8764499425888062} +07/25/2024 12:12:07 - INFO - __main__ - Step 9913: {'lr': 0.0004953179319088755, 'samples': 475824, 'steps': 9912, 'loss/train': 1.8562551736831665} +07/25/2024 12:12:07 - INFO - __main__ - Step 9914: {'lr': 0.0004953169185244424, 'samples': 475872, 'steps': 9913, 'loss/train': 1.8710030317306519} +07/25/2024 12:12:07 - INFO - __main__ - Step 9915: {'lr': 0.0004953159050313898, 'samples': 475920, 'steps': 9914, 'loss/train': 0.27398681640625} +07/25/2024 12:12:08 - INFO - __main__ - Step 9916: {'lr': 0.0004953148914297182, 'samples': 475968, 'steps': 9915, 'loss/train': 2.0881879329681396} +07/25/2024 12:12:08 - INFO - __main__ - Step 9917: {'lr': 0.0004953138777194279, 'samples': 476016, 'steps': 9916, 'loss/train': 2.0337631702423096} +07/25/2024 12:12:08 - INFO - __main__ - Step 9918: {'lr': 0.0004953128639005196, 'samples': 476064, 'steps': 9917, 'loss/train': 2.3174452781677246} +07/25/2024 12:12:08 - INFO - __main__ - Step 9919: {'lr': 0.0004953118499729937, 'samples': 476112, 'steps': 9918, 'loss/train': 1.9621788263320923} +07/25/2024 12:12:09 - INFO - __main__ - Step 9920: {'lr': 0.0004953108359368507, 'samples': 476160, 'steps': 9919, 'loss/train': 1.9979054927825928} +07/25/2024 12:12:09 - INFO - __main__ - Step 9921: {'lr': 0.0004953098217920907, 'samples': 476208, 'steps': 9920, 'loss/train': 0.9190749526023865} +07/25/2024 12:12:09 - INFO - __main__ - Step 9922: {'lr': 0.0004953088075387145, 'samples': 476256, 'steps': 9921, 'loss/train': 1.96616792678833} +07/25/2024 12:12:10 - INFO - __main__ - Step 9923: {'lr': 0.0004953077931767224, 'samples': 476304, 'steps': 9922, 'loss/train': 1.7798031568527222} +07/25/2024 12:12:10 - INFO - __main__ - Step 9924: {'lr': 0.0004953067787061148, 'samples': 476352, 'steps': 9923, 'loss/train': 2.4955894947052} +07/25/2024 12:12:10 - INFO - __main__ - Step 9925: {'lr': 0.0004953057641268923, 'samples': 476400, 'steps': 9924, 'loss/train': 2.235795736312866} +07/25/2024 12:12:10 - INFO - __main__ - Step 9926: {'lr': 0.0004953047494390553, 'samples': 476448, 'steps': 9925, 'loss/train': 2.197497606277466} +07/25/2024 12:12:11 - INFO - __main__ - Step 9927: {'lr': 0.0004953037346426042, 'samples': 476496, 'steps': 9926, 'loss/train': 1.69162917137146} +07/25/2024 12:12:11 - INFO - __main__ - Step 9928: {'lr': 0.0004953027197375396, 'samples': 476544, 'steps': 9927, 'loss/train': 1.7806284427642822} +07/25/2024 12:12:11 - INFO - __main__ - Step 9929: {'lr': 0.0004953017047238617, 'samples': 476592, 'steps': 9928, 'loss/train': 1.2521871328353882} +07/25/2024 12:12:12 - INFO - __main__ - Step 9930: {'lr': 0.0004953006896015711, 'samples': 476640, 'steps': 9929, 'loss/train': 2.0085299015045166} +07/25/2024 12:12:12 - INFO - __main__ - Step 9931: {'lr': 0.0004952996743706683, 'samples': 476688, 'steps': 9930, 'loss/train': 2.1527998447418213} +07/25/2024 12:12:12 - INFO - __main__ - Step 9932: {'lr': 0.0004952986590311536, 'samples': 476736, 'steps': 9931, 'loss/train': 2.1815404891967773} +07/25/2024 12:12:12 - INFO - __main__ - Step 9933: {'lr': 0.0004952976435830275, 'samples': 476784, 'steps': 9932, 'loss/train': 1.830151915550232} +07/25/2024 12:12:13 - INFO - __main__ - Step 9934: {'lr': 0.0004952966280262906, 'samples': 476832, 'steps': 9933, 'loss/train': 2.215540647506714} +07/25/2024 12:12:13 - INFO - __main__ - Step 9935: {'lr': 0.0004952956123609431, 'samples': 476880, 'steps': 9934, 'loss/train': 1.8357007503509521} +07/25/2024 12:12:13 - INFO - __main__ - Step 9936: {'lr': 0.0004952945965869856, 'samples': 476928, 'steps': 9935, 'loss/train': 1.9374910593032837} +07/25/2024 12:12:14 - INFO - __main__ - Step 9937: {'lr': 0.0004952935807044185, 'samples': 476976, 'steps': 9936, 'loss/train': 1.9098176956176758} +07/25/2024 12:12:14 - INFO - __main__ - Step 9938: {'lr': 0.0004952925647132425, 'samples': 477024, 'steps': 9937, 'loss/train': 1.7906218767166138} +07/25/2024 12:12:14 - INFO - __main__ - Step 9939: {'lr': 0.0004952915486134575, 'samples': 477072, 'steps': 9938, 'loss/train': 0.23149019479751587} +07/25/2024 12:12:14 - INFO - __main__ - Step 9940: {'lr': 0.0004952905324050645, 'samples': 477120, 'steps': 9939, 'loss/train': 1.9025663137435913} +07/25/2024 12:12:15 - INFO - __main__ - Step 9941: {'lr': 0.0004952895160880636, 'samples': 477168, 'steps': 9940, 'loss/train': 1.920467734336853} +07/25/2024 12:12:15 - INFO - __main__ - Step 9942: {'lr': 0.0004952884996624555, 'samples': 477216, 'steps': 9941, 'loss/train': 1.9551920890808105} +07/25/2024 12:12:15 - INFO - __main__ - Step 9943: {'lr': 0.0004952874831282404, 'samples': 477264, 'steps': 9942, 'loss/train': 1.8091914653778076} +07/25/2024 12:12:16 - INFO - __main__ - Step 9944: {'lr': 0.0004952864664854189, 'samples': 477312, 'steps': 9943, 'loss/train': 2.3000500202178955} +07/25/2024 12:12:16 - INFO - __main__ - Step 9945: {'lr': 0.0004952854497339915, 'samples': 477360, 'steps': 9944, 'loss/train': 1.9136197566986084} +07/25/2024 12:12:16 - INFO - __main__ - Step 9946: {'lr': 0.0004952844328739585, 'samples': 477408, 'steps': 9945, 'loss/train': 1.010848045349121} +07/25/2024 12:12:16 - INFO - __main__ - Step 9947: {'lr': 0.0004952834159053204, 'samples': 477456, 'steps': 9946, 'loss/train': 2.226268768310547} +07/25/2024 12:12:17 - INFO - __main__ - Step 9948: {'lr': 0.0004952823988280778, 'samples': 477504, 'steps': 9947, 'loss/train': 2.2339868545532227} +07/25/2024 12:12:17 - INFO - __main__ - Step 9949: {'lr': 0.0004952813816422309, 'samples': 477552, 'steps': 9948, 'loss/train': 1.9708843231201172} +07/25/2024 12:12:17 - INFO - __main__ - Step 9950: {'lr': 0.0004952803643477804, 'samples': 477600, 'steps': 9949, 'loss/train': 1.541074514389038} +07/25/2024 12:12:17 - INFO - __main__ - Step 9951: {'lr': 0.0004952793469447266, 'samples': 477648, 'steps': 9950, 'loss/train': 2.1604690551757812} +07/25/2024 12:12:18 - DEBUG - datasets.packaged_modules.json.json - Batch of 10491107 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:12:18 - INFO - __main__ - Step 9952: {'lr': 0.0004952783294330698, 'samples': 477696, 'steps': 9951, 'loss/train': 2.1130027770996094} +07/25/2024 12:12:18 - INFO - __main__ - Step 9953: {'lr': 0.0004952773118128108, 'samples': 477744, 'steps': 9952, 'loss/train': 0.7562810778617859} +07/25/2024 12:12:18 - INFO - __main__ - Step 9954: {'lr': 0.0004952762940839499, 'samples': 477792, 'steps': 9953, 'loss/train': 1.7770241498947144} +07/25/2024 12:12:19 - INFO - __main__ - Step 9955: {'lr': 0.0004952752762464875, 'samples': 477840, 'steps': 9954, 'loss/train': 1.863222599029541} +07/25/2024 12:12:19 - INFO - __main__ - Step 9956: {'lr': 0.0004952742583004241, 'samples': 477888, 'steps': 9955, 'loss/train': 1.2895687818527222} +07/25/2024 12:12:19 - INFO - __main__ - Step 9957: {'lr': 0.00049527324024576, 'samples': 477936, 'steps': 9956, 'loss/train': 1.9643734693527222} +07/25/2024 12:12:19 - INFO - __main__ - Step 9958: {'lr': 0.0004952722220824959, 'samples': 477984, 'steps': 9957, 'loss/train': 1.891387701034546} +07/25/2024 12:12:20 - INFO - __main__ - Step 9959: {'lr': 0.000495271203810632, 'samples': 478032, 'steps': 9958, 'loss/train': 2.417772054672241} +07/25/2024 12:12:20 - INFO - __main__ - Step 9960: {'lr': 0.0004952701854301691, 'samples': 478080, 'steps': 9959, 'loss/train': 2.0564377307891846} +07/25/2024 12:12:20 - INFO - __main__ - Step 9961: {'lr': 0.0004952691669411073, 'samples': 478128, 'steps': 9960, 'loss/train': 1.9887083768844604} +07/25/2024 12:12:21 - INFO - __main__ - Step 9962: {'lr': 0.0004952681483434472, 'samples': 478176, 'steps': 9961, 'loss/train': 2.2442543506622314} +07/25/2024 12:12:21 - INFO - __main__ - Step 9963: {'lr': 0.0004952671296371892, 'samples': 478224, 'steps': 9962, 'loss/train': 0.19640475511550903} +07/25/2024 12:12:21 - INFO - __main__ - Step 9964: {'lr': 0.0004952661108223339, 'samples': 478272, 'steps': 9963, 'loss/train': 1.628688931465149} +07/25/2024 12:12:21 - INFO - __main__ - Step 9965: {'lr': 0.0004952650918988816, 'samples': 478320, 'steps': 9964, 'loss/train': 2.338158130645752} +07/25/2024 12:12:22 - INFO - __main__ - Step 9966: {'lr': 0.0004952640728668327, 'samples': 478368, 'steps': 9965, 'loss/train': 1.8967206478118896} +07/25/2024 12:12:22 - INFO - __main__ - Step 9967: {'lr': 0.0004952630537261878, 'samples': 478416, 'steps': 9966, 'loss/train': 2.136568784713745} +07/25/2024 12:12:22 - INFO - __main__ - Step 9968: {'lr': 0.0004952620344769473, 'samples': 478464, 'steps': 9967, 'loss/train': 2.0652315616607666} +07/25/2024 12:12:23 - INFO - __main__ - Step 9969: {'lr': 0.0004952610151191116, 'samples': 478512, 'steps': 9968, 'loss/train': 2.2584660053253174} +07/25/2024 12:12:23 - INFO - __main__ - Step 9970: {'lr': 0.0004952599956526812, 'samples': 478560, 'steps': 9969, 'loss/train': 1.7653034925460815} +07/25/2024 12:12:23 - INFO - __main__ - Step 9971: {'lr': 0.0004952589760776566, 'samples': 478608, 'steps': 9970, 'loss/train': 1.9831056594848633} +07/25/2024 12:12:23 - INFO - __main__ - Step 9972: {'lr': 0.0004952579563940381, 'samples': 478656, 'steps': 9971, 'loss/train': 2.1169514656066895} +07/25/2024 12:12:24 - INFO - __main__ - Step 9973: {'lr': 0.0004952569366018264, 'samples': 478704, 'steps': 9972, 'loss/train': 1.8094125986099243} +07/25/2024 12:12:24 - INFO - __main__ - Step 9974: {'lr': 0.0004952559167010217, 'samples': 478752, 'steps': 9973, 'loss/train': 1.6522457599639893} +07/25/2024 12:12:24 - INFO - __main__ - Step 9975: {'lr': 0.0004952548966916246, 'samples': 478800, 'steps': 9974, 'loss/train': 2.204210042953491} +07/25/2024 12:12:25 - INFO - __main__ - Step 9976: {'lr': 0.0004952538765736355, 'samples': 478848, 'steps': 9975, 'loss/train': 2.0376250743865967} +07/25/2024 12:12:25 - INFO - __main__ - Step 9977: {'lr': 0.0004952528563470547, 'samples': 478896, 'steps': 9976, 'loss/train': 1.3249481916427612} +07/25/2024 12:12:25 - INFO - __main__ - Step 9978: {'lr': 0.000495251836011883, 'samples': 478944, 'steps': 9977, 'loss/train': 2.2963249683380127} +07/25/2024 12:12:25 - INFO - __main__ - Step 9979: {'lr': 0.0004952508155681207, 'samples': 478992, 'steps': 9978, 'loss/train': 2.000687599182129} +07/25/2024 12:12:26 - INFO - __main__ - Step 9980: {'lr': 0.0004952497950157681, 'samples': 479040, 'steps': 9979, 'loss/train': 0.8895371556282043} +07/25/2024 12:12:26 - INFO - __main__ - Step 9981: {'lr': 0.0004952487743548257, 'samples': 479088, 'steps': 9980, 'loss/train': 2.3735029697418213} +07/25/2024 12:12:26 - INFO - __main__ - Step 9982: {'lr': 0.0004952477535852941, 'samples': 479136, 'steps': 9981, 'loss/train': 1.9159550666809082} +07/25/2024 12:12:27 - INFO - __main__ - Step 9983: {'lr': 0.0004952467327071737, 'samples': 479184, 'steps': 9982, 'loss/train': 1.6984964609146118} +07/25/2024 12:12:27 - INFO - __main__ - Step 9984: {'lr': 0.0004952457117204647, 'samples': 479232, 'steps': 9983, 'loss/train': 1.8166536092758179} +07/25/2024 12:12:27 - INFO - __main__ - Step 9985: {'lr': 0.000495244690625168, 'samples': 479280, 'steps': 9984, 'loss/train': 1.9081875085830688} +07/25/2024 12:12:27 - INFO - __main__ - Step 9986: {'lr': 0.0004952436694212838, 'samples': 479328, 'steps': 9985, 'loss/train': 2.232673406600952} +07/25/2024 12:12:28 - INFO - __main__ - Step 9987: {'lr': 0.0004952426481088125, 'samples': 479376, 'steps': 9986, 'loss/train': 0.17907647788524628} +07/25/2024 12:12:28 - INFO - __main__ - Step 9988: {'lr': 0.0004952416266877546, 'samples': 479424, 'steps': 9987, 'loss/train': 2.3653829097747803} +07/25/2024 12:12:28 - INFO - __main__ - Step 9989: {'lr': 0.0004952406051581108, 'samples': 479472, 'steps': 9988, 'loss/train': 1.860629677772522} +07/25/2024 12:12:29 - INFO - __main__ - Step 9990: {'lr': 0.0004952395835198811, 'samples': 479520, 'steps': 9989, 'loss/train': 1.9054317474365234} +07/25/2024 12:12:29 - INFO - __main__ - Step 9991: {'lr': 0.0004952385617730663, 'samples': 479568, 'steps': 9990, 'loss/train': 1.9998499155044556} +07/25/2024 12:12:29 - INFO - __main__ - Step 9992: {'lr': 0.0004952375399176667, 'samples': 479616, 'steps': 9991, 'loss/train': 2.1062674522399902} +07/25/2024 12:12:29 - INFO - __main__ - Step 9993: {'lr': 0.0004952365179536828, 'samples': 479664, 'steps': 9992, 'loss/train': 2.104299783706665} +07/25/2024 12:12:30 - INFO - __main__ - Step 9994: {'lr': 0.0004952354958811149, 'samples': 479712, 'steps': 9993, 'loss/train': 1.7614575624465942} +07/25/2024 12:12:30 - INFO - __main__ - Step 9995: {'lr': 0.0004952344736999638, 'samples': 479760, 'steps': 9994, 'loss/train': 1.8137586116790771} +07/25/2024 12:12:30 - INFO - __main__ - Step 9996: {'lr': 0.0004952334514102297, 'samples': 479808, 'steps': 9995, 'loss/train': 2.167727470397949} +07/25/2024 12:12:31 - INFO - __main__ - Step 9997: {'lr': 0.000495232429011913, 'samples': 479856, 'steps': 9996, 'loss/train': 2.1694133281707764} +07/25/2024 12:12:31 - INFO - __main__ - Step 9998: {'lr': 0.0004952314065050143, 'samples': 479904, 'steps': 9997, 'loss/train': 1.7100335359573364} +07/25/2024 12:12:31 - INFO - __main__ - Step 9999: {'lr': 0.000495230383889534, 'samples': 479952, 'steps': 9998, 'loss/train': 3.626089096069336} +07/25/2024 12:12:31 - INFO - __main__ - Step 10000: {'lr': 0.0004952293611654727, 'samples': 480000, 'steps': 9999, 'loss/train': 1.620881199836731} +07/25/2024 12:12:32 - INFO - __main__ - Step 10001: {'lr': 0.0004952283383328305, 'samples': 480048, 'steps': 10000, 'loss/train': 2.2893640995025635} +07/25/2024 12:12:32 - INFO - __main__ - Step 10002: {'lr': 0.0004952273153916081, 'samples': 480096, 'steps': 10001, 'loss/train': 2.2721445560455322} +07/25/2024 12:12:32 - INFO - __main__ - Step 10003: {'lr': 0.000495226292341806, 'samples': 480144, 'steps': 10002, 'loss/train': 1.8688530921936035} +07/25/2024 12:12:33 - INFO - __main__ - Step 10004: {'lr': 0.0004952252691834245, 'samples': 480192, 'steps': 10003, 'loss/train': 0.920895516872406} +07/25/2024 12:12:33 - INFO - __main__ - Step 10005: {'lr': 0.0004952242459164642, 'samples': 480240, 'steps': 10004, 'loss/train': 2.100407600402832} +07/25/2024 12:12:33 - INFO - __main__ - Step 10006: {'lr': 0.0004952232225409254, 'samples': 480288, 'steps': 10005, 'loss/train': 2.1617753505706787} +07/25/2024 12:12:33 - INFO - __main__ - Step 10007: {'lr': 0.0004952221990568087, 'samples': 480336, 'steps': 10006, 'loss/train': 1.6404504776000977} +07/25/2024 12:12:34 - INFO - __main__ - Step 10008: {'lr': 0.0004952211754641145, 'samples': 480384, 'steps': 10007, 'loss/train': 1.8554396629333496} +07/25/2024 12:12:34 - INFO - __main__ - Step 10009: {'lr': 0.0004952201517628432, 'samples': 480432, 'steps': 10008, 'loss/train': 1.894025206565857} +07/25/2024 12:12:34 - INFO - __main__ - Step 10010: {'lr': 0.0004952191279529952, 'samples': 480480, 'steps': 10009, 'loss/train': 1.8477665185928345} +07/25/2024 12:12:35 - INFO - __main__ - Step 10011: {'lr': 0.0004952181040345712, 'samples': 480528, 'steps': 10010, 'loss/train': 0.21849565207958221} +07/25/2024 12:12:35 - INFO - __main__ - Step 10012: {'lr': 0.0004952170800075714, 'samples': 480576, 'steps': 10011, 'loss/train': 1.6421644687652588} +07/25/2024 12:12:35 - INFO - __main__ - Step 10013: {'lr': 0.0004952160558719963, 'samples': 480624, 'steps': 10012, 'loss/train': 2.0351617336273193} +07/25/2024 12:12:35 - INFO - __main__ - Step 10014: {'lr': 0.0004952150316278464, 'samples': 480672, 'steps': 10013, 'loss/train': 1.6184868812561035} +07/25/2024 12:12:36 - INFO - __main__ - Step 10015: {'lr': 0.0004952140072751222, 'samples': 480720, 'steps': 10014, 'loss/train': 1.4829621315002441} +07/25/2024 12:12:36 - INFO - __main__ - Step 10016: {'lr': 0.0004952129828138241, 'samples': 480768, 'steps': 10015, 'loss/train': 2.220879316329956} +07/25/2024 12:12:36 - INFO - __main__ - Step 10017: {'lr': 0.0004952119582439526, 'samples': 480816, 'steps': 10016, 'loss/train': 1.786523461341858} +07/25/2024 12:12:36 - INFO - __main__ - Step 10018: {'lr': 0.0004952109335655082, 'samples': 480864, 'steps': 10017, 'loss/train': 1.037176489830017} +07/25/2024 12:12:37 - INFO - __main__ - Step 10019: {'lr': 0.0004952099087784911, 'samples': 480912, 'steps': 10018, 'loss/train': 1.40753972530365} +07/25/2024 12:12:37 - INFO - __main__ - Step 10020: {'lr': 0.000495208883882902, 'samples': 480960, 'steps': 10019, 'loss/train': 1.749956727027893} +07/25/2024 12:12:37 - INFO - __main__ - Step 10021: {'lr': 0.0004952078588787412, 'samples': 481008, 'steps': 10020, 'loss/train': 1.671415090560913} +07/25/2024 12:12:38 - INFO - __main__ - Step 10022: {'lr': 0.0004952068337660092, 'samples': 481056, 'steps': 10021, 'loss/train': 2.4485557079315186} +07/25/2024 12:12:38 - INFO - __main__ - Step 10023: {'lr': 0.0004952058085447066, 'samples': 481104, 'steps': 10022, 'loss/train': 1.809062123298645} +07/25/2024 12:12:38 - INFO - __main__ - Step 10024: {'lr': 0.0004952047832148338, 'samples': 481152, 'steps': 10023, 'loss/train': 1.85368013381958} +07/25/2024 12:12:38 - INFO - __main__ - Step 10025: {'lr': 0.0004952037577763911, 'samples': 481200, 'steps': 10024, 'loss/train': 1.7223345041275024} +07/25/2024 12:12:39 - INFO - __main__ - Step 10026: {'lr': 0.000495202732229379, 'samples': 481248, 'steps': 10025, 'loss/train': 2.1482694149017334} +07/25/2024 12:12:39 - INFO - __main__ - Step 10027: {'lr': 0.0004952017065737981, 'samples': 481296, 'steps': 10026, 'loss/train': 2.0023908615112305} +07/25/2024 12:12:39 - INFO - __main__ - Step 10028: {'lr': 0.0004952006808096487, 'samples': 481344, 'steps': 10027, 'loss/train': 2.012871026992798} +07/25/2024 12:12:40 - INFO - __main__ - Step 10029: {'lr': 0.0004951996549369313, 'samples': 481392, 'steps': 10028, 'loss/train': 2.2119076251983643} +07/25/2024 12:12:40 - INFO - __main__ - Step 10030: {'lr': 0.0004951986289556464, 'samples': 481440, 'steps': 10029, 'loss/train': 1.4951738119125366} +07/25/2024 12:12:40 - INFO - __main__ - Step 10031: {'lr': 0.0004951976028657945, 'samples': 481488, 'steps': 10030, 'loss/train': 2.0640292167663574} +07/25/2024 12:12:40 - INFO - __main__ - Step 10032: {'lr': 0.0004951965766673758, 'samples': 481536, 'steps': 10031, 'loss/train': 1.9897533655166626} +07/25/2024 12:12:41 - INFO - __main__ - Step 10033: {'lr': 0.000495195550360391, 'samples': 481584, 'steps': 10032, 'loss/train': 2.1226859092712402} +07/25/2024 12:12:41 - INFO - __main__ - Step 10034: {'lr': 0.0004951945239448404, 'samples': 481632, 'steps': 10033, 'loss/train': 2.393578052520752} +07/25/2024 12:12:41 - INFO - __main__ - Step 10035: {'lr': 0.0004951934974207246, 'samples': 481680, 'steps': 10034, 'loss/train': 0.8310956358909607} +07/25/2024 12:12:42 - INFO - __main__ - Step 10036: {'lr': 0.0004951924707880441, 'samples': 481728, 'steps': 10035, 'loss/train': 0.4129942059516907} +07/25/2024 12:12:42 - INFO - __main__ - Step 10037: {'lr': 0.0004951914440467991, 'samples': 481776, 'steps': 10036, 'loss/train': 2.6737215518951416} +07/25/2024 12:12:42 - INFO - __main__ - Step 10038: {'lr': 0.0004951904171969902, 'samples': 481824, 'steps': 10037, 'loss/train': 1.9594014883041382} +07/25/2024 12:12:42 - INFO - __main__ - Step 10039: {'lr': 0.0004951893902386179, 'samples': 481872, 'steps': 10038, 'loss/train': 1.9962620735168457} +07/25/2024 12:12:43 - INFO - __main__ - Step 10040: {'lr': 0.0004951883631716826, 'samples': 481920, 'steps': 10039, 'loss/train': 2.0280346870422363} +07/25/2024 12:12:43 - INFO - __main__ - Step 10041: {'lr': 0.0004951873359961848, 'samples': 481968, 'steps': 10040, 'loss/train': 2.1441707611083984} +07/25/2024 12:12:43 - INFO - __main__ - Step 10042: {'lr': 0.0004951863087121249, 'samples': 482016, 'steps': 10041, 'loss/train': 1.375882863998413} +07/25/2024 12:12:44 - INFO - __main__ - Step 10043: {'lr': 0.0004951852813195034, 'samples': 482064, 'steps': 10042, 'loss/train': 0.719922661781311} +07/25/2024 12:12:44 - INFO - __main__ - Step 10044: {'lr': 0.0004951842538183207, 'samples': 482112, 'steps': 10043, 'loss/train': 1.4628551006317139} +07/25/2024 12:12:44 - INFO - __main__ - Step 10045: {'lr': 0.0004951832262085772, 'samples': 482160, 'steps': 10044, 'loss/train': 0.9779746532440186} +07/25/2024 12:12:44 - INFO - __main__ - Step 10046: {'lr': 0.0004951821984902735, 'samples': 482208, 'steps': 10045, 'loss/train': 2.036726951599121} +07/25/2024 12:12:45 - INFO - __main__ - Step 10047: {'lr': 0.00049518117066341, 'samples': 482256, 'steps': 10046, 'loss/train': 1.3005733489990234} +07/25/2024 12:12:45 - INFO - __main__ - Step 10048: {'lr': 0.0004951801427279872, 'samples': 482304, 'steps': 10047, 'loss/train': 2.0568699836730957} +07/25/2024 12:12:45 - INFO - __main__ - Step 10049: {'lr': 0.0004951791146840053, 'samples': 482352, 'steps': 10048, 'loss/train': 1.64546537399292} +07/25/2024 12:12:46 - INFO - __main__ - Step 10050: {'lr': 0.0004951780865314652, 'samples': 482400, 'steps': 10049, 'loss/train': 1.9279978275299072} +07/25/2024 12:12:46 - INFO - __main__ - Step 10051: {'lr': 0.000495177058270367, 'samples': 482448, 'steps': 10050, 'loss/train': 2.0310022830963135} +07/25/2024 12:12:46 - INFO - __main__ - Step 10052: {'lr': 0.0004951760299007113, 'samples': 482496, 'steps': 10051, 'loss/train': 1.7644877433776855} +07/25/2024 12:12:46 - INFO - __main__ - Step 10053: {'lr': 0.0004951750014224984, 'samples': 482544, 'steps': 10052, 'loss/train': 2.129365921020508} +07/25/2024 12:12:47 - INFO - __main__ - Step 10054: {'lr': 0.0004951739728357291, 'samples': 482592, 'steps': 10053, 'loss/train': 1.6628711223602295} +07/25/2024 12:12:47 - INFO - __main__ - Step 10055: {'lr': 0.0004951729441404035, 'samples': 482640, 'steps': 10054, 'loss/train': 1.7141081094741821} +07/25/2024 12:12:47 - INFO - __main__ - Step 10056: {'lr': 0.0004951719153365223, 'samples': 482688, 'steps': 10055, 'loss/train': 2.3047313690185547} +07/25/2024 12:12:48 - INFO - __main__ - Step 10057: {'lr': 0.0004951708864240857, 'samples': 482736, 'steps': 10056, 'loss/train': 1.5251661539077759} +07/25/2024 12:12:48 - INFO - __main__ - Step 10058: {'lr': 0.0004951698574030944, 'samples': 482784, 'steps': 10057, 'loss/train': 1.9092128276824951} +07/25/2024 12:12:48 - DEBUG - datasets.packaged_modules.json.json - Batch of 10822774 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:12:48 - INFO - __main__ - Step 10059: {'lr': 0.0004951688282735487, 'samples': 482832, 'steps': 10058, 'loss/train': 2.1026053428649902} +07/25/2024 12:12:48 - INFO - __main__ - Step 10060: {'lr': 0.0004951677990354492, 'samples': 482880, 'steps': 10059, 'loss/train': 1.3317492008209229} +07/25/2024 12:12:49 - INFO - __main__ - Step 10061: {'lr': 0.0004951667696887961, 'samples': 482928, 'steps': 10060, 'loss/train': 2.6111981868743896} +07/25/2024 12:12:49 - INFO - __main__ - Step 10062: {'lr': 0.0004951657402335901, 'samples': 482976, 'steps': 10061, 'loss/train': 1.8560749292373657} +07/25/2024 12:12:49 - INFO - __main__ - Step 10063: {'lr': 0.0004951647106698316, 'samples': 483024, 'steps': 10062, 'loss/train': 1.8503847122192383} +07/25/2024 12:12:50 - INFO - __main__ - Step 10064: {'lr': 0.000495163680997521, 'samples': 483072, 'steps': 10063, 'loss/train': 2.2120766639709473} +07/25/2024 12:12:50 - INFO - __main__ - Step 10065: {'lr': 0.0004951626512166589, 'samples': 483120, 'steps': 10064, 'loss/train': 1.8912301063537598} +07/25/2024 12:12:50 - INFO - __main__ - Step 10066: {'lr': 0.0004951616213272456, 'samples': 483168, 'steps': 10065, 'loss/train': 1.2149277925491333} +07/25/2024 12:12:50 - INFO - __main__ - Step 10067: {'lr': 0.0004951605913292816, 'samples': 483216, 'steps': 10066, 'loss/train': 1.6248644590377808} +07/25/2024 12:12:51 - INFO - __main__ - Step 10068: {'lr': 0.0004951595612227673, 'samples': 483264, 'steps': 10067, 'loss/train': 1.3897093534469604} +07/25/2024 12:12:51 - INFO - __main__ - Step 10069: {'lr': 0.0004951585310077033, 'samples': 483312, 'steps': 10068, 'loss/train': 1.9830946922302246} +07/25/2024 12:12:51 - INFO - __main__ - Step 10070: {'lr': 0.0004951575006840899, 'samples': 483360, 'steps': 10069, 'loss/train': 2.202261209487915} +07/25/2024 12:12:52 - INFO - __main__ - Step 10071: {'lr': 0.0004951564702519276, 'samples': 483408, 'steps': 10070, 'loss/train': 1.605761170387268} +07/25/2024 12:12:52 - INFO - __main__ - Step 10072: {'lr': 0.0004951554397112169, 'samples': 483456, 'steps': 10071, 'loss/train': 2.4219110012054443} +07/25/2024 12:12:52 - INFO - __main__ - Step 10073: {'lr': 0.0004951544090619584, 'samples': 483504, 'steps': 10072, 'loss/train': 1.4318435192108154} +07/25/2024 12:12:52 - INFO - __main__ - Step 10074: {'lr': 0.0004951533783041522, 'samples': 483552, 'steps': 10073, 'loss/train': 2.0986275672912598} +07/25/2024 12:12:53 - INFO - __main__ - Step 10075: {'lr': 0.000495152347437799, 'samples': 483600, 'steps': 10074, 'loss/train': 1.837843418121338} +07/25/2024 12:12:53 - INFO - __main__ - Step 10076: {'lr': 0.0004951513164628993, 'samples': 483648, 'steps': 10075, 'loss/train': 1.9687161445617676} +07/25/2024 12:12:53 - INFO - __main__ - Step 10077: {'lr': 0.0004951502853794534, 'samples': 483696, 'steps': 10076, 'loss/train': 2.0557851791381836} +07/25/2024 12:12:54 - INFO - __main__ - Step 10078: {'lr': 0.0004951492541874618, 'samples': 483744, 'steps': 10077, 'loss/train': 1.840969204902649} +07/25/2024 12:12:54 - INFO - __main__ - Step 10079: {'lr': 0.0004951482228869251, 'samples': 483792, 'steps': 10078, 'loss/train': 2.2743334770202637} +07/25/2024 12:12:54 - INFO - __main__ - Step 10080: {'lr': 0.0004951471914778435, 'samples': 483840, 'steps': 10079, 'loss/train': 2.166361093521118} +07/25/2024 12:12:54 - INFO - __main__ - Step 10081: {'lr': 0.0004951461599602176, 'samples': 483888, 'steps': 10080, 'loss/train': 2.0687685012817383} +07/25/2024 12:12:55 - INFO - __main__ - Step 10082: {'lr': 0.0004951451283340479, 'samples': 483936, 'steps': 10081, 'loss/train': 2.083980083465576} +07/25/2024 12:12:55 - INFO - __main__ - Step 10083: {'lr': 0.0004951440965993347, 'samples': 483984, 'steps': 10082, 'loss/train': 1.7496776580810547} +07/25/2024 12:12:55 - INFO - __main__ - Step 10084: {'lr': 0.0004951430647560787, 'samples': 484032, 'steps': 10083, 'loss/train': 2.676994562149048} +07/25/2024 12:12:56 - INFO - __main__ - Step 10085: {'lr': 0.0004951420328042802, 'samples': 484080, 'steps': 10084, 'loss/train': 2.758410930633545} +07/25/2024 12:12:56 - INFO - __main__ - Step 10086: {'lr': 0.0004951410007439397, 'samples': 484128, 'steps': 10085, 'loss/train': 1.822609782218933} +07/25/2024 12:12:56 - INFO - __main__ - Step 10087: {'lr': 0.0004951399685750575, 'samples': 484176, 'steps': 10086, 'loss/train': 2.2716798782348633} +07/25/2024 12:12:56 - INFO - __main__ - Step 10088: {'lr': 0.0004951389362976343, 'samples': 484224, 'steps': 10087, 'loss/train': 1.9719583988189697} +07/25/2024 12:12:57 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488985 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:12:57 - INFO - __main__ - Step 10089: {'lr': 0.0004951379039116704, 'samples': 484272, 'steps': 10088, 'loss/train': 1.8740899562835693} +07/25/2024 12:12:57 - INFO - __main__ - Step 10090: {'lr': 0.0004951368714171664, 'samples': 484320, 'steps': 10089, 'loss/train': 1.0911246538162231} +07/25/2024 12:12:57 - INFO - __main__ - Step 10091: {'lr': 0.0004951358388141225, 'samples': 484368, 'steps': 10090, 'loss/train': 1.2332940101623535} +07/25/2024 12:12:58 - INFO - __main__ - Step 10092: {'lr': 0.0004951348061025395, 'samples': 484416, 'steps': 10091, 'loss/train': 1.2929449081420898} +07/25/2024 12:12:58 - INFO - __main__ - Step 10093: {'lr': 0.0004951337732824175, 'samples': 484464, 'steps': 10092, 'loss/train': 1.8549193143844604} +07/25/2024 12:12:58 - INFO - __main__ - Step 10094: {'lr': 0.0004951327403537573, 'samples': 484512, 'steps': 10093, 'loss/train': 2.2040324211120605} +07/25/2024 12:12:58 - INFO - __main__ - Step 10095: {'lr': 0.0004951317073165591, 'samples': 484560, 'steps': 10094, 'loss/train': 1.7702876329421997} +07/25/2024 12:12:59 - INFO - __main__ - Step 10096: {'lr': 0.0004951306741708234, 'samples': 484608, 'steps': 10095, 'loss/train': 2.032376289367676} +07/25/2024 12:12:59 - INFO - __main__ - Step 10097: {'lr': 0.0004951296409165508, 'samples': 484656, 'steps': 10096, 'loss/train': 1.3117525577545166} +07/25/2024 12:12:59 - INFO - __main__ - Step 10098: {'lr': 0.0004951286075537416, 'samples': 484704, 'steps': 10097, 'loss/train': 1.4728327989578247} +07/25/2024 12:13:00 - INFO - __main__ - Step 10099: {'lr': 0.0004951275740823964, 'samples': 484752, 'steps': 10098, 'loss/train': 1.7730780839920044} +07/25/2024 12:13:00 - INFO - __main__ - Step 10100: {'lr': 0.0004951265405025155, 'samples': 484800, 'steps': 10099, 'loss/train': 1.7772427797317505} +07/25/2024 12:13:00 - INFO - __main__ - Step 10101: {'lr': 0.0004951255068140995, 'samples': 484848, 'steps': 10100, 'loss/train': 2.0279455184936523} +07/25/2024 12:13:00 - INFO - __main__ - Step 10102: {'lr': 0.0004951244730171487, 'samples': 484896, 'steps': 10101, 'loss/train': 2.0059916973114014} +07/25/2024 12:13:01 - INFO - __main__ - Step 10103: {'lr': 0.0004951234391116637, 'samples': 484944, 'steps': 10102, 'loss/train': 2.3689918518066406} +07/25/2024 12:13:01 - INFO - __main__ - Step 10104: {'lr': 0.000495122405097645, 'samples': 484992, 'steps': 10103, 'loss/train': 1.9063626527786255} +07/25/2024 12:13:01 - INFO - __main__ - Step 10105: {'lr': 0.0004951213709750928, 'samples': 485040, 'steps': 10104, 'loss/train': 1.7327375411987305} +07/25/2024 12:13:02 - INFO - __main__ - Step 10106: {'lr': 0.0004951203367440078, 'samples': 485088, 'steps': 10105, 'loss/train': 1.9188536405563354} +07/25/2024 12:13:02 - INFO - __main__ - Step 10107: {'lr': 0.0004951193024043904, 'samples': 485136, 'steps': 10106, 'loss/train': 2.2457079887390137} +07/25/2024 12:13:02 - INFO - __main__ - Step 10108: {'lr': 0.0004951182679562411, 'samples': 485184, 'steps': 10107, 'loss/train': 1.8964015245437622} +07/25/2024 12:13:02 - INFO - __main__ - Step 10109: {'lr': 0.0004951172333995602, 'samples': 485232, 'steps': 10108, 'loss/train': 1.992087721824646} +07/25/2024 12:13:03 - INFO - __main__ - Step 10110: {'lr': 0.0004951161987343483, 'samples': 485280, 'steps': 10109, 'loss/train': 2.0544540882110596} +07/25/2024 12:13:03 - INFO - __main__ - Step 10111: {'lr': 0.0004951151639606058, 'samples': 485328, 'steps': 10110, 'loss/train': 2.059070348739624} +07/25/2024 12:13:03 - INFO - __main__ - Step 10112: {'lr': 0.0004951141290783331, 'samples': 485376, 'steps': 10111, 'loss/train': 2.2056450843811035} +07/25/2024 12:13:04 - INFO - __main__ - Step 10113: {'lr': 0.0004951130940875309, 'samples': 485424, 'steps': 10112, 'loss/train': 1.7313543558120728} +07/25/2024 12:13:04 - INFO - __main__ - Step 10114: {'lr': 0.0004951120589881993, 'samples': 485472, 'steps': 10113, 'loss/train': 0.9148439168930054} +07/25/2024 12:13:04 - INFO - __main__ - Step 10115: {'lr': 0.0004951110237803391, 'samples': 485520, 'steps': 10114, 'loss/train': 2.117408037185669} +07/25/2024 12:13:04 - INFO - __main__ - Step 10116: {'lr': 0.0004951099884639506, 'samples': 485568, 'steps': 10115, 'loss/train': 1.203749179840088} +07/25/2024 12:13:05 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488112 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:13:05 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488112 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:13:05 - INFO - __main__ - Step 10117: {'lr': 0.0004951089530390342, 'samples': 485616, 'steps': 10116, 'loss/train': 1.6685104370117188} +07/25/2024 12:13:05 - INFO - __main__ - Step 10118: {'lr': 0.0004951079175055904, 'samples': 485664, 'steps': 10117, 'loss/train': 1.945478081703186} +07/25/2024 12:13:05 - INFO - __main__ - Step 10119: {'lr': 0.0004951068818636197, 'samples': 485712, 'steps': 10118, 'loss/train': 2.1844491958618164} +07/25/2024 12:13:05 - INFO - __main__ - Step 10120: {'lr': 0.0004951058461131226, 'samples': 485760, 'steps': 10119, 'loss/train': 1.9408104419708252} +07/25/2024 12:13:06 - INFO - __main__ - Step 10121: {'lr': 0.0004951048102540995, 'samples': 485808, 'steps': 10120, 'loss/train': 1.3731870651245117} +07/25/2024 12:13:06 - INFO - __main__ - Step 10122: {'lr': 0.0004951037742865509, 'samples': 485856, 'steps': 10121, 'loss/train': 1.4589117765426636} +07/25/2024 12:13:06 - INFO - __main__ - Step 10123: {'lr': 0.0004951027382104771, 'samples': 485904, 'steps': 10122, 'loss/train': 1.3719254732131958} +07/25/2024 12:13:07 - INFO - __main__ - Step 10124: {'lr': 0.0004951017020258787, 'samples': 485952, 'steps': 10123, 'loss/train': 2.091651201248169} +07/25/2024 12:13:07 - INFO - __main__ - Step 10125: {'lr': 0.0004951006657327561, 'samples': 486000, 'steps': 10124, 'loss/train': 1.533692717552185} +07/25/2024 12:13:07 - INFO - __main__ - Step 10126: {'lr': 0.0004950996293311098, 'samples': 486048, 'steps': 10125, 'loss/train': 1.6621716022491455} +07/25/2024 12:13:07 - INFO - __main__ - Step 10127: {'lr': 0.0004950985928209404, 'samples': 486096, 'steps': 10126, 'loss/train': 1.3005448579788208} +07/25/2024 12:13:08 - INFO - __main__ - Step 10128: {'lr': 0.000495097556202248, 'samples': 486144, 'steps': 10127, 'loss/train': 1.8799381256103516} +07/25/2024 12:13:08 - INFO - __main__ - Step 10129: {'lr': 0.0004950965194750334, 'samples': 486192, 'steps': 10128, 'loss/train': 2.0835084915161133} +07/25/2024 12:13:08 - INFO - __main__ - Step 10130: {'lr': 0.0004950954826392969, 'samples': 486240, 'steps': 10129, 'loss/train': 2.0020768642425537} +07/25/2024 12:13:09 - INFO - __main__ - Step 10131: {'lr': 0.000495094445695039, 'samples': 486288, 'steps': 10130, 'loss/train': 1.856764793395996} +07/25/2024 12:13:09 - INFO - __main__ - Step 10132: {'lr': 0.0004950934086422602, 'samples': 486336, 'steps': 10131, 'loss/train': 1.9687371253967285} +07/25/2024 12:13:09 - INFO - __main__ - Step 10133: {'lr': 0.0004950923714809608, 'samples': 486384, 'steps': 10132, 'loss/train': 1.3785699605941772} +07/25/2024 12:13:09 - INFO - __main__ - Step 10134: {'lr': 0.0004950913342111414, 'samples': 486432, 'steps': 10133, 'loss/train': 1.8432390689849854} +07/25/2024 12:13:10 - INFO - __main__ - Step 10135: {'lr': 0.0004950902968328025, 'samples': 486480, 'steps': 10134, 'loss/train': 1.9127799272537231} +07/25/2024 12:13:10 - INFO - __main__ - Step 10136: {'lr': 0.0004950892593459444, 'samples': 486528, 'steps': 10135, 'loss/train': 2.1327710151672363} +07/25/2024 12:13:10 - INFO - __main__ - Step 10137: {'lr': 0.0004950882217505676, 'samples': 486576, 'steps': 10136, 'loss/train': 1.6419970989227295} +07/25/2024 12:13:11 - INFO - __main__ - Step 10138: {'lr': 0.0004950871840466727, 'samples': 486624, 'steps': 10137, 'loss/train': 1.913083553314209} +07/25/2024 12:13:11 - INFO - __main__ - Step 10139: {'lr': 0.00049508614623426, 'samples': 486672, 'steps': 10138, 'loss/train': 2.0105631351470947} +07/25/2024 12:13:11 - INFO - __main__ - Step 10140: {'lr': 0.0004950851083133301, 'samples': 486720, 'steps': 10139, 'loss/train': 1.3670367002487183} +07/25/2024 12:13:11 - INFO - __main__ - Step 10141: {'lr': 0.0004950840702838833, 'samples': 486768, 'steps': 10140, 'loss/train': 1.5532383918762207} +07/25/2024 12:13:12 - INFO - __main__ - Step 10142: {'lr': 0.0004950830321459202, 'samples': 486816, 'steps': 10141, 'loss/train': 1.7865573167800903} +07/25/2024 12:13:12 - INFO - __main__ - Step 10143: {'lr': 0.0004950819938994411, 'samples': 486864, 'steps': 10142, 'loss/train': 1.4481122493743896} +07/25/2024 12:13:12 - INFO - __main__ - Step 10144: {'lr': 0.0004950809555444467, 'samples': 486912, 'steps': 10143, 'loss/train': 1.9317071437835693} +07/25/2024 12:13:13 - INFO - __main__ - Step 10145: {'lr': 0.0004950799170809373, 'samples': 486960, 'steps': 10144, 'loss/train': 1.474518895149231} +07/25/2024 12:13:13 - INFO - __main__ - Step 10146: {'lr': 0.0004950788785089132, 'samples': 487008, 'steps': 10145, 'loss/train': 0.8735317587852478} +07/25/2024 12:13:13 - INFO - __main__ - Step 10147: {'lr': 0.0004950778398283752, 'samples': 487056, 'steps': 10146, 'loss/train': 1.6017383337020874} +07/25/2024 12:13:13 - INFO - __main__ - Step 10148: {'lr': 0.0004950768010393236, 'samples': 487104, 'steps': 10147, 'loss/train': 1.3185206651687622} +07/25/2024 12:13:14 - INFO - __main__ - Step 10149: {'lr': 0.0004950757621417588, 'samples': 487152, 'steps': 10148, 'loss/train': 2.2281582355499268} +07/25/2024 12:13:14 - INFO - __main__ - Step 10150: {'lr': 0.0004950747231356814, 'samples': 487200, 'steps': 10149, 'loss/train': 1.8337432146072388} +07/25/2024 12:13:14 - INFO - __main__ - Step 10151: {'lr': 0.0004950736840210917, 'samples': 487248, 'steps': 10150, 'loss/train': 0.4888598620891571} +07/25/2024 12:13:15 - INFO - __main__ - Step 10152: {'lr': 0.0004950726447979902, 'samples': 487296, 'steps': 10151, 'loss/train': 2.796433448791504} +07/25/2024 12:13:15 - INFO - __main__ - Step 10153: {'lr': 0.0004950716054663775, 'samples': 487344, 'steps': 10152, 'loss/train': 1.7042324542999268} +07/25/2024 12:13:15 - INFO - __main__ - Step 10154: {'lr': 0.000495070566026254, 'samples': 487392, 'steps': 10153, 'loss/train': 1.735171914100647} +07/25/2024 12:13:15 - INFO - __main__ - Step 10155: {'lr': 0.00049506952647762, 'samples': 487440, 'steps': 10154, 'loss/train': 1.7454164028167725} +07/25/2024 12:13:16 - INFO - __main__ - Step 10156: {'lr': 0.0004950684868204762, 'samples': 487488, 'steps': 10155, 'loss/train': 1.8911224603652954} +07/25/2024 12:13:16 - INFO - __main__ - Step 10157: {'lr': 0.0004950674470548227, 'samples': 487536, 'steps': 10156, 'loss/train': 1.922635555267334} +07/25/2024 12:13:16 - INFO - __main__ - Step 10158: {'lr': 0.0004950664071806605, 'samples': 487584, 'steps': 10157, 'loss/train': 1.4584813117980957} +07/25/2024 12:13:17 - INFO - __main__ - Step 10159: {'lr': 0.0004950653671979895, 'samples': 487632, 'steps': 10158, 'loss/train': 2.410616874694824} +07/25/2024 12:13:17 - INFO - __main__ - Step 10160: {'lr': 0.0004950643271068106, 'samples': 487680, 'steps': 10159, 'loss/train': 2.079800605773926} +07/25/2024 12:13:17 - INFO - __main__ - Step 10161: {'lr': 0.000495063286907124, 'samples': 487728, 'steps': 10160, 'loss/train': 1.8133419752120972} +07/25/2024 12:13:17 - INFO - __main__ - Step 10162: {'lr': 0.0004950622465989303, 'samples': 487776, 'steps': 10161, 'loss/train': 1.9649943113327026} +07/25/2024 12:13:18 - INFO - __main__ - Step 10163: {'lr': 0.0004950612061822299, 'samples': 487824, 'steps': 10162, 'loss/train': 1.98556649684906} +07/25/2024 12:13:18 - INFO - __main__ - Step 10164: {'lr': 0.0004950601656570233, 'samples': 487872, 'steps': 10163, 'loss/train': 1.7526482343673706} +07/25/2024 12:13:18 - INFO - __main__ - Step 10165: {'lr': 0.0004950591250233109, 'samples': 487920, 'steps': 10164, 'loss/train': 1.2727690935134888} +07/25/2024 12:13:19 - INFO - __main__ - Step 10166: {'lr': 0.0004950580842810931, 'samples': 487968, 'steps': 10165, 'loss/train': 2.049464225769043} +07/25/2024 12:13:19 - INFO - __main__ - Step 10167: {'lr': 0.0004950570434303705, 'samples': 488016, 'steps': 10166, 'loss/train': 1.24921715259552} +07/25/2024 12:13:19 - INFO - __main__ - Step 10168: {'lr': 0.0004950560024711435, 'samples': 488064, 'steps': 10167, 'loss/train': 2.239567279815674} +07/25/2024 12:13:19 - INFO - __main__ - Step 10169: {'lr': 0.0004950549614034126, 'samples': 488112, 'steps': 10168, 'loss/train': 1.6602991819381714} +07/25/2024 12:13:20 - INFO - __main__ - Step 10170: {'lr': 0.0004950539202271782, 'samples': 488160, 'steps': 10169, 'loss/train': 2.559614658355713} +07/25/2024 12:13:20 - INFO - __main__ - Step 10171: {'lr': 0.0004950528789424408, 'samples': 488208, 'steps': 10170, 'loss/train': 2.1892545223236084} +07/25/2024 12:13:20 - INFO - __main__ - Step 10172: {'lr': 0.0004950518375492008, 'samples': 488256, 'steps': 10171, 'loss/train': 1.9254426956176758} +07/25/2024 12:13:21 - INFO - __main__ - Step 10173: {'lr': 0.0004950507960474589, 'samples': 488304, 'steps': 10172, 'loss/train': 2.1343889236450195} +07/25/2024 12:13:21 - INFO - __main__ - Step 10174: {'lr': 0.0004950497544372151, 'samples': 488352, 'steps': 10173, 'loss/train': 2.499945640563965} +07/25/2024 12:13:21 - INFO - __main__ - Step 10175: {'lr': 0.0004950487127184703, 'samples': 488400, 'steps': 10174, 'loss/train': 1.5951203107833862} +07/25/2024 12:13:21 - INFO - __main__ - Step 10176: {'lr': 0.0004950476708912248, 'samples': 488448, 'steps': 10175, 'loss/train': 1.7268074750900269} +07/25/2024 12:13:22 - INFO - __main__ - Step 10177: {'lr': 0.000495046628955479, 'samples': 488496, 'steps': 10176, 'loss/train': 1.855013132095337} +07/25/2024 12:13:22 - INFO - __main__ - Step 10178: {'lr': 0.0004950455869112334, 'samples': 488544, 'steps': 10177, 'loss/train': 1.8310542106628418} +07/25/2024 12:13:22 - INFO - __main__ - Step 10179: {'lr': 0.0004950445447584886, 'samples': 488592, 'steps': 10178, 'loss/train': 1.845147728919983} +07/25/2024 12:13:23 - INFO - __main__ - Step 10180: {'lr': 0.0004950435024972448, 'samples': 488640, 'steps': 10179, 'loss/train': 1.901201605796814} +07/25/2024 12:13:23 - INFO - __main__ - Step 10181: {'lr': 0.0004950424601275027, 'samples': 488688, 'steps': 10180, 'loss/train': 2.2098419666290283} +07/25/2024 12:13:23 - INFO - __main__ - Step 10182: {'lr': 0.0004950414176492627, 'samples': 488736, 'steps': 10181, 'loss/train': 2.1229286193847656} +07/25/2024 12:13:23 - INFO - __main__ - Step 10183: {'lr': 0.0004950403750625252, 'samples': 488784, 'steps': 10182, 'loss/train': 2.2555696964263916} +07/25/2024 12:13:24 - INFO - __main__ - Step 10184: {'lr': 0.0004950393323672906, 'samples': 488832, 'steps': 10183, 'loss/train': 2.2548139095306396} +07/25/2024 12:13:24 - INFO - __main__ - Step 10185: {'lr': 0.0004950382895635595, 'samples': 488880, 'steps': 10184, 'loss/train': 3.4668357372283936} +07/25/2024 12:13:24 - INFO - __main__ - Step 10186: {'lr': 0.0004950372466513323, 'samples': 488928, 'steps': 10185, 'loss/train': 1.6000300645828247} +07/25/2024 12:13:25 - INFO - __main__ - Step 10187: {'lr': 0.0004950362036306095, 'samples': 488976, 'steps': 10186, 'loss/train': 1.7554869651794434} +07/25/2024 12:13:25 - INFO - __main__ - Step 10188: {'lr': 0.0004950351605013915, 'samples': 489024, 'steps': 10187, 'loss/train': 1.9417545795440674} +07/25/2024 12:13:25 - INFO - __main__ - Step 10189: {'lr': 0.0004950341172636788, 'samples': 489072, 'steps': 10188, 'loss/train': 1.82001531124115} +07/25/2024 12:13:25 - INFO - __main__ - Step 10190: {'lr': 0.0004950330739174718, 'samples': 489120, 'steps': 10189, 'loss/train': 1.913621187210083} +07/25/2024 12:13:26 - INFO - __main__ - Step 10191: {'lr': 0.0004950320304627712, 'samples': 489168, 'steps': 10190, 'loss/train': 1.4347059726715088} +07/25/2024 12:13:26 - INFO - __main__ - Step 10192: {'lr': 0.0004950309868995772, 'samples': 489216, 'steps': 10191, 'loss/train': 1.7561464309692383} +07/25/2024 12:13:26 - INFO - __main__ - Step 10193: {'lr': 0.0004950299432278902, 'samples': 489264, 'steps': 10192, 'loss/train': 1.2649660110473633} +07/25/2024 12:13:26 - INFO - __main__ - Step 10194: {'lr': 0.000495028899447711, 'samples': 489312, 'steps': 10193, 'loss/train': 1.7129229307174683} +07/25/2024 12:13:27 - INFO - __main__ - Step 10195: {'lr': 0.0004950278555590398, 'samples': 489360, 'steps': 10194, 'loss/train': 2.4640471935272217} +07/25/2024 12:13:27 - INFO - __main__ - Step 10196: {'lr': 0.0004950268115618771, 'samples': 489408, 'steps': 10195, 'loss/train': 2.4344687461853027} +07/25/2024 12:13:27 - INFO - __main__ - Step 10197: {'lr': 0.0004950257674562234, 'samples': 489456, 'steps': 10196, 'loss/train': 1.9808506965637207} +07/25/2024 12:13:28 - INFO - __main__ - Step 10198: {'lr': 0.0004950247232420792, 'samples': 489504, 'steps': 10197, 'loss/train': 1.683869481086731} +07/25/2024 12:13:28 - INFO - __main__ - Step 10199: {'lr': 0.0004950236789194449, 'samples': 489552, 'steps': 10198, 'loss/train': 1.9267336130142212} +07/25/2024 12:13:28 - INFO - __main__ - Step 10200: {'lr': 0.000495022634488321, 'samples': 489600, 'steps': 10199, 'loss/train': 1.4546953439712524} +07/25/2024 12:13:28 - INFO - __main__ - Step 10201: {'lr': 0.0004950215899487078, 'samples': 489648, 'steps': 10200, 'loss/train': 2.08762788772583} +07/25/2024 12:13:29 - INFO - __main__ - Step 10202: {'lr': 0.0004950205453006061, 'samples': 489696, 'steps': 10201, 'loss/train': 1.7633748054504395} +07/25/2024 12:13:29 - INFO - __main__ - Step 10203: {'lr': 0.0004950195005440161, 'samples': 489744, 'steps': 10202, 'loss/train': 1.755348801612854} +07/25/2024 12:13:29 - INFO - __main__ - Step 10204: {'lr': 0.0004950184556789383, 'samples': 489792, 'steps': 10203, 'loss/train': 2.0543372631073} +07/25/2024 12:13:30 - INFO - __main__ - Step 10205: {'lr': 0.0004950174107053733, 'samples': 489840, 'steps': 10204, 'loss/train': 2.0311851501464844} +07/25/2024 12:13:30 - INFO - __main__ - Step 10206: {'lr': 0.0004950163656233213, 'samples': 489888, 'steps': 10205, 'loss/train': 2.087566614151001} +07/25/2024 12:13:30 - INFO - __main__ - Step 10207: {'lr': 0.000495015320432783, 'samples': 489936, 'steps': 10206, 'loss/train': 1.897243618965149} +07/25/2024 12:13:30 - INFO - __main__ - Step 10208: {'lr': 0.0004950142751337588, 'samples': 489984, 'steps': 10207, 'loss/train': 1.8422861099243164} +07/25/2024 12:13:31 - INFO - __main__ - Step 10209: {'lr': 0.0004950132297262491, 'samples': 490032, 'steps': 10208, 'loss/train': 3.0778656005859375} +07/25/2024 12:13:31 - INFO - __main__ - Step 10210: {'lr': 0.0004950121842102544, 'samples': 490080, 'steps': 10209, 'loss/train': 1.6372578144073486} +07/25/2024 12:13:31 - INFO - __main__ - Step 10211: {'lr': 0.0004950111385857752, 'samples': 490128, 'steps': 10210, 'loss/train': 2.236417770385742} +07/25/2024 12:13:32 - INFO - __main__ - Step 10212: {'lr': 0.000495010092852812, 'samples': 490176, 'steps': 10211, 'loss/train': 2.271071434020996} +07/25/2024 12:13:32 - INFO - __main__ - Step 10213: {'lr': 0.0004950090470113651, 'samples': 490224, 'steps': 10212, 'loss/train': 1.7958112955093384} +07/25/2024 12:13:32 - INFO - __main__ - Step 10214: {'lr': 0.000495008001061435, 'samples': 490272, 'steps': 10213, 'loss/train': 2.011956214904785} +07/25/2024 12:13:32 - INFO - __main__ - Step 10215: {'lr': 0.0004950069550030224, 'samples': 490320, 'steps': 10214, 'loss/train': 2.010291337966919} +07/25/2024 12:13:33 - INFO - __main__ - Step 10216: {'lr': 0.0004950059088361274, 'samples': 490368, 'steps': 10215, 'loss/train': 2.1971194744110107} +07/25/2024 12:13:33 - INFO - __main__ - Step 10217: {'lr': 0.0004950048625607507, 'samples': 490416, 'steps': 10216, 'loss/train': 1.2968107461929321} +07/25/2024 12:13:33 - INFO - __main__ - Step 10218: {'lr': 0.0004950038161768928, 'samples': 490464, 'steps': 10217, 'loss/train': 2.027684450149536} +07/25/2024 12:13:34 - INFO - __main__ - Step 10219: {'lr': 0.0004950027696845541, 'samples': 490512, 'steps': 10218, 'loss/train': 2.2362053394317627} +07/25/2024 12:13:34 - INFO - __main__ - Step 10220: {'lr': 0.0004950017230837349, 'samples': 490560, 'steps': 10219, 'loss/train': 1.7645142078399658} +07/25/2024 12:13:34 - INFO - __main__ - Step 10221: {'lr': 0.0004950006763744358, 'samples': 490608, 'steps': 10220, 'loss/train': 1.7735058069229126} +07/25/2024 12:13:34 - INFO - __main__ - Step 10222: {'lr': 0.0004949996295566574, 'samples': 490656, 'steps': 10221, 'loss/train': 1.938146948814392} +07/25/2024 12:13:35 - INFO - __main__ - Step 10223: {'lr': 0.0004949985826304, 'samples': 490704, 'steps': 10222, 'loss/train': 1.913940668106079} +07/25/2024 12:13:35 - INFO - __main__ - Step 10224: {'lr': 0.000494997535595664, 'samples': 490752, 'steps': 10223, 'loss/train': 1.3946588039398193} +07/25/2024 12:13:35 - INFO - __main__ - Step 10225: {'lr': 0.00049499648845245, 'samples': 490800, 'steps': 10224, 'loss/train': 2.5401992797851562} +07/25/2024 12:13:36 - INFO - __main__ - Step 10226: {'lr': 0.0004949954412007585, 'samples': 490848, 'steps': 10225, 'loss/train': 2.4085123538970947} +07/25/2024 12:13:36 - INFO - __main__ - Step 10227: {'lr': 0.0004949943938405897, 'samples': 490896, 'steps': 10226, 'loss/train': 2.440391778945923} +07/25/2024 12:13:36 - INFO - __main__ - Step 10228: {'lr': 0.0004949933463719444, 'samples': 490944, 'steps': 10227, 'loss/train': 2.0374724864959717} +07/25/2024 12:13:36 - INFO - __main__ - Step 10229: {'lr': 0.0004949922987948228, 'samples': 490992, 'steps': 10228, 'loss/train': 2.1050050258636475} +07/25/2024 12:13:37 - INFO - __main__ - Step 10230: {'lr': 0.0004949912511092256, 'samples': 491040, 'steps': 10229, 'loss/train': 2.1430702209472656} +07/25/2024 12:13:37 - INFO - __main__ - Step 10231: {'lr': 0.0004949902033151531, 'samples': 491088, 'steps': 10230, 'loss/train': 2.2101798057556152} +07/25/2024 12:13:37 - INFO - __main__ - Step 10232: {'lr': 0.0004949891554126058, 'samples': 491136, 'steps': 10231, 'loss/train': 2.1331841945648193} +07/25/2024 12:13:38 - INFO - __main__ - Step 10233: {'lr': 0.0004949881074015841, 'samples': 491184, 'steps': 10232, 'loss/train': 2.4573934078216553} +07/25/2024 12:13:38 - INFO - __main__ - Step 10234: {'lr': 0.0004949870592820886, 'samples': 491232, 'steps': 10233, 'loss/train': 1.9087601900100708} +07/25/2024 12:13:38 - INFO - __main__ - Step 10235: {'lr': 0.0004949860110541196, 'samples': 491280, 'steps': 10234, 'loss/train': 1.8626470565795898} +07/25/2024 12:13:38 - INFO - __main__ - Step 10236: {'lr': 0.0004949849627176778, 'samples': 491328, 'steps': 10235, 'loss/train': 4.163414478302002} +07/25/2024 12:13:39 - INFO - __main__ - Step 10237: {'lr': 0.0004949839142727635, 'samples': 491376, 'steps': 10236, 'loss/train': 0.7120918035507202} +07/25/2024 12:13:39 - INFO - __main__ - Step 10238: {'lr': 0.0004949828657193771, 'samples': 491424, 'steps': 10237, 'loss/train': 2.343797445297241} +07/25/2024 12:13:39 - INFO - __main__ - Step 10239: {'lr': 0.0004949818170575192, 'samples': 491472, 'steps': 10238, 'loss/train': 1.9939727783203125} +07/25/2024 12:13:40 - INFO - __main__ - Step 10240: {'lr': 0.0004949807682871902, 'samples': 491520, 'steps': 10239, 'loss/train': 2.0994198322296143} +07/25/2024 12:13:40 - INFO - __main__ - Step 10241: {'lr': 0.0004949797194083906, 'samples': 491568, 'steps': 10240, 'loss/train': 1.3970983028411865} +07/25/2024 12:13:40 - INFO - __main__ - Step 10242: {'lr': 0.0004949786704211208, 'samples': 491616, 'steps': 10241, 'loss/train': 2.209822177886963} +07/25/2024 12:13:40 - INFO - __main__ - Step 10243: {'lr': 0.0004949776213253814, 'samples': 491664, 'steps': 10242, 'loss/train': 2.0002450942993164} +07/25/2024 12:13:41 - INFO - __main__ - Step 10244: {'lr': 0.0004949765721211726, 'samples': 491712, 'steps': 10243, 'loss/train': 2.187415838241577} +07/25/2024 12:13:41 - INFO - __main__ - Step 10245: {'lr': 0.0004949755228084952, 'samples': 491760, 'steps': 10244, 'loss/train': 1.8680890798568726} +07/25/2024 12:13:41 - INFO - __main__ - Step 10246: {'lr': 0.0004949744733873494, 'samples': 491808, 'steps': 10245, 'loss/train': 1.8006807565689087} +07/25/2024 12:13:42 - INFO - __main__ - Step 10247: {'lr': 0.0004949734238577359, 'samples': 491856, 'steps': 10246, 'loss/train': 1.5538607835769653} +07/25/2024 12:13:42 - INFO - __main__ - Step 10248: {'lr': 0.0004949723742196549, 'samples': 491904, 'steps': 10247, 'loss/train': 1.97471022605896} +07/25/2024 12:13:42 - INFO - __main__ - Step 10249: {'lr': 0.0004949713244731071, 'samples': 491952, 'steps': 10248, 'loss/train': 1.9401581287384033} +07/25/2024 12:13:42 - INFO - __main__ - Step 10250: {'lr': 0.0004949702746180929, 'samples': 492000, 'steps': 10249, 'loss/train': 2.5404052734375} +07/25/2024 12:13:43 - INFO - __main__ - Step 10251: {'lr': 0.0004949692246546126, 'samples': 492048, 'steps': 10250, 'loss/train': 2.5148584842681885} +07/25/2024 12:13:43 - INFO - __main__ - Step 10252: {'lr': 0.0004949681745826668, 'samples': 492096, 'steps': 10251, 'loss/train': 2.1866278648376465} +07/25/2024 12:13:43 - INFO - __main__ - Step 10253: {'lr': 0.000494967124402256, 'samples': 492144, 'steps': 10252, 'loss/train': 2.183173656463623} +07/25/2024 12:13:44 - INFO - __main__ - Step 10254: {'lr': 0.0004949660741133806, 'samples': 492192, 'steps': 10253, 'loss/train': 2.2021324634552} +07/25/2024 12:13:44 - INFO - __main__ - Step 10255: {'lr': 0.0004949650237160412, 'samples': 492240, 'steps': 10254, 'loss/train': 2.8111419677734375} +07/25/2024 12:13:44 - INFO - __main__ - Step 10256: {'lr': 0.0004949639732102379, 'samples': 492288, 'steps': 10255, 'loss/train': 2.179029703140259} +07/25/2024 12:13:44 - INFO - __main__ - Step 10257: {'lr': 0.0004949629225959717, 'samples': 492336, 'steps': 10256, 'loss/train': 2.0953989028930664} +07/25/2024 12:13:45 - INFO - __main__ - Step 10258: {'lr': 0.0004949618718732426, 'samples': 492384, 'steps': 10257, 'loss/train': 2.1921727657318115} +07/25/2024 12:13:45 - INFO - __main__ - Step 10259: {'lr': 0.0004949608210420512, 'samples': 492432, 'steps': 10258, 'loss/train': 0.8130803108215332} +07/25/2024 12:13:45 - INFO - __main__ - Step 10260: {'lr': 0.0004949597701023981, 'samples': 492480, 'steps': 10259, 'loss/train': 1.8598229885101318} +07/25/2024 12:13:46 - INFO - __main__ - Step 10261: {'lr': 0.0004949587190542838, 'samples': 492528, 'steps': 10260, 'loss/train': 0.37537381052970886} +07/25/2024 12:13:46 - INFO - __main__ - Step 10262: {'lr': 0.0004949576678977085, 'samples': 492576, 'steps': 10261, 'loss/train': 2.2157273292541504} +07/25/2024 12:13:46 - INFO - __main__ - Step 10263: {'lr': 0.0004949566166326727, 'samples': 492624, 'steps': 10262, 'loss/train': 1.92054283618927} +07/25/2024 12:13:46 - INFO - __main__ - Step 10264: {'lr': 0.0004949555652591772, 'samples': 492672, 'steps': 10263, 'loss/train': 1.8262684345245361} +07/25/2024 12:13:47 - INFO - __main__ - Step 10265: {'lr': 0.0004949545137772221, 'samples': 492720, 'steps': 10264, 'loss/train': 1.585667610168457} +07/25/2024 12:13:47 - INFO - __main__ - Step 10266: {'lr': 0.0004949534621868082, 'samples': 492768, 'steps': 10265, 'loss/train': 1.6173877716064453} +07/25/2024 12:13:47 - INFO - __main__ - Step 10267: {'lr': 0.0004949524104879355, 'samples': 492816, 'steps': 10266, 'loss/train': 2.560168743133545} +07/25/2024 12:13:48 - INFO - __main__ - Step 10268: {'lr': 0.000494951358680605, 'samples': 492864, 'steps': 10267, 'loss/train': 0.7781774997711182} +07/25/2024 12:13:48 - INFO - __main__ - Step 10269: {'lr': 0.0004949503067648166, 'samples': 492912, 'steps': 10268, 'loss/train': 2.1613423824310303} +07/25/2024 12:13:48 - INFO - __main__ - Step 10270: {'lr': 0.0004949492547405713, 'samples': 492960, 'steps': 10269, 'loss/train': 2.2821197509765625} +07/25/2024 12:13:48 - INFO - __main__ - Step 10271: {'lr': 0.0004949482026078692, 'samples': 493008, 'steps': 10270, 'loss/train': 1.6777325868606567} +07/25/2024 12:13:49 - INFO - __main__ - Step 10272: {'lr': 0.000494947150366711, 'samples': 493056, 'steps': 10271, 'loss/train': 2.1686534881591797} +07/25/2024 12:13:49 - INFO - __main__ - Step 10273: {'lr': 0.000494946098017097, 'samples': 493104, 'steps': 10272, 'loss/train': 1.7289286851882935} +07/25/2024 12:13:49 - INFO - __main__ - Step 10274: {'lr': 0.0004949450455590278, 'samples': 493152, 'steps': 10273, 'loss/train': 1.740702509880066} +07/25/2024 12:13:50 - INFO - __main__ - Step 10275: {'lr': 0.0004949439929925037, 'samples': 493200, 'steps': 10274, 'loss/train': 2.3783254623413086} +07/25/2024 12:13:50 - INFO - __main__ - Step 10276: {'lr': 0.0004949429403175254, 'samples': 493248, 'steps': 10275, 'loss/train': 1.8288720846176147} +07/25/2024 12:13:50 - INFO - __main__ - Step 10277: {'lr': 0.000494941887534093, 'samples': 493296, 'steps': 10276, 'loss/train': 1.4729344844818115} +07/25/2024 12:13:50 - INFO - __main__ - Step 10278: {'lr': 0.0004949408346422074, 'samples': 493344, 'steps': 10277, 'loss/train': 2.2834789752960205} +07/25/2024 12:13:51 - INFO - __main__ - Step 10279: {'lr': 0.0004949397816418689, 'samples': 493392, 'steps': 10278, 'loss/train': 2.7487597465515137} +07/25/2024 12:13:51 - INFO - __main__ - Step 10280: {'lr': 0.0004949387285330777, 'samples': 493440, 'steps': 10279, 'loss/train': 2.4986515045166016} +07/25/2024 12:13:51 - INFO - __main__ - Step 10281: {'lr': 0.0004949376753158347, 'samples': 493488, 'steps': 10280, 'loss/train': 1.8114088773727417} +07/25/2024 12:13:51 - INFO - __main__ - Step 10282: {'lr': 0.0004949366219901401, 'samples': 493536, 'steps': 10281, 'loss/train': 1.5876277685165405} +07/25/2024 12:13:52 - INFO - __main__ - Step 10283: {'lr': 0.0004949355685559944, 'samples': 493584, 'steps': 10282, 'loss/train': 0.4645058214664459} +07/25/2024 12:13:52 - INFO - __main__ - Step 10284: {'lr': 0.0004949345150133981, 'samples': 493632, 'steps': 10283, 'loss/train': 2.36209774017334} +07/25/2024 12:13:52 - INFO - __main__ - Step 10285: {'lr': 0.0004949334613623517, 'samples': 493680, 'steps': 10284, 'loss/train': 0.3093295395374298} +07/25/2024 12:13:53 - INFO - __main__ - Step 10286: {'lr': 0.0004949324076028556, 'samples': 493728, 'steps': 10285, 'loss/train': 1.9796689748764038} +07/25/2024 12:13:53 - INFO - __main__ - Step 10287: {'lr': 0.0004949313537349102, 'samples': 493776, 'steps': 10286, 'loss/train': 1.990080714225769} +07/25/2024 12:13:53 - INFO - __main__ - Step 10288: {'lr': 0.0004949302997585162, 'samples': 493824, 'steps': 10287, 'loss/train': 2.828307867050171} +07/25/2024 12:13:53 - INFO - __main__ - Step 10289: {'lr': 0.0004949292456736739, 'samples': 493872, 'steps': 10288, 'loss/train': 1.9992624521255493} +07/25/2024 12:13:54 - INFO - __main__ - Step 10290: {'lr': 0.0004949281914803837, 'samples': 493920, 'steps': 10289, 'loss/train': 1.939162015914917} +07/25/2024 12:13:54 - INFO - __main__ - Step 10291: {'lr': 0.0004949271371786462, 'samples': 493968, 'steps': 10290, 'loss/train': 2.4549477100372314} +07/25/2024 12:13:54 - INFO - __main__ - Step 10292: {'lr': 0.0004949260827684618, 'samples': 494016, 'steps': 10291, 'loss/train': 0.26382875442504883} +07/25/2024 12:13:55 - INFO - __main__ - Step 10293: {'lr': 0.0004949250282498311, 'samples': 494064, 'steps': 10292, 'loss/train': 2.3010146617889404} +07/25/2024 12:13:55 - INFO - __main__ - Step 10294: {'lr': 0.0004949239736227544, 'samples': 494112, 'steps': 10293, 'loss/train': 2.401392936706543} +07/25/2024 12:13:55 - INFO - __main__ - Step 10295: {'lr': 0.0004949229188872322, 'samples': 494160, 'steps': 10294, 'loss/train': 2.3659794330596924} +07/25/2024 12:13:55 - INFO - __main__ - Step 10296: {'lr': 0.000494921864043265, 'samples': 494208, 'steps': 10295, 'loss/train': 1.3161324262619019} +07/25/2024 12:13:56 - INFO - __main__ - Step 10297: {'lr': 0.0004949208090908534, 'samples': 494256, 'steps': 10296, 'loss/train': 2.499271869659424} +07/25/2024 12:13:56 - INFO - __main__ - Step 10298: {'lr': 0.0004949197540299975, 'samples': 494304, 'steps': 10297, 'loss/train': 2.2749881744384766} +07/25/2024 12:13:56 - INFO - __main__ - Step 10299: {'lr': 0.0004949186988606982, 'samples': 494352, 'steps': 10298, 'loss/train': 2.2226297855377197} +07/25/2024 12:13:57 - INFO - __main__ - Step 10300: {'lr': 0.0004949176435829557, 'samples': 494400, 'steps': 10299, 'loss/train': 2.0190279483795166} +07/25/2024 12:13:57 - INFO - __main__ - Step 10301: {'lr': 0.0004949165881967705, 'samples': 494448, 'steps': 10300, 'loss/train': 1.656226396560669} +07/25/2024 12:13:57 - INFO - __main__ - Step 10302: {'lr': 0.0004949155327021431, 'samples': 494496, 'steps': 10301, 'loss/train': 1.768897533416748} +07/25/2024 12:13:57 - INFO - __main__ - Step 10303: {'lr': 0.0004949144770990738, 'samples': 494544, 'steps': 10302, 'loss/train': 2.2126195430755615} +07/25/2024 12:13:58 - INFO - __main__ - Step 10304: {'lr': 0.0004949134213875634, 'samples': 494592, 'steps': 10303, 'loss/train': 1.8106493949890137} +07/25/2024 12:13:58 - INFO - __main__ - Step 10305: {'lr': 0.0004949123655676123, 'samples': 494640, 'steps': 10304, 'loss/train': 1.9551647901535034} +07/25/2024 12:13:58 - INFO - __main__ - Step 10306: {'lr': 0.0004949113096392207, 'samples': 494688, 'steps': 10305, 'loss/train': 2.0028722286224365} +07/25/2024 12:13:59 - INFO - __main__ - Step 10307: {'lr': 0.0004949102536023893, 'samples': 494736, 'steps': 10306, 'loss/train': 0.3604365289211273} +07/25/2024 12:13:59 - INFO - __main__ - Step 10308: {'lr': 0.0004949091974571186, 'samples': 494784, 'steps': 10307, 'loss/train': 2.028778076171875} +07/25/2024 12:13:59 - INFO - __main__ - Step 10309: {'lr': 0.0004949081412034087, 'samples': 494832, 'steps': 10308, 'loss/train': 0.9180581569671631} +07/25/2024 12:13:59 - INFO - __main__ - Step 10310: {'lr': 0.0004949070848412606, 'samples': 494880, 'steps': 10309, 'loss/train': 1.6685614585876465} +07/25/2024 12:14:00 - INFO - __main__ - Step 10311: {'lr': 0.0004949060283706744, 'samples': 494928, 'steps': 10310, 'loss/train': 1.028135061264038} +07/25/2024 12:14:00 - INFO - __main__ - Step 10312: {'lr': 0.0004949049717916507, 'samples': 494976, 'steps': 10311, 'loss/train': 2.6299734115600586} +07/25/2024 12:14:00 - INFO - __main__ - Step 10313: {'lr': 0.0004949039151041899, 'samples': 495024, 'steps': 10312, 'loss/train': 2.035576343536377} +07/25/2024 12:14:01 - INFO - __main__ - Step 10314: {'lr': 0.0004949028583082926, 'samples': 495072, 'steps': 10313, 'loss/train': 1.8230397701263428} +07/25/2024 12:14:01 - INFO - __main__ - Step 10315: {'lr': 0.0004949018014039591, 'samples': 495120, 'steps': 10314, 'loss/train': 2.3900983333587646} +07/25/2024 12:14:01 - INFO - __main__ - Step 10316: {'lr': 0.0004949007443911899, 'samples': 495168, 'steps': 10315, 'loss/train': 0.21791349351406097} +07/25/2024 12:14:01 - INFO - __main__ - Step 10317: {'lr': 0.0004948996872699855, 'samples': 495216, 'steps': 10316, 'loss/train': 2.0818049907684326} +07/25/2024 12:14:02 - INFO - __main__ - Step 10318: {'lr': 0.0004948986300403465, 'samples': 495264, 'steps': 10317, 'loss/train': 2.639913320541382} +07/25/2024 12:14:02 - INFO - __main__ - Step 10319: {'lr': 0.0004948975727022732, 'samples': 495312, 'steps': 10318, 'loss/train': 1.8662296533584595} +07/25/2024 12:14:02 - INFO - __main__ - Step 10320: {'lr': 0.0004948965152557661, 'samples': 495360, 'steps': 10319, 'loss/train': 1.7822010517120361} +07/25/2024 12:14:03 - INFO - __main__ - Step 10321: {'lr': 0.0004948954577008258, 'samples': 495408, 'steps': 10320, 'loss/train': 2.2002112865448} +07/25/2024 12:14:03 - INFO - __main__ - Step 10322: {'lr': 0.0004948944000374524, 'samples': 495456, 'steps': 10321, 'loss/train': 1.7035566568374634} +07/25/2024 12:14:03 - INFO - __main__ - Step 10323: {'lr': 0.0004948933422656469, 'samples': 495504, 'steps': 10322, 'loss/train': 1.4976987838745117} +07/25/2024 12:14:03 - INFO - __main__ - Step 10324: {'lr': 0.0004948922843854094, 'samples': 495552, 'steps': 10323, 'loss/train': 2.1155343055725098} +07/25/2024 12:14:04 - INFO - __main__ - Step 10325: {'lr': 0.0004948912263967403, 'samples': 495600, 'steps': 10324, 'loss/train': 2.177933692932129} +07/25/2024 12:14:04 - INFO - __main__ - Step 10326: {'lr': 0.0004948901682996404, 'samples': 495648, 'steps': 10325, 'loss/train': 1.8119560480117798} +07/25/2024 12:14:04 - INFO - __main__ - Step 10327: {'lr': 0.0004948891100941098, 'samples': 495696, 'steps': 10326, 'loss/train': 2.4003822803497314} +07/25/2024 12:14:05 - INFO - __main__ - Step 10328: {'lr': 0.0004948880517801494, 'samples': 495744, 'steps': 10327, 'loss/train': 2.3295202255249023} +07/25/2024 12:14:05 - INFO - __main__ - Step 10329: {'lr': 0.0004948869933577594, 'samples': 495792, 'steps': 10328, 'loss/train': 1.6279919147491455} +07/25/2024 12:14:05 - INFO - __main__ - Step 10330: {'lr': 0.0004948859348269402, 'samples': 495840, 'steps': 10329, 'loss/train': 1.5683367252349854} +07/25/2024 12:14:05 - INFO - __main__ - Step 10331: {'lr': 0.0004948848761876924, 'samples': 495888, 'steps': 10330, 'loss/train': 0.27430903911590576} +07/25/2024 12:14:06 - INFO - __main__ - Step 10332: {'lr': 0.0004948838174400164, 'samples': 495936, 'steps': 10331, 'loss/train': 2.503615140914917} +07/25/2024 12:14:06 - INFO - __main__ - Step 10333: {'lr': 0.0004948827585839128, 'samples': 495984, 'steps': 10332, 'loss/train': 2.2150676250457764} +07/25/2024 12:14:06 - INFO - __main__ - Step 10334: {'lr': 0.0004948816996193819, 'samples': 496032, 'steps': 10333, 'loss/train': 1.811962366104126} +07/25/2024 12:14:07 - INFO - __main__ - Step 10335: {'lr': 0.0004948806405464243, 'samples': 496080, 'steps': 10334, 'loss/train': 1.9414225816726685} +07/25/2024 12:14:07 - INFO - __main__ - Step 10336: {'lr': 0.0004948795813650405, 'samples': 496128, 'steps': 10335, 'loss/train': 2.423846960067749} +07/25/2024 12:14:07 - INFO - __main__ - Step 10337: {'lr': 0.0004948785220752306, 'samples': 496176, 'steps': 10336, 'loss/train': 1.9105072021484375} +07/25/2024 12:14:07 - INFO - __main__ - Step 10338: {'lr': 0.0004948774626769956, 'samples': 496224, 'steps': 10337, 'loss/train': 1.6054892539978027} +07/25/2024 12:14:08 - INFO - __main__ - Step 10339: {'lr': 0.0004948764031703357, 'samples': 496272, 'steps': 10338, 'loss/train': 1.8133447170257568} +07/25/2024 12:14:08 - INFO - __main__ - Step 10340: {'lr': 0.0004948753435552512, 'samples': 496320, 'steps': 10339, 'loss/train': 0.15649239718914032} +07/25/2024 12:14:08 - INFO - __main__ - Step 10341: {'lr': 0.0004948742838317429, 'samples': 496368, 'steps': 10340, 'loss/train': 2.2551355361938477} +07/25/2024 12:14:09 - INFO - __main__ - Step 10342: {'lr': 0.0004948732239998112, 'samples': 496416, 'steps': 10341, 'loss/train': 2.162710189819336} +07/25/2024 12:14:09 - INFO - __main__ - Step 10343: {'lr': 0.0004948721640594564, 'samples': 496464, 'steps': 10342, 'loss/train': 2.110229969024658} +07/25/2024 12:14:09 - INFO - __main__ - Step 10344: {'lr': 0.000494871104010679, 'samples': 496512, 'steps': 10343, 'loss/train': 1.3102360963821411} +07/25/2024 12:14:09 - INFO - __main__ - Step 10345: {'lr': 0.0004948700438534797, 'samples': 496560, 'steps': 10344, 'loss/train': 2.1051626205444336} +07/25/2024 12:14:10 - INFO - __main__ - Step 10346: {'lr': 0.0004948689835878587, 'samples': 496608, 'steps': 10345, 'loss/train': 2.044904947280884} +07/25/2024 12:14:10 - INFO - __main__ - Step 10347: {'lr': 0.0004948679232138166, 'samples': 496656, 'steps': 10346, 'loss/train': 2.1841835975646973} +07/25/2024 12:14:10 - INFO - __main__ - Step 10348: {'lr': 0.0004948668627313539, 'samples': 496704, 'steps': 10347, 'loss/train': 1.6703554391860962} +07/25/2024 12:14:11 - INFO - __main__ - Step 10349: {'lr': 0.000494865802140471, 'samples': 496752, 'steps': 10348, 'loss/train': 1.6513547897338867} +07/25/2024 12:14:11 - INFO - __main__ - Step 10350: {'lr': 0.0004948647414411683, 'samples': 496800, 'steps': 10349, 'loss/train': 2.0582785606384277} +07/25/2024 12:14:11 - INFO - __main__ - Step 10351: {'lr': 0.0004948636806334463, 'samples': 496848, 'steps': 10350, 'loss/train': 2.6738016605377197} +07/25/2024 12:14:11 - INFO - __main__ - Step 10352: {'lr': 0.0004948626197173057, 'samples': 496896, 'steps': 10351, 'loss/train': 2.3881475925445557} +07/25/2024 12:14:12 - INFO - __main__ - Step 10353: {'lr': 0.0004948615586927467, 'samples': 496944, 'steps': 10352, 'loss/train': 2.401413917541504} +07/25/2024 12:14:12 - INFO - __main__ - Step 10354: {'lr': 0.0004948604975597698, 'samples': 496992, 'steps': 10353, 'loss/train': 1.9223361015319824} +07/25/2024 12:14:12 - INFO - __main__ - Step 10355: {'lr': 0.0004948594363183757, 'samples': 497040, 'steps': 10354, 'loss/train': 0.21667428314685822} +07/25/2024 12:14:13 - INFO - __main__ - Step 10356: {'lr': 0.0004948583749685646, 'samples': 497088, 'steps': 10355, 'loss/train': 2.1374034881591797} +07/25/2024 12:14:13 - INFO - __main__ - Step 10357: {'lr': 0.0004948573135103371, 'samples': 497136, 'steps': 10356, 'loss/train': 2.453693151473999} +07/25/2024 12:14:13 - INFO - __main__ - Step 10358: {'lr': 0.0004948562519436936, 'samples': 497184, 'steps': 10357, 'loss/train': 1.6697156429290771} +07/25/2024 12:14:13 - INFO - __main__ - Step 10359: {'lr': 0.0004948551902686346, 'samples': 497232, 'steps': 10358, 'loss/train': 1.7352265119552612} +07/25/2024 12:14:14 - INFO - __main__ - Step 10360: {'lr': 0.0004948541284851606, 'samples': 497280, 'steps': 10359, 'loss/train': 1.5716346502304077} +07/25/2024 12:14:14 - INFO - __main__ - Step 10361: {'lr': 0.0004948530665932721, 'samples': 497328, 'steps': 10360, 'loss/train': 1.9056684970855713} +07/25/2024 12:14:14 - INFO - __main__ - Step 10362: {'lr': 0.0004948520045929695, 'samples': 497376, 'steps': 10361, 'loss/train': 1.1961146593093872} +07/25/2024 12:14:15 - INFO - __main__ - Step 10363: {'lr': 0.0004948509424842533, 'samples': 497424, 'steps': 10362, 'loss/train': 1.9909051656723022} +07/25/2024 12:14:15 - INFO - __main__ - Step 10364: {'lr': 0.000494849880267124, 'samples': 497472, 'steps': 10363, 'loss/train': 1.5613495111465454} +07/25/2024 12:14:15 - INFO - __main__ - Step 10365: {'lr': 0.000494848817941582, 'samples': 497520, 'steps': 10364, 'loss/train': 1.4326037168502808} +07/25/2024 12:14:15 - INFO - __main__ - Step 10366: {'lr': 0.0004948477555076277, 'samples': 497568, 'steps': 10365, 'loss/train': 1.7503108978271484} +07/25/2024 12:14:16 - INFO - __main__ - Step 10367: {'lr': 0.0004948466929652618, 'samples': 497616, 'steps': 10366, 'loss/train': 1.4484343528747559} +07/25/2024 12:14:16 - INFO - __main__ - Step 10368: {'lr': 0.0004948456303144846, 'samples': 497664, 'steps': 10367, 'loss/train': 2.0440618991851807} +07/25/2024 12:14:16 - INFO - __main__ - Step 10369: {'lr': 0.0004948445675552966, 'samples': 497712, 'steps': 10368, 'loss/train': 2.4266445636749268} +07/25/2024 12:14:16 - INFO - __main__ - Step 10370: {'lr': 0.0004948435046876983, 'samples': 497760, 'steps': 10369, 'loss/train': 2.201645612716675} +07/25/2024 12:14:17 - INFO - __main__ - Step 10371: {'lr': 0.0004948424417116902, 'samples': 497808, 'steps': 10370, 'loss/train': 1.6616355180740356} +07/25/2024 12:14:17 - INFO - __main__ - Step 10372: {'lr': 0.0004948413786272728, 'samples': 497856, 'steps': 10371, 'loss/train': 1.7481149435043335} +07/25/2024 12:14:17 - INFO - __main__ - Step 10373: {'lr': 0.0004948403154344463, 'samples': 497904, 'steps': 10372, 'loss/train': 0.8353353142738342} +07/25/2024 12:14:18 - INFO - __main__ - Step 10374: {'lr': 0.0004948392521332115, 'samples': 497952, 'steps': 10373, 'loss/train': 1.935337781906128} +07/25/2024 12:14:18 - INFO - __main__ - Step 10375: {'lr': 0.0004948381887235687, 'samples': 498000, 'steps': 10374, 'loss/train': 2.5845799446105957} +07/25/2024 12:14:18 - INFO - __main__ - Step 10376: {'lr': 0.0004948371252055184, 'samples': 498048, 'steps': 10375, 'loss/train': 2.2843716144561768} +07/25/2024 12:14:18 - INFO - __main__ - Step 10377: {'lr': 0.0004948360615790611, 'samples': 498096, 'steps': 10376, 'loss/train': 2.009347915649414} +07/25/2024 12:14:19 - INFO - __main__ - Step 10378: {'lr': 0.0004948349978441972, 'samples': 498144, 'steps': 10377, 'loss/train': 1.978729009628296} +07/25/2024 12:14:19 - INFO - __main__ - Step 10379: {'lr': 0.0004948339340009274, 'samples': 498192, 'steps': 10378, 'loss/train': 0.32205405831336975} +07/25/2024 12:14:19 - INFO - __main__ - Step 10380: {'lr': 0.0004948328700492518, 'samples': 498240, 'steps': 10379, 'loss/train': 1.7742286920547485} +07/25/2024 12:14:20 - INFO - __main__ - Step 10381: {'lr': 0.0004948318059891711, 'samples': 498288, 'steps': 10380, 'loss/train': 1.9839959144592285} +07/25/2024 12:14:20 - INFO - __main__ - Step 10382: {'lr': 0.0004948307418206857, 'samples': 498336, 'steps': 10381, 'loss/train': 0.7874761819839478} +07/25/2024 12:14:20 - INFO - __main__ - Step 10383: {'lr': 0.0004948296775437963, 'samples': 498384, 'steps': 10382, 'loss/train': 2.3992912769317627} +07/25/2024 12:14:20 - INFO - __main__ - Step 10384: {'lr': 0.0004948286131585029, 'samples': 498432, 'steps': 10383, 'loss/train': 1.9426857233047485} +07/25/2024 12:14:21 - INFO - __main__ - Step 10385: {'lr': 0.0004948275486648065, 'samples': 498480, 'steps': 10384, 'loss/train': 1.949028730392456} +07/25/2024 12:14:21 - INFO - __main__ - Step 10386: {'lr': 0.0004948264840627072, 'samples': 498528, 'steps': 10385, 'loss/train': 1.2399439811706543} +07/25/2024 12:14:21 - INFO - __main__ - Step 10387: {'lr': 0.0004948254193522056, 'samples': 498576, 'steps': 10386, 'loss/train': 1.9968072175979614} +07/25/2024 12:14:22 - INFO - __main__ - Step 10388: {'lr': 0.0004948243545333022, 'samples': 498624, 'steps': 10387, 'loss/train': 2.362603187561035} +07/25/2024 12:14:22 - INFO - __main__ - Step 10389: {'lr': 0.0004948232896059974, 'samples': 498672, 'steps': 10388, 'loss/train': 2.2638566493988037} +07/25/2024 12:14:22 - INFO - __main__ - Step 10390: {'lr': 0.0004948222245702917, 'samples': 498720, 'steps': 10389, 'loss/train': 1.9229143857955933} +07/25/2024 12:14:22 - INFO - __main__ - Step 10391: {'lr': 0.0004948211594261857, 'samples': 498768, 'steps': 10390, 'loss/train': 2.395739793777466} +07/25/2024 12:14:23 - INFO - __main__ - Step 10392: {'lr': 0.0004948200941736796, 'samples': 498816, 'steps': 10391, 'loss/train': 1.8569293022155762} +07/25/2024 12:14:23 - INFO - __main__ - Step 10393: {'lr': 0.0004948190288127741, 'samples': 498864, 'steps': 10392, 'loss/train': 1.24372398853302} +07/25/2024 12:14:23 - INFO - __main__ - Step 10394: {'lr': 0.0004948179633434695, 'samples': 498912, 'steps': 10393, 'loss/train': 2.327153205871582} +07/25/2024 12:14:24 - INFO - __main__ - Step 10395: {'lr': 0.0004948168977657665, 'samples': 498960, 'steps': 10394, 'loss/train': 1.7669957876205444} +07/25/2024 12:14:24 - INFO - __main__ - Step 10396: {'lr': 0.0004948158320796653, 'samples': 499008, 'steps': 10395, 'loss/train': 1.6109691858291626} +07/25/2024 12:14:24 - INFO - __main__ - Step 10397: {'lr': 0.0004948147662851666, 'samples': 499056, 'steps': 10396, 'loss/train': 1.7650700807571411} +07/25/2024 12:14:24 - INFO - __main__ - Step 10398: {'lr': 0.0004948137003822708, 'samples': 499104, 'steps': 10397, 'loss/train': 1.9152287244796753} +07/25/2024 12:14:25 - INFO - __main__ - Step 10399: {'lr': 0.0004948126343709782, 'samples': 499152, 'steps': 10398, 'loss/train': 2.081310749053955} +07/25/2024 12:14:25 - INFO - __main__ - Step 10400: {'lr': 0.0004948115682512896, 'samples': 499200, 'steps': 10399, 'loss/train': 1.747309923171997} +07/25/2024 12:14:25 - INFO - __main__ - Step 10401: {'lr': 0.0004948105020232053, 'samples': 499248, 'steps': 10400, 'loss/train': 2.0483503341674805} +07/25/2024 12:14:26 - INFO - __main__ - Step 10402: {'lr': 0.0004948094356867257, 'samples': 499296, 'steps': 10401, 'loss/train': 2.0761525630950928} +07/25/2024 12:14:26 - INFO - __main__ - Step 10403: {'lr': 0.0004948083692418513, 'samples': 499344, 'steps': 10402, 'loss/train': 0.221083402633667} +07/25/2024 12:14:26 - INFO - __main__ - Step 10404: {'lr': 0.0004948073026885826, 'samples': 499392, 'steps': 10403, 'loss/train': 2.0454907417297363} +07/25/2024 12:14:26 - INFO - __main__ - Step 10405: {'lr': 0.0004948062360269202, 'samples': 499440, 'steps': 10404, 'loss/train': 2.2264630794525146} +07/25/2024 12:14:27 - INFO - __main__ - Step 10406: {'lr': 0.0004948051692568645, 'samples': 499488, 'steps': 10405, 'loss/train': 1.2402924299240112} +07/25/2024 12:14:27 - INFO - __main__ - Step 10407: {'lr': 0.0004948041023784158, 'samples': 499536, 'steps': 10406, 'loss/train': 1.7168205976486206} +07/25/2024 12:14:27 - INFO - __main__ - Step 10408: {'lr': 0.0004948030353915747, 'samples': 499584, 'steps': 10407, 'loss/train': 2.188323974609375} +07/25/2024 12:14:28 - INFO - __main__ - Step 10409: {'lr': 0.0004948019682963418, 'samples': 499632, 'steps': 10408, 'loss/train': 1.5692185163497925} +07/25/2024 12:14:28 - INFO - __main__ - Step 10410: {'lr': 0.0004948009010927173, 'samples': 499680, 'steps': 10409, 'loss/train': 1.7875269651412964} +07/25/2024 12:14:28 - INFO - __main__ - Step 10411: {'lr': 0.0004947998337807018, 'samples': 499728, 'steps': 10410, 'loss/train': 1.832782506942749} +07/25/2024 12:14:28 - INFO - __main__ - Step 10412: {'lr': 0.0004947987663602959, 'samples': 499776, 'steps': 10411, 'loss/train': 2.1232728958129883} +07/25/2024 12:14:29 - INFO - __main__ - Step 10413: {'lr': 0.0004947976988315, 'samples': 499824, 'steps': 10412, 'loss/train': 1.588722825050354} +07/25/2024 12:14:29 - INFO - __main__ - Step 10414: {'lr': 0.0004947966311943144, 'samples': 499872, 'steps': 10413, 'loss/train': 1.9069409370422363} +07/25/2024 12:14:29 - INFO - __main__ - Step 10415: {'lr': 0.0004947955634487398, 'samples': 499920, 'steps': 10414, 'loss/train': 2.512176513671875} +07/25/2024 12:14:30 - INFO - __main__ - Step 10416: {'lr': 0.0004947944955947766, 'samples': 499968, 'steps': 10415, 'loss/train': 1.7592353820800781} +07/25/2024 12:14:30 - INFO - __main__ - Step 10417: {'lr': 0.0004947934276324252, 'samples': 500016, 'steps': 10416, 'loss/train': 1.9350720643997192} +07/25/2024 12:14:30 - INFO - __main__ - Step 10418: {'lr': 0.0004947923595616862, 'samples': 500064, 'steps': 10417, 'loss/train': 1.6225999593734741} +07/25/2024 12:14:30 - INFO - __main__ - Step 10419: {'lr': 0.00049479129138256, 'samples': 500112, 'steps': 10418, 'loss/train': 2.0375711917877197} +07/25/2024 12:14:31 - INFO - __main__ - Step 10420: {'lr': 0.0004947902230950469, 'samples': 500160, 'steps': 10419, 'loss/train': 1.8497517108917236} +07/25/2024 12:14:31 - INFO - __main__ - Step 10421: {'lr': 0.0004947891546991477, 'samples': 500208, 'steps': 10420, 'loss/train': 1.8911205530166626} +07/25/2024 12:14:31 - INFO - __main__ - Step 10422: {'lr': 0.0004947880861948627, 'samples': 500256, 'steps': 10421, 'loss/train': 2.233999252319336} +07/25/2024 12:14:32 - INFO - __main__ - Step 10423: {'lr': 0.0004947870175821924, 'samples': 500304, 'steps': 10422, 'loss/train': 2.1830804347991943} +07/25/2024 12:14:32 - INFO - __main__ - Step 10424: {'lr': 0.0004947859488611373, 'samples': 500352, 'steps': 10423, 'loss/train': 1.3228157758712769} +07/25/2024 12:14:32 - INFO - __main__ - Step 10425: {'lr': 0.0004947848800316978, 'samples': 500400, 'steps': 10424, 'loss/train': 2.0170669555664062} +07/25/2024 12:14:32 - INFO - __main__ - Step 10426: {'lr': 0.0004947838110938744, 'samples': 500448, 'steps': 10425, 'loss/train': 2.3185653686523438} +07/25/2024 12:14:33 - INFO - __main__ - Step 10427: {'lr': 0.0004947827420476676, 'samples': 500496, 'steps': 10426, 'loss/train': 0.24180898070335388} +07/25/2024 12:14:33 - INFO - __main__ - Step 10428: {'lr': 0.0004947816728930779, 'samples': 500544, 'steps': 10427, 'loss/train': 2.1112165451049805} +07/25/2024 12:14:33 - INFO - __main__ - Step 10429: {'lr': 0.0004947806036301056, 'samples': 500592, 'steps': 10428, 'loss/train': 2.414796829223633} +07/25/2024 12:14:34 - INFO - __main__ - Step 10430: {'lr': 0.0004947795342587516, 'samples': 500640, 'steps': 10429, 'loss/train': 1.7869971990585327} +07/25/2024 12:14:34 - INFO - __main__ - Step 10431: {'lr': 0.0004947784647790158, 'samples': 500688, 'steps': 10430, 'loss/train': 1.8596311807632446} +07/25/2024 12:14:34 - INFO - __main__ - Step 10432: {'lr': 0.0004947773951908991, 'samples': 500736, 'steps': 10431, 'loss/train': 1.9003453254699707} +07/25/2024 12:14:34 - INFO - __main__ - Step 10433: {'lr': 0.0004947763254944017, 'samples': 500784, 'steps': 10432, 'loss/train': 1.8391305208206177} +07/25/2024 12:14:35 - INFO - __main__ - Step 10434: {'lr': 0.0004947752556895243, 'samples': 500832, 'steps': 10433, 'loss/train': 1.7898601293563843} +07/25/2024 12:14:35 - INFO - __main__ - Step 10435: {'lr': 0.0004947741857762672, 'samples': 500880, 'steps': 10434, 'loss/train': 2.6541361808776855} +07/25/2024 12:14:35 - INFO - __main__ - Step 10436: {'lr': 0.000494773115754631, 'samples': 500928, 'steps': 10435, 'loss/train': 2.0546677112579346} +07/25/2024 12:14:36 - INFO - __main__ - Step 10437: {'lr': 0.0004947720456246162, 'samples': 500976, 'steps': 10436, 'loss/train': 1.7830591201782227} +07/25/2024 12:14:36 - INFO - __main__ - Step 10438: {'lr': 0.000494770975386223, 'samples': 501024, 'steps': 10437, 'loss/train': 1.9956852197647095} +07/25/2024 12:14:36 - INFO - __main__ - Step 10439: {'lr': 0.0004947699050394523, 'samples': 501072, 'steps': 10438, 'loss/train': 1.8537604808807373} +07/25/2024 12:14:36 - INFO - __main__ - Step 10440: {'lr': 0.0004947688345843043, 'samples': 501120, 'steps': 10439, 'loss/train': 1.219553828239441} +07/25/2024 12:14:37 - INFO - __main__ - Step 10441: {'lr': 0.0004947677640207794, 'samples': 501168, 'steps': 10440, 'loss/train': 2.130227565765381} +07/25/2024 12:14:37 - INFO - __main__ - Step 10442: {'lr': 0.0004947666933488782, 'samples': 501216, 'steps': 10441, 'loss/train': 1.970374584197998} +07/25/2024 12:14:37 - INFO - __main__ - Step 10443: {'lr': 0.0004947656225686011, 'samples': 501264, 'steps': 10442, 'loss/train': 1.7403385639190674} +07/25/2024 12:14:37 - INFO - __main__ - Step 10444: {'lr': 0.0004947645516799489, 'samples': 501312, 'steps': 10443, 'loss/train': 1.7966828346252441} +07/25/2024 12:14:38 - INFO - __main__ - Step 10445: {'lr': 0.0004947634806829217, 'samples': 501360, 'steps': 10444, 'loss/train': 2.23437237739563} +07/25/2024 12:14:38 - INFO - __main__ - Step 10446: {'lr': 0.00049476240957752, 'samples': 501408, 'steps': 10445, 'loss/train': 2.104031801223755} +07/25/2024 12:14:38 - INFO - __main__ - Step 10447: {'lr': 0.0004947613383637444, 'samples': 501456, 'steps': 10446, 'loss/train': 2.1692912578582764} +07/25/2024 12:14:39 - INFO - __main__ - Step 10448: {'lr': 0.0004947602670415953, 'samples': 501504, 'steps': 10447, 'loss/train': 1.7525813579559326} +07/25/2024 12:14:39 - INFO - __main__ - Step 10449: {'lr': 0.0004947591956110733, 'samples': 501552, 'steps': 10448, 'loss/train': 1.660476803779602} +07/25/2024 12:14:39 - INFO - __main__ - Step 10450: {'lr': 0.0004947581240721788, 'samples': 501600, 'steps': 10449, 'loss/train': 2.383798360824585} +07/25/2024 12:14:39 - INFO - __main__ - Step 10451: {'lr': 0.0004947570524249123, 'samples': 501648, 'steps': 10450, 'loss/train': 0.20770613849163055} +07/25/2024 12:14:40 - INFO - __main__ - Step 10452: {'lr': 0.000494755980669274, 'samples': 501696, 'steps': 10451, 'loss/train': 1.871012568473816} +07/25/2024 12:14:40 - INFO - __main__ - Step 10453: {'lr': 0.0004947549088052648, 'samples': 501744, 'steps': 10452, 'loss/train': 2.431891918182373} +07/25/2024 12:14:40 - INFO - __main__ - Step 10454: {'lr': 0.0004947538368328849, 'samples': 501792, 'steps': 10453, 'loss/train': 1.8921103477478027} +07/25/2024 12:14:41 - INFO - __main__ - Step 10455: {'lr': 0.0004947527647521349, 'samples': 501840, 'steps': 10454, 'loss/train': 1.3492414951324463} +07/25/2024 12:14:41 - INFO - __main__ - Step 10456: {'lr': 0.0004947516925630153, 'samples': 501888, 'steps': 10455, 'loss/train': 1.446194052696228} +07/25/2024 12:14:41 - INFO - __main__ - Step 10457: {'lr': 0.0004947506202655264, 'samples': 501936, 'steps': 10456, 'loss/train': 1.8769559860229492} +07/25/2024 12:14:41 - INFO - __main__ - Step 10458: {'lr': 0.0004947495478596688, 'samples': 501984, 'steps': 10457, 'loss/train': 1.3350465297698975} +07/25/2024 12:14:42 - INFO - __main__ - Step 10459: {'lr': 0.000494748475345443, 'samples': 502032, 'steps': 10458, 'loss/train': 1.7341620922088623} +07/25/2024 12:14:42 - INFO - __main__ - Step 10460: {'lr': 0.0004947474027228495, 'samples': 502080, 'steps': 10459, 'loss/train': 2.091585874557495} +07/25/2024 12:14:42 - INFO - __main__ - Step 10461: {'lr': 0.0004947463299918885, 'samples': 502128, 'steps': 10460, 'loss/train': 1.6250203847885132} +07/25/2024 12:14:43 - INFO - __main__ - Step 10462: {'lr': 0.0004947452571525608, 'samples': 502176, 'steps': 10461, 'loss/train': 2.0939583778381348} +07/25/2024 12:14:43 - INFO - __main__ - Step 10463: {'lr': 0.0004947441842048668, 'samples': 502224, 'steps': 10462, 'loss/train': 0.5932971239089966} +07/25/2024 12:14:43 - INFO - __main__ - Step 10464: {'lr': 0.0004947431111488069, 'samples': 502272, 'steps': 10463, 'loss/train': 1.4207900762557983} +07/25/2024 12:14:43 - INFO - __main__ - Step 10465: {'lr': 0.0004947420379843816, 'samples': 502320, 'steps': 10464, 'loss/train': 1.734567403793335} +07/25/2024 12:14:44 - INFO - __main__ - Step 10466: {'lr': 0.0004947409647115915, 'samples': 502368, 'steps': 10465, 'loss/train': 2.421738386154175} +07/25/2024 12:14:44 - INFO - __main__ - Step 10467: {'lr': 0.0004947398913304368, 'samples': 502416, 'steps': 10466, 'loss/train': 1.804047703742981} +07/25/2024 12:14:44 - INFO - __main__ - Step 10468: {'lr': 0.0004947388178409181, 'samples': 502464, 'steps': 10467, 'loss/train': 2.095505475997925} +07/25/2024 12:14:45 - INFO - __main__ - Step 10469: {'lr': 0.000494737744243036, 'samples': 502512, 'steps': 10468, 'loss/train': 1.3720136880874634} +07/25/2024 12:14:45 - INFO - __main__ - Step 10470: {'lr': 0.000494736670536791, 'samples': 502560, 'steps': 10469, 'loss/train': 2.2265915870666504} +07/25/2024 12:14:45 - INFO - __main__ - Step 10471: {'lr': 0.0004947355967221832, 'samples': 502608, 'steps': 10470, 'loss/train': 1.8810584545135498} +07/25/2024 12:14:45 - INFO - __main__ - Step 10472: {'lr': 0.0004947345227992135, 'samples': 502656, 'steps': 10471, 'loss/train': 2.2555954456329346} +07/25/2024 12:14:46 - INFO - __main__ - Step 10473: {'lr': 0.0004947334487678821, 'samples': 502704, 'steps': 10472, 'loss/train': 2.130861520767212} +07/25/2024 12:14:46 - INFO - __main__ - Step 10474: {'lr': 0.0004947323746281898, 'samples': 502752, 'steps': 10473, 'loss/train': 0.8471354246139526} +07/25/2024 12:14:46 - INFO - __main__ - Step 10475: {'lr': 0.0004947313003801367, 'samples': 502800, 'steps': 10474, 'loss/train': 0.16514670848846436} +07/25/2024 12:14:47 - INFO - __main__ - Step 10476: {'lr': 0.0004947302260237234, 'samples': 502848, 'steps': 10475, 'loss/train': 1.4512989521026611} +07/25/2024 12:14:47 - INFO - __main__ - Step 10477: {'lr': 0.0004947291515589506, 'samples': 502896, 'steps': 10476, 'loss/train': 2.087015390396118} +07/25/2024 12:14:47 - INFO - __main__ - Step 10478: {'lr': 0.0004947280769858184, 'samples': 502944, 'steps': 10477, 'loss/train': 1.8083393573760986} +07/25/2024 12:14:47 - INFO - __main__ - Step 10479: {'lr': 0.0004947270023043276, 'samples': 502992, 'steps': 10478, 'loss/train': 1.2568382024765015} +07/25/2024 12:14:48 - INFO - __main__ - Step 10480: {'lr': 0.0004947259275144784, 'samples': 503040, 'steps': 10479, 'loss/train': 1.9876351356506348} +07/25/2024 12:14:48 - INFO - __main__ - Step 10481: {'lr': 0.0004947248526162715, 'samples': 503088, 'steps': 10480, 'loss/train': 1.8166598081588745} +07/25/2024 12:14:48 - INFO - __main__ - Step 10482: {'lr': 0.0004947237776097073, 'samples': 503136, 'steps': 10481, 'loss/train': 1.8866204023361206} +07/25/2024 12:14:49 - INFO - __main__ - Step 10483: {'lr': 0.0004947227024947863, 'samples': 503184, 'steps': 10482, 'loss/train': 1.9620743989944458} +07/25/2024 12:14:49 - INFO - __main__ - Step 10484: {'lr': 0.0004947216272715089, 'samples': 503232, 'steps': 10483, 'loss/train': 2.0784969329833984} +07/25/2024 12:14:49 - INFO - __main__ - Step 10485: {'lr': 0.0004947205519398756, 'samples': 503280, 'steps': 10484, 'loss/train': 2.4951820373535156} +07/25/2024 12:14:49 - INFO - __main__ - Step 10486: {'lr': 0.0004947194764998869, 'samples': 503328, 'steps': 10485, 'loss/train': 1.6583894491195679} +07/25/2024 12:14:50 - INFO - __main__ - Step 10487: {'lr': 0.0004947184009515432, 'samples': 503376, 'steps': 10486, 'loss/train': 0.8057152032852173} +07/25/2024 12:14:50 - INFO - __main__ - Step 10488: {'lr': 0.0004947173252948452, 'samples': 503424, 'steps': 10487, 'loss/train': 0.7876482605934143} +07/25/2024 12:14:50 - INFO - __main__ - Step 10489: {'lr': 0.0004947162495297931, 'samples': 503472, 'steps': 10488, 'loss/train': 1.6673758029937744} +07/25/2024 12:14:51 - INFO - __main__ - Step 10490: {'lr': 0.0004947151736563876, 'samples': 503520, 'steps': 10489, 'loss/train': 1.8360880613327026} +07/25/2024 12:14:51 - INFO - __main__ - Step 10491: {'lr': 0.0004947140976746291, 'samples': 503568, 'steps': 10490, 'loss/train': 2.095452070236206} +07/25/2024 12:14:51 - INFO - __main__ - Step 10492: {'lr': 0.000494713021584518, 'samples': 503616, 'steps': 10491, 'loss/train': 1.6290911436080933} +07/25/2024 12:14:51 - INFO - __main__ - Step 10493: {'lr': 0.0004947119453860547, 'samples': 503664, 'steps': 10492, 'loss/train': 1.7560158967971802} +07/25/2024 12:14:52 - INFO - __main__ - Step 10494: {'lr': 0.00049471086907924, 'samples': 503712, 'steps': 10493, 'loss/train': 2.4039065837860107} +07/25/2024 12:14:52 - INFO - __main__ - Step 10495: {'lr': 0.0004947097926640741, 'samples': 503760, 'steps': 10494, 'loss/train': 1.297176480293274} +07/25/2024 12:14:52 - INFO - __main__ - Step 10496: {'lr': 0.0004947087161405576, 'samples': 503808, 'steps': 10495, 'loss/train': 2.1150598526000977} +07/25/2024 12:14:53 - INFO - __main__ - Step 10497: {'lr': 0.000494707639508691, 'samples': 503856, 'steps': 10496, 'loss/train': 2.095158576965332} +07/25/2024 12:14:53 - INFO - __main__ - Step 10498: {'lr': 0.0004947065627684744, 'samples': 503904, 'steps': 10497, 'loss/train': 1.9927027225494385} +07/25/2024 12:14:53 - INFO - __main__ - Step 10499: {'lr': 0.0004947054859199089, 'samples': 503952, 'steps': 10498, 'loss/train': 0.15072283148765564} +07/25/2024 12:14:53 - INFO - __main__ - Step 10500: {'lr': 0.0004947044089629947, 'samples': 504000, 'steps': 10499, 'loss/train': 1.468454122543335} +07/25/2024 12:14:54 - INFO - __main__ - Step 10501: {'lr': 0.000494703331897732, 'samples': 504048, 'steps': 10500, 'loss/train': 2.5892791748046875} +07/25/2024 12:14:54 - INFO - __main__ - Step 10502: {'lr': 0.0004947022547241216, 'samples': 504096, 'steps': 10501, 'loss/train': 1.624063491821289} +07/25/2024 12:14:54 - INFO - __main__ - Step 10503: {'lr': 0.000494701177442164, 'samples': 504144, 'steps': 10502, 'loss/train': 2.1847243309020996} +07/25/2024 12:14:55 - INFO - __main__ - Step 10504: {'lr': 0.0004947001000518595, 'samples': 504192, 'steps': 10503, 'loss/train': 1.997180461883545} +07/25/2024 12:14:55 - INFO - __main__ - Step 10505: {'lr': 0.0004946990225532088, 'samples': 504240, 'steps': 10504, 'loss/train': 1.7489367723464966} +07/25/2024 12:14:55 - INFO - __main__ - Step 10506: {'lr': 0.000494697944946212, 'samples': 504288, 'steps': 10505, 'loss/train': 2.1334075927734375} +07/25/2024 12:14:55 - INFO - __main__ - Step 10507: {'lr': 0.0004946968672308699, 'samples': 504336, 'steps': 10506, 'loss/train': 2.0966126918792725} +07/25/2024 12:14:56 - INFO - __main__ - Step 10508: {'lr': 0.0004946957894071829, 'samples': 504384, 'steps': 10507, 'loss/train': 1.0031497478485107} +07/25/2024 12:14:56 - INFO - __main__ - Step 10509: {'lr': 0.0004946947114751515, 'samples': 504432, 'steps': 10508, 'loss/train': 2.582958936691284} +07/25/2024 12:14:56 - INFO - __main__ - Step 10510: {'lr': 0.000494693633434776, 'samples': 504480, 'steps': 10509, 'loss/train': 1.7970798015594482} +07/25/2024 12:14:57 - INFO - __main__ - Step 10511: {'lr': 0.0004946925552860572, 'samples': 504528, 'steps': 10510, 'loss/train': 2.2565035820007324} +07/25/2024 12:14:57 - INFO - __main__ - Step 10512: {'lr': 0.0004946914770289953, 'samples': 504576, 'steps': 10511, 'loss/train': 1.894034743309021} +07/25/2024 12:14:57 - INFO - __main__ - Step 10513: {'lr': 0.0004946903986635908, 'samples': 504624, 'steps': 10512, 'loss/train': 2.0527656078338623} +07/25/2024 12:14:57 - INFO - __main__ - Step 10514: {'lr': 0.0004946893201898443, 'samples': 504672, 'steps': 10513, 'loss/train': 1.8775854110717773} +07/25/2024 12:14:58 - INFO - __main__ - Step 10515: {'lr': 0.0004946882416077563, 'samples': 504720, 'steps': 10514, 'loss/train': 0.7271775603294373} +07/25/2024 12:14:58 - INFO - __main__ - Step 10516: {'lr': 0.0004946871629173271, 'samples': 504768, 'steps': 10515, 'loss/train': 1.7075878381729126} +07/25/2024 12:14:58 - INFO - __main__ - Step 10517: {'lr': 0.0004946860841185574, 'samples': 504816, 'steps': 10516, 'loss/train': 1.3426593542099} +07/25/2024 12:14:59 - INFO - __main__ - Step 10518: {'lr': 0.0004946850052114474, 'samples': 504864, 'steps': 10517, 'loss/train': 1.9226948022842407} +07/25/2024 12:14:59 - INFO - __main__ - Step 10519: {'lr': 0.0004946839261959979, 'samples': 504912, 'steps': 10518, 'loss/train': 1.9645695686340332} +07/25/2024 12:14:59 - INFO - __main__ - Step 10520: {'lr': 0.0004946828470722091, 'samples': 504960, 'steps': 10519, 'loss/train': 1.8968451023101807} +07/25/2024 12:14:59 - INFO - __main__ - Step 10521: {'lr': 0.0004946817678400816, 'samples': 505008, 'steps': 10520, 'loss/train': 1.8120410442352295} +07/25/2024 12:15:00 - INFO - __main__ - Step 10522: {'lr': 0.000494680688499616, 'samples': 505056, 'steps': 10521, 'loss/train': 1.6023942232131958} +07/25/2024 12:15:00 - INFO - __main__ - Step 10523: {'lr': 0.0004946796090508125, 'samples': 505104, 'steps': 10522, 'loss/train': 0.18346698582172394} +07/25/2024 12:15:00 - INFO - __main__ - Step 10524: {'lr': 0.0004946785294936718, 'samples': 505152, 'steps': 10523, 'loss/train': 1.8839421272277832} +07/25/2024 12:15:00 - INFO - __main__ - Step 10525: {'lr': 0.0004946774498281942, 'samples': 505200, 'steps': 10524, 'loss/train': 2.300593852996826} +07/25/2024 12:15:01 - INFO - __main__ - Step 10526: {'lr': 0.0004946763700543804, 'samples': 505248, 'steps': 10525, 'loss/train': 1.9442485570907593} +07/25/2024 12:15:01 - INFO - __main__ - Step 10527: {'lr': 0.0004946752901722306, 'samples': 505296, 'steps': 10526, 'loss/train': 2.104722023010254} +07/25/2024 12:15:01 - INFO - __main__ - Step 10528: {'lr': 0.0004946742101817457, 'samples': 505344, 'steps': 10527, 'loss/train': 1.8529661893844604} +07/25/2024 12:15:02 - INFO - __main__ - Step 10529: {'lr': 0.0004946731300829257, 'samples': 505392, 'steps': 10528, 'loss/train': 1.8343816995620728} +07/25/2024 12:15:02 - INFO - __main__ - Step 10530: {'lr': 0.0004946720498757713, 'samples': 505440, 'steps': 10529, 'loss/train': 2.404557704925537} +07/25/2024 12:15:02 - INFO - __main__ - Step 10531: {'lr': 0.0004946709695602831, 'samples': 505488, 'steps': 10530, 'loss/train': 2.730924129486084} +07/25/2024 12:15:02 - INFO - __main__ - Step 10532: {'lr': 0.0004946698891364614, 'samples': 505536, 'steps': 10531, 'loss/train': 1.3221946954727173} +07/25/2024 12:15:03 - INFO - __main__ - Step 10533: {'lr': 0.0004946688086043066, 'samples': 505584, 'steps': 10532, 'loss/train': 1.9715676307678223} +07/25/2024 12:15:03 - INFO - __main__ - Step 10534: {'lr': 0.0004946677279638194, 'samples': 505632, 'steps': 10533, 'loss/train': 1.5438404083251953} +07/25/2024 12:15:03 - INFO - __main__ - Step 10535: {'lr': 0.0004946666472150003, 'samples': 505680, 'steps': 10534, 'loss/train': 1.5825878381729126} +07/25/2024 12:15:04 - INFO - __main__ - Step 10536: {'lr': 0.0004946655663578495, 'samples': 505728, 'steps': 10535, 'loss/train': 2.220529556274414} +07/25/2024 12:15:04 - INFO - __main__ - Step 10537: {'lr': 0.0004946644853923677, 'samples': 505776, 'steps': 10536, 'loss/train': 1.7045402526855469} +07/25/2024 12:15:04 - INFO - __main__ - Step 10538: {'lr': 0.0004946634043185554, 'samples': 505824, 'steps': 10537, 'loss/train': 1.7589579820632935} +07/25/2024 12:15:04 - INFO - __main__ - Step 10539: {'lr': 0.0004946623231364129, 'samples': 505872, 'steps': 10538, 'loss/train': 0.23850573599338531} +07/25/2024 12:15:05 - INFO - __main__ - Step 10540: {'lr': 0.0004946612418459407, 'samples': 505920, 'steps': 10539, 'loss/train': 2.2228550910949707} +07/25/2024 12:15:05 - INFO - __main__ - Step 10541: {'lr': 0.0004946601604471396, 'samples': 505968, 'steps': 10540, 'loss/train': 1.856568455696106} +07/25/2024 12:15:05 - INFO - __main__ - Step 10542: {'lr': 0.0004946590789400096, 'samples': 506016, 'steps': 10541, 'loss/train': 1.344226360321045} +07/25/2024 12:15:06 - INFO - __main__ - Step 10543: {'lr': 0.0004946579973245515, 'samples': 506064, 'steps': 10542, 'loss/train': 1.811065912246704} +07/25/2024 12:15:06 - INFO - __main__ - Step 10544: {'lr': 0.0004946569156007658, 'samples': 506112, 'steps': 10543, 'loss/train': 0.6426680088043213} +07/25/2024 12:15:06 - INFO - __main__ - Step 10545: {'lr': 0.0004946558337686527, 'samples': 506160, 'steps': 10544, 'loss/train': 1.7056841850280762} +07/25/2024 12:15:06 - INFO - __main__ - Step 10546: {'lr': 0.0004946547518282129, 'samples': 506208, 'steps': 10545, 'loss/train': 1.4240463972091675} +07/25/2024 12:15:07 - INFO - __main__ - Step 10547: {'lr': 0.0004946536697794469, 'samples': 506256, 'steps': 10546, 'loss/train': 0.26661190390586853} +07/25/2024 12:15:07 - INFO - __main__ - Step 10548: {'lr': 0.0004946525876223551, 'samples': 506304, 'steps': 10547, 'loss/train': 2.497032403945923} +07/25/2024 12:15:07 - INFO - __main__ - Step 10549: {'lr': 0.000494651505356938, 'samples': 506352, 'steps': 10548, 'loss/train': 2.2584593296051025} +07/25/2024 12:15:08 - INFO - __main__ - Step 10550: {'lr': 0.000494650422983196, 'samples': 506400, 'steps': 10549, 'loss/train': 2.023926258087158} +07/25/2024 12:15:08 - INFO - __main__ - Step 10551: {'lr': 0.0004946493405011297, 'samples': 506448, 'steps': 10550, 'loss/train': 2.0644938945770264} +07/25/2024 12:15:08 - INFO - __main__ - Step 10552: {'lr': 0.0004946482579107395, 'samples': 506496, 'steps': 10551, 'loss/train': 1.9611912965774536} +07/25/2024 12:15:08 - INFO - __main__ - Step 10553: {'lr': 0.000494647175212026, 'samples': 506544, 'steps': 10552, 'loss/train': 1.9773197174072266} +07/25/2024 12:15:09 - INFO - __main__ - Step 10554: {'lr': 0.0004946460924049894, 'samples': 506592, 'steps': 10553, 'loss/train': 2.2196991443634033} +07/25/2024 12:15:09 - INFO - __main__ - Step 10555: {'lr': 0.0004946450094896305, 'samples': 506640, 'steps': 10554, 'loss/train': 2.6183767318725586} +07/25/2024 12:15:09 - INFO - __main__ - Step 10556: {'lr': 0.0004946439264659495, 'samples': 506688, 'steps': 10555, 'loss/train': 2.092989444732666} +07/25/2024 12:15:10 - INFO - __main__ - Step 10557: {'lr': 0.0004946428433339472, 'samples': 506736, 'steps': 10556, 'loss/train': 1.8824267387390137} +07/25/2024 12:15:10 - INFO - __main__ - Step 10558: {'lr': 0.0004946417600936239, 'samples': 506784, 'steps': 10557, 'loss/train': 2.4515576362609863} +07/25/2024 12:15:10 - INFO - __main__ - Step 10559: {'lr': 0.00049464067674498, 'samples': 506832, 'steps': 10558, 'loss/train': 1.727707028388977} +07/25/2024 12:15:10 - INFO - __main__ - Step 10560: {'lr': 0.0004946395932880161, 'samples': 506880, 'steps': 10559, 'loss/train': 2.125671863555908} +07/25/2024 12:15:11 - INFO - __main__ - Step 10561: {'lr': 0.0004946385097227326, 'samples': 506928, 'steps': 10560, 'loss/train': 2.4277877807617188} +07/25/2024 12:15:11 - INFO - __main__ - Step 10562: {'lr': 0.00049463742604913, 'samples': 506976, 'steps': 10561, 'loss/train': 1.6551803350448608} +07/25/2024 12:15:11 - INFO - __main__ - Step 10563: {'lr': 0.0004946363422672088, 'samples': 507024, 'steps': 10562, 'loss/train': 0.14209716022014618} +07/25/2024 12:15:12 - INFO - __main__ - Step 10564: {'lr': 0.0004946352583769697, 'samples': 507072, 'steps': 10563, 'loss/train': 2.0794880390167236} +07/25/2024 12:15:12 - INFO - __main__ - Step 10565: {'lr': 0.0004946341743784127, 'samples': 507120, 'steps': 10564, 'loss/train': 1.5050089359283447} +07/25/2024 12:15:12 - INFO - __main__ - Step 10566: {'lr': 0.0004946330902715387, 'samples': 507168, 'steps': 10565, 'loss/train': 2.2154645919799805} +07/25/2024 12:15:12 - INFO - __main__ - Step 10567: {'lr': 0.0004946320060563479, 'samples': 507216, 'steps': 10566, 'loss/train': 1.473727822303772} +07/25/2024 12:15:13 - INFO - __main__ - Step 10568: {'lr': 0.000494630921732841, 'samples': 507264, 'steps': 10567, 'loss/train': 1.4114253520965576} +07/25/2024 12:15:13 - INFO - __main__ - Step 10569: {'lr': 0.0004946298373010182, 'samples': 507312, 'steps': 10568, 'loss/train': 1.7166166305541992} +07/25/2024 12:15:13 - INFO - __main__ - Step 10570: {'lr': 0.0004946287527608804, 'samples': 507360, 'steps': 10569, 'loss/train': 1.7450259923934937} +07/25/2024 12:15:14 - INFO - __main__ - Step 10571: {'lr': 0.0004946276681124278, 'samples': 507408, 'steps': 10570, 'loss/train': 0.2262483388185501} +07/25/2024 12:15:14 - INFO - __main__ - Step 10572: {'lr': 0.0004946265833556608, 'samples': 507456, 'steps': 10571, 'loss/train': 2.1011910438537598} +07/25/2024 12:15:14 - INFO - __main__ - Step 10573: {'lr': 0.0004946254984905801, 'samples': 507504, 'steps': 10572, 'loss/train': 1.4052931070327759} +07/25/2024 12:15:14 - INFO - __main__ - Step 10574: {'lr': 0.000494624413517186, 'samples': 507552, 'steps': 10573, 'loss/train': 1.7925689220428467} +07/25/2024 12:15:15 - INFO - __main__ - Step 10575: {'lr': 0.0004946233284354791, 'samples': 507600, 'steps': 10574, 'loss/train': 1.9394044876098633} +07/25/2024 12:15:15 - INFO - __main__ - Step 10576: {'lr': 0.0004946222432454599, 'samples': 507648, 'steps': 10575, 'loss/train': 1.7424153089523315} +07/25/2024 12:15:15 - INFO - __main__ - Step 10577: {'lr': 0.0004946211579471289, 'samples': 507696, 'steps': 10576, 'loss/train': 1.992042064666748} +07/25/2024 12:15:16 - INFO - __main__ - Step 10578: {'lr': 0.0004946200725404864, 'samples': 507744, 'steps': 10577, 'loss/train': 1.839147686958313} +07/25/2024 12:15:16 - INFO - __main__ - Step 10579: {'lr': 0.000494618987025533, 'samples': 507792, 'steps': 10578, 'loss/train': 1.9962804317474365} +07/25/2024 12:15:16 - INFO - __main__ - Step 10580: {'lr': 0.0004946179014022691, 'samples': 507840, 'steps': 10579, 'loss/train': 2.122080087661743} +07/25/2024 12:15:16 - INFO - __main__ - Step 10581: {'lr': 0.0004946168156706953, 'samples': 507888, 'steps': 10580, 'loss/train': 1.9832839965820312} +07/25/2024 12:15:17 - INFO - __main__ - Step 10582: {'lr': 0.000494615729830812, 'samples': 507936, 'steps': 10581, 'loss/train': 1.7619549036026} +07/25/2024 12:15:17 - INFO - __main__ - Step 10583: {'lr': 0.0004946146438826197, 'samples': 507984, 'steps': 10582, 'loss/train': 2.3720242977142334} +07/25/2024 12:15:17 - INFO - __main__ - Step 10584: {'lr': 0.0004946135578261189, 'samples': 508032, 'steps': 10583, 'loss/train': 1.5267560482025146} +07/25/2024 12:15:18 - INFO - __main__ - Step 10585: {'lr': 0.0004946124716613102, 'samples': 508080, 'steps': 10584, 'loss/train': 1.4261363744735718} +07/25/2024 12:15:18 - INFO - __main__ - Step 10586: {'lr': 0.0004946113853881938, 'samples': 508128, 'steps': 10585, 'loss/train': 2.162607192993164} +07/25/2024 12:15:18 - INFO - __main__ - Step 10587: {'lr': 0.0004946102990067703, 'samples': 508176, 'steps': 10586, 'loss/train': 0.1386747509241104} +07/25/2024 12:15:18 - INFO - __main__ - Step 10588: {'lr': 0.0004946092125170403, 'samples': 508224, 'steps': 10587, 'loss/train': 1.5664124488830566} +07/25/2024 12:15:19 - INFO - __main__ - Step 10589: {'lr': 0.0004946081259190043, 'samples': 508272, 'steps': 10588, 'loss/train': 1.9556245803833008} +07/25/2024 12:15:19 - INFO - __main__ - Step 10590: {'lr': 0.0004946070392126625, 'samples': 508320, 'steps': 10589, 'loss/train': 3.0217156410217285} +07/25/2024 12:15:19 - INFO - __main__ - Step 10591: {'lr': 0.0004946059523980157, 'samples': 508368, 'steps': 10590, 'loss/train': 1.5143752098083496} +07/25/2024 12:15:20 - INFO - __main__ - Step 10592: {'lr': 0.0004946048654750641, 'samples': 508416, 'steps': 10591, 'loss/train': 1.5332467555999756} +07/25/2024 12:15:20 - INFO - __main__ - Step 10593: {'lr': 0.0004946037784438084, 'samples': 508464, 'steps': 10592, 'loss/train': 1.846714735031128} +07/25/2024 12:15:20 - INFO - __main__ - Step 10594: {'lr': 0.000494602691304249, 'samples': 508512, 'steps': 10593, 'loss/train': 1.555979609489441} +07/25/2024 12:15:20 - INFO - __main__ - Step 10595: {'lr': 0.0004946016040563863, 'samples': 508560, 'steps': 10594, 'loss/train': 0.6406246423721313} +07/25/2024 12:15:21 - INFO - __main__ - Step 10596: {'lr': 0.0004946005167002209, 'samples': 508608, 'steps': 10595, 'loss/train': 2.1295995712280273} +07/25/2024 12:15:21 - INFO - __main__ - Step 10597: {'lr': 0.0004945994292357532, 'samples': 508656, 'steps': 10596, 'loss/train': 2.0994436740875244} +07/25/2024 12:15:21 - INFO - __main__ - Step 10598: {'lr': 0.0004945983416629839, 'samples': 508704, 'steps': 10597, 'loss/train': 1.7333931922912598} +07/25/2024 12:15:22 - INFO - __main__ - Step 10599: {'lr': 0.0004945972539819132, 'samples': 508752, 'steps': 10598, 'loss/train': 1.4528125524520874} +07/25/2024 12:15:22 - INFO - __main__ - Step 10600: {'lr': 0.0004945961661925416, 'samples': 508800, 'steps': 10599, 'loss/train': 1.862259864807129} +07/25/2024 12:15:22 - INFO - __main__ - Step 10601: {'lr': 0.0004945950782948698, 'samples': 508848, 'steps': 10600, 'loss/train': 1.192888855934143} +07/25/2024 12:15:22 - INFO - __main__ - Step 10602: {'lr': 0.0004945939902888982, 'samples': 508896, 'steps': 10601, 'loss/train': 2.23386287689209} +07/25/2024 12:15:23 - INFO - __main__ - Step 10603: {'lr': 0.0004945929021746272, 'samples': 508944, 'steps': 10602, 'loss/train': 2.0402982234954834} +07/25/2024 12:15:23 - INFO - __main__ - Step 10604: {'lr': 0.0004945918139520572, 'samples': 508992, 'steps': 10603, 'loss/train': 1.8409024477005005} +07/25/2024 12:15:23 - INFO - __main__ - Step 10605: {'lr': 0.0004945907256211888, 'samples': 509040, 'steps': 10604, 'loss/train': 1.7523174285888672} +07/25/2024 12:15:23 - INFO - __main__ - Step 10606: {'lr': 0.0004945896371820226, 'samples': 509088, 'steps': 10605, 'loss/train': 2.0155749320983887} +07/25/2024 12:15:24 - INFO - __main__ - Step 10607: {'lr': 0.0004945885486345589, 'samples': 509136, 'steps': 10606, 'loss/train': 2.0115842819213867} +07/25/2024 12:15:24 - INFO - __main__ - Step 10608: {'lr': 0.0004945874599787983, 'samples': 509184, 'steps': 10607, 'loss/train': 1.9491180181503296} +07/25/2024 12:15:24 - INFO - __main__ - Step 10609: {'lr': 0.0004945863712147411, 'samples': 509232, 'steps': 10608, 'loss/train': 1.6021103858947754} +07/25/2024 12:15:25 - INFO - __main__ - Step 10610: {'lr': 0.0004945852823423879, 'samples': 509280, 'steps': 10609, 'loss/train': 1.7334041595458984} +07/25/2024 12:15:25 - INFO - __main__ - Step 10611: {'lr': 0.0004945841933617393, 'samples': 509328, 'steps': 10610, 'loss/train': 0.16214615106582642} +07/25/2024 12:15:25 - INFO - __main__ - Step 10612: {'lr': 0.0004945831042727957, 'samples': 509376, 'steps': 10611, 'loss/train': 1.5219683647155762} +07/25/2024 12:15:25 - INFO - __main__ - Step 10613: {'lr': 0.0004945820150755576, 'samples': 509424, 'steps': 10612, 'loss/train': 2.1150901317596436} +07/25/2024 12:15:26 - INFO - __main__ - Step 10614: {'lr': 0.0004945809257700253, 'samples': 509472, 'steps': 10613, 'loss/train': 2.959043264389038} +07/25/2024 12:15:26 - INFO - __main__ - Step 10615: {'lr': 0.0004945798363561994, 'samples': 509520, 'steps': 10614, 'loss/train': 1.888620376586914} +07/25/2024 12:15:26 - INFO - __main__ - Step 10616: {'lr': 0.0004945787468340805, 'samples': 509568, 'steps': 10615, 'loss/train': 1.6741372346878052} +07/25/2024 12:15:27 - INFO - __main__ - Step 10617: {'lr': 0.0004945776572036689, 'samples': 509616, 'steps': 10616, 'loss/train': 1.5218151807785034} +07/25/2024 12:15:27 - INFO - __main__ - Step 10618: {'lr': 0.0004945765674649653, 'samples': 509664, 'steps': 10617, 'loss/train': 1.3085373640060425} +07/25/2024 12:15:27 - INFO - __main__ - Step 10619: {'lr': 0.0004945754776179699, 'samples': 509712, 'steps': 10618, 'loss/train': 1.9405170679092407} +07/25/2024 12:15:27 - INFO - __main__ - Step 10620: {'lr': 0.0004945743876626833, 'samples': 509760, 'steps': 10619, 'loss/train': 1.7951518297195435} +07/25/2024 12:15:28 - INFO - __main__ - Step 10621: {'lr': 0.0004945732975991062, 'samples': 509808, 'steps': 10620, 'loss/train': 1.9615228176116943} +07/25/2024 12:15:28 - INFO - __main__ - Step 10622: {'lr': 0.0004945722074272388, 'samples': 509856, 'steps': 10621, 'loss/train': 1.8652710914611816} +07/25/2024 12:15:28 - INFO - __main__ - Step 10623: {'lr': 0.0004945711171470817, 'samples': 509904, 'steps': 10622, 'loss/train': 2.099738359451294} +07/25/2024 12:15:29 - INFO - __main__ - Step 10624: {'lr': 0.0004945700267586353, 'samples': 509952, 'steps': 10623, 'loss/train': 1.8272755146026611} +07/25/2024 12:15:29 - INFO - __main__ - Step 10625: {'lr': 0.0004945689362619002, 'samples': 510000, 'steps': 10624, 'loss/train': 0.9206641316413879} +07/25/2024 12:15:29 - INFO - __main__ - Step 10626: {'lr': 0.0004945678456568769, 'samples': 510048, 'steps': 10625, 'loss/train': 2.424973487854004} +07/25/2024 12:15:29 - INFO - __main__ - Step 10627: {'lr': 0.0004945667549435657, 'samples': 510096, 'steps': 10626, 'loss/train': 1.3475418090820312} +07/25/2024 12:15:30 - INFO - __main__ - Step 10628: {'lr': 0.0004945656641219671, 'samples': 510144, 'steps': 10627, 'loss/train': 1.837019681930542} +07/25/2024 12:15:30 - INFO - __main__ - Step 10629: {'lr': 0.0004945645731920818, 'samples': 510192, 'steps': 10628, 'loss/train': 1.7349069118499756} +07/25/2024 12:15:30 - INFO - __main__ - Step 10630: {'lr': 0.0004945634821539103, 'samples': 510240, 'steps': 10629, 'loss/train': 2.1849379539489746} +07/25/2024 12:15:31 - INFO - __main__ - Step 10631: {'lr': 0.0004945623910074527, 'samples': 510288, 'steps': 10630, 'loss/train': 2.330871820449829} +07/25/2024 12:15:31 - INFO - __main__ - Step 10632: {'lr': 0.0004945612997527098, 'samples': 510336, 'steps': 10631, 'loss/train': 1.5774385929107666} +07/25/2024 12:15:31 - INFO - __main__ - Step 10633: {'lr': 0.000494560208389682, 'samples': 510384, 'steps': 10632, 'loss/train': 1.4282641410827637} +07/25/2024 12:15:31 - INFO - __main__ - Step 10634: {'lr': 0.0004945591169183698, 'samples': 510432, 'steps': 10633, 'loss/train': 2.1512489318847656} +07/25/2024 12:15:32 - INFO - __main__ - Step 10635: {'lr': 0.0004945580253387738, 'samples': 510480, 'steps': 10634, 'loss/train': 0.17744410037994385} +07/25/2024 12:15:32 - INFO - __main__ - Step 10636: {'lr': 0.0004945569336508943, 'samples': 510528, 'steps': 10635, 'loss/train': 1.6673921346664429} +07/25/2024 12:15:32 - INFO - __main__ - Step 10637: {'lr': 0.0004945558418547317, 'samples': 510576, 'steps': 10636, 'loss/train': 1.6741552352905273} +07/25/2024 12:15:33 - INFO - __main__ - Step 10638: {'lr': 0.0004945547499502868, 'samples': 510624, 'steps': 10637, 'loss/train': 2.3827428817749023} +07/25/2024 12:15:33 - INFO - __main__ - Step 10639: {'lr': 0.0004945536579375598, 'samples': 510672, 'steps': 10638, 'loss/train': 2.1484062671661377} +07/25/2024 12:15:33 - INFO - __main__ - Step 10640: {'lr': 0.0004945525658165514, 'samples': 510720, 'steps': 10639, 'loss/train': 1.6944396495819092} +07/25/2024 12:15:33 - INFO - __main__ - Step 10641: {'lr': 0.0004945514735872618, 'samples': 510768, 'steps': 10640, 'loss/train': 1.9560447931289673} +07/25/2024 12:15:34 - INFO - __main__ - Step 10642: {'lr': 0.0004945503812496917, 'samples': 510816, 'steps': 10641, 'loss/train': 1.675743818283081} +07/25/2024 12:15:34 - INFO - __main__ - Step 10643: {'lr': 0.0004945492888038417, 'samples': 510864, 'steps': 10642, 'loss/train': 1.4314029216766357} +07/25/2024 12:15:34 - INFO - __main__ - Step 10644: {'lr': 0.000494548196249712, 'samples': 510912, 'steps': 10643, 'loss/train': 1.7984297275543213} +07/25/2024 12:15:35 - INFO - __main__ - Step 10645: {'lr': 0.0004945471035873033, 'samples': 510960, 'steps': 10644, 'loss/train': 1.9201105833053589} +07/25/2024 12:15:35 - INFO - __main__ - Step 10646: {'lr': 0.0004945460108166159, 'samples': 511008, 'steps': 10645, 'loss/train': 1.68024742603302} +07/25/2024 12:15:35 - INFO - __main__ - Step 10647: {'lr': 0.0004945449179376505, 'samples': 511056, 'steps': 10646, 'loss/train': 1.6918963193893433} +07/25/2024 12:15:35 - INFO - __main__ - Step 10648: {'lr': 0.0004945438249504073, 'samples': 511104, 'steps': 10647, 'loss/train': 1.7285478115081787} +07/25/2024 12:15:36 - INFO - __main__ - Step 10649: {'lr': 0.0004945427318548871, 'samples': 511152, 'steps': 10648, 'loss/train': 1.0136961936950684} +07/25/2024 12:15:36 - INFO - __main__ - Step 10650: {'lr': 0.0004945416386510902, 'samples': 511200, 'steps': 10649, 'loss/train': 3.632708787918091} +07/25/2024 12:15:36 - INFO - __main__ - Step 10651: {'lr': 0.0004945405453390169, 'samples': 511248, 'steps': 10650, 'loss/train': 1.7515783309936523} +07/25/2024 12:15:37 - INFO - __main__ - Step 10652: {'lr': 0.0004945394519186681, 'samples': 511296, 'steps': 10651, 'loss/train': 1.7981683015823364} +07/25/2024 12:15:37 - INFO - __main__ - Step 10653: {'lr': 0.0004945383583900441, 'samples': 511344, 'steps': 10652, 'loss/train': 1.1089448928833008} +07/25/2024 12:15:37 - INFO - __main__ - Step 10654: {'lr': 0.0004945372647531453, 'samples': 511392, 'steps': 10653, 'loss/train': 2.143301486968994} +07/25/2024 12:15:37 - INFO - __main__ - Step 10655: {'lr': 0.0004945361710079723, 'samples': 511440, 'steps': 10654, 'loss/train': 1.8500851392745972} +07/25/2024 12:15:38 - INFO - __main__ - Step 10656: {'lr': 0.0004945350771545255, 'samples': 511488, 'steps': 10655, 'loss/train': 2.2668964862823486} +07/25/2024 12:15:38 - INFO - __main__ - Step 10657: {'lr': 0.0004945339831928054, 'samples': 511536, 'steps': 10656, 'loss/train': 0.9987391829490662} +07/25/2024 12:15:38 - INFO - __main__ - Step 10658: {'lr': 0.0004945328891228126, 'samples': 511584, 'steps': 10657, 'loss/train': 3.359147787094116} +07/25/2024 12:15:39 - INFO - __main__ - Step 10659: {'lr': 0.0004945317949445473, 'samples': 511632, 'steps': 10658, 'loss/train': 0.15774819254875183} +07/25/2024 12:15:39 - INFO - __main__ - Step 10660: {'lr': 0.0004945307006580103, 'samples': 511680, 'steps': 10659, 'loss/train': 2.0329675674438477} +07/25/2024 12:15:39 - INFO - __main__ - Step 10661: {'lr': 0.000494529606263202, 'samples': 511728, 'steps': 10660, 'loss/train': 1.5801246166229248} +07/25/2024 12:15:39 - INFO - __main__ - Step 10662: {'lr': 0.0004945285117601228, 'samples': 511776, 'steps': 10661, 'loss/train': 2.5955872535705566} +07/25/2024 12:15:40 - INFO - __main__ - Step 10663: {'lr': 0.0004945274171487732, 'samples': 511824, 'steps': 10662, 'loss/train': 1.5287586450576782} +07/25/2024 12:15:40 - INFO - __main__ - Step 10664: {'lr': 0.0004945263224291537, 'samples': 511872, 'steps': 10663, 'loss/train': 1.8941675424575806} +07/25/2024 12:15:40 - INFO - __main__ - Step 10665: {'lr': 0.0004945252276012649, 'samples': 511920, 'steps': 10664, 'loss/train': 2.0203652381896973} +07/25/2024 12:15:41 - INFO - __main__ - Step 10666: {'lr': 0.0004945241326651071, 'samples': 511968, 'steps': 10665, 'loss/train': 1.7996240854263306} +07/25/2024 12:15:41 - INFO - __main__ - Step 10667: {'lr': 0.0004945230376206808, 'samples': 512016, 'steps': 10666, 'loss/train': 2.0600273609161377} +07/25/2024 12:15:41 - INFO - __main__ - Step 10668: {'lr': 0.0004945219424679866, 'samples': 512064, 'steps': 10667, 'loss/train': 1.818935751914978} +07/25/2024 12:15:41 - INFO - __main__ - Step 10669: {'lr': 0.000494520847207025, 'samples': 512112, 'steps': 10668, 'loss/train': 0.8802597522735596} +07/25/2024 12:15:42 - INFO - __main__ - Step 10670: {'lr': 0.0004945197518377964, 'samples': 512160, 'steps': 10669, 'loss/train': 1.6116598844528198} +07/25/2024 12:15:42 - INFO - __main__ - Step 10671: {'lr': 0.0004945186563603013, 'samples': 512208, 'steps': 10670, 'loss/train': 1.9703364372253418} +07/25/2024 12:15:42 - INFO - __main__ - Step 10672: {'lr': 0.0004945175607745402, 'samples': 512256, 'steps': 10671, 'loss/train': 0.7639942169189453} +07/25/2024 12:15:43 - INFO - __main__ - Step 10673: {'lr': 0.0004945164650805136, 'samples': 512304, 'steps': 10672, 'loss/train': 1.2740113735198975} +07/25/2024 12:15:43 - INFO - __main__ - Step 10674: {'lr': 0.0004945153692782218, 'samples': 512352, 'steps': 10673, 'loss/train': 2.5994365215301514} +07/25/2024 12:15:43 - INFO - __main__ - Step 10675: {'lr': 0.0004945142733676656, 'samples': 512400, 'steps': 10674, 'loss/train': 1.8296691179275513} +07/25/2024 12:15:43 - INFO - __main__ - Step 10676: {'lr': 0.0004945131773488453, 'samples': 512448, 'steps': 10675, 'loss/train': 2.0240719318389893} +07/25/2024 12:15:44 - INFO - __main__ - Step 10677: {'lr': 0.0004945120812217613, 'samples': 512496, 'steps': 10676, 'loss/train': 0.37178298830986023} +07/25/2024 12:15:44 - INFO - __main__ - Step 10678: {'lr': 0.0004945109849864144, 'samples': 512544, 'steps': 10677, 'loss/train': 1.8430842161178589} +07/25/2024 12:15:44 - INFO - __main__ - Step 10679: {'lr': 0.0004945098886428047, 'samples': 512592, 'steps': 10678, 'loss/train': 1.579896330833435} +07/25/2024 12:15:45 - INFO - __main__ - Step 10680: {'lr': 0.000494508792190933, 'samples': 512640, 'steps': 10679, 'loss/train': 1.6885544061660767} +07/25/2024 12:15:45 - INFO - __main__ - Step 10681: {'lr': 0.0004945076956307996, 'samples': 512688, 'steps': 10680, 'loss/train': 2.096580982208252} +07/25/2024 12:15:45 - INFO - __main__ - Step 10682: {'lr': 0.0004945065989624051, 'samples': 512736, 'steps': 10681, 'loss/train': 2.24806809425354} +07/25/2024 12:15:45 - INFO - __main__ - Step 10683: {'lr': 0.0004945055021857498, 'samples': 512784, 'steps': 10682, 'loss/train': 0.12042363733053207} +07/25/2024 12:15:46 - INFO - __main__ - Step 10684: {'lr': 0.0004945044053008344, 'samples': 512832, 'steps': 10683, 'loss/train': 1.979325294494629} +07/25/2024 12:15:46 - INFO - __main__ - Step 10685: {'lr': 0.0004945033083076593, 'samples': 512880, 'steps': 10684, 'loss/train': 1.5638768672943115} +07/25/2024 12:15:46 - INFO - __main__ - Step 10686: {'lr': 0.0004945022112062251, 'samples': 512928, 'steps': 10685, 'loss/train': 1.2484248876571655} +07/25/2024 12:15:46 - INFO - __main__ - Step 10687: {'lr': 0.000494501113996532, 'samples': 512976, 'steps': 10686, 'loss/train': 1.149600625038147} +07/25/2024 12:15:47 - INFO - __main__ - Step 10688: {'lr': 0.0004945000166785808, 'samples': 513024, 'steps': 10687, 'loss/train': 2.0457570552825928} +07/25/2024 12:15:47 - INFO - __main__ - Step 10689: {'lr': 0.0004944989192523717, 'samples': 513072, 'steps': 10688, 'loss/train': 2.022160291671753} +07/25/2024 12:15:47 - INFO - __main__ - Step 10690: {'lr': 0.0004944978217179054, 'samples': 513120, 'steps': 10689, 'loss/train': 2.0372469425201416} +07/25/2024 12:15:48 - INFO - __main__ - Step 10691: {'lr': 0.0004944967240751823, 'samples': 513168, 'steps': 10690, 'loss/train': 1.9252002239227295} +07/25/2024 12:15:48 - INFO - __main__ - Step 10692: {'lr': 0.000494495626324203, 'samples': 513216, 'steps': 10691, 'loss/train': 1.422151803970337} +07/25/2024 12:15:48 - INFO - __main__ - Step 10693: {'lr': 0.0004944945284649677, 'samples': 513264, 'steps': 10692, 'loss/train': 2.090731620788574} +07/25/2024 12:15:48 - INFO - __main__ - Step 10694: {'lr': 0.0004944934304974772, 'samples': 513312, 'steps': 10693, 'loss/train': 1.979947566986084} +07/25/2024 12:15:49 - INFO - __main__ - Step 10695: {'lr': 0.0004944923324217319, 'samples': 513360, 'steps': 10694, 'loss/train': 1.8361105918884277} +07/25/2024 12:15:49 - INFO - __main__ - Step 10696: {'lr': 0.0004944912342377321, 'samples': 513408, 'steps': 10695, 'loss/train': 1.7762596607208252} +07/25/2024 12:15:49 - INFO - __main__ - Step 10697: {'lr': 0.0004944901359454785, 'samples': 513456, 'steps': 10696, 'loss/train': 1.3090654611587524} +07/25/2024 12:15:50 - INFO - __main__ - Step 10698: {'lr': 0.0004944890375449716, 'samples': 513504, 'steps': 10697, 'loss/train': 2.196974277496338} +07/25/2024 12:15:50 - INFO - __main__ - Step 10699: {'lr': 0.0004944879390362117, 'samples': 513552, 'steps': 10698, 'loss/train': 2.2012178897857666} +07/25/2024 12:15:50 - INFO - __main__ - Step 10700: {'lr': 0.0004944868404191994, 'samples': 513600, 'steps': 10699, 'loss/train': 1.4873650074005127} +07/25/2024 12:15:50 - INFO - __main__ - Step 10701: {'lr': 0.0004944857416939352, 'samples': 513648, 'steps': 10700, 'loss/train': 0.2129158079624176} +07/25/2024 12:15:51 - INFO - __main__ - Step 10702: {'lr': 0.0004944846428604195, 'samples': 513696, 'steps': 10701, 'loss/train': 2.3291518688201904} +07/25/2024 12:15:51 - INFO - __main__ - Step 10703: {'lr': 0.0004944835439186529, 'samples': 513744, 'steps': 10702, 'loss/train': 2.102860689163208} +07/25/2024 12:15:51 - INFO - __main__ - Step 10704: {'lr': 0.0004944824448686359, 'samples': 513792, 'steps': 10703, 'loss/train': 2.296177387237549} +07/25/2024 12:15:52 - INFO - __main__ - Step 10705: {'lr': 0.0004944813457103689, 'samples': 513840, 'steps': 10704, 'loss/train': 2.0440268516540527} +07/25/2024 12:15:52 - INFO - __main__ - Step 10706: {'lr': 0.0004944802464438524, 'samples': 513888, 'steps': 10705, 'loss/train': 2.0417144298553467} +07/25/2024 12:15:52 - INFO - __main__ - Step 10707: {'lr': 0.0004944791470690869, 'samples': 513936, 'steps': 10706, 'loss/train': 0.17704664170742035} +07/25/2024 12:15:52 - INFO - __main__ - Step 10708: {'lr': 0.0004944780475860728, 'samples': 513984, 'steps': 10707, 'loss/train': 1.832165002822876} +07/25/2024 12:15:53 - INFO - __main__ - Step 10709: {'lr': 0.0004944769479948108, 'samples': 514032, 'steps': 10708, 'loss/train': 2.2640841007232666} +07/25/2024 12:15:53 - INFO - __main__ - Step 10710: {'lr': 0.0004944758482953012, 'samples': 514080, 'steps': 10709, 'loss/train': 2.3330814838409424} +07/25/2024 12:15:53 - INFO - __main__ - Step 10711: {'lr': 0.0004944747484875445, 'samples': 514128, 'steps': 10710, 'loss/train': 1.2774773836135864} +07/25/2024 12:15:54 - INFO - __main__ - Step 10712: {'lr': 0.0004944736485715414, 'samples': 514176, 'steps': 10711, 'loss/train': 2.004241704940796} +07/25/2024 12:15:54 - INFO - __main__ - Step 10713: {'lr': 0.0004944725485472919, 'samples': 514224, 'steps': 10712, 'loss/train': 2.0737619400024414} +07/25/2024 12:15:54 - INFO - __main__ - Step 10714: {'lr': 0.0004944714484147971, 'samples': 514272, 'steps': 10713, 'loss/train': 2.893357753753662} +07/25/2024 12:15:54 - INFO - __main__ - Step 10715: {'lr': 0.0004944703481740571, 'samples': 514320, 'steps': 10714, 'loss/train': 1.8481080532073975} +07/25/2024 12:15:55 - INFO - __main__ - Step 10716: {'lr': 0.0004944692478250724, 'samples': 514368, 'steps': 10715, 'loss/train': 2.0370168685913086} +07/25/2024 12:15:55 - INFO - __main__ - Step 10717: {'lr': 0.0004944681473678436, 'samples': 514416, 'steps': 10716, 'loss/train': 1.3032667636871338} +07/25/2024 12:15:55 - INFO - __main__ - Step 10718: {'lr': 0.0004944670468023713, 'samples': 514464, 'steps': 10717, 'loss/train': 2.096033811569214} +07/25/2024 12:15:56 - INFO - __main__ - Step 10719: {'lr': 0.0004944659461286557, 'samples': 514512, 'steps': 10718, 'loss/train': 1.673536777496338} +07/25/2024 12:15:56 - INFO - __main__ - Step 10720: {'lr': 0.0004944648453466975, 'samples': 514560, 'steps': 10719, 'loss/train': 1.6234604120254517} +07/25/2024 12:15:56 - INFO - __main__ - Step 10721: {'lr': 0.000494463744456497, 'samples': 514608, 'steps': 10720, 'loss/train': 1.159556269645691} +07/25/2024 12:15:56 - INFO - __main__ - Step 10722: {'lr': 0.0004944626434580549, 'samples': 514656, 'steps': 10721, 'loss/train': 2.140594482421875} +07/25/2024 12:15:57 - INFO - __main__ - Step 10723: {'lr': 0.0004944615423513716, 'samples': 514704, 'steps': 10722, 'loss/train': 1.985074520111084} +07/25/2024 12:15:57 - INFO - __main__ - Step 10724: {'lr': 0.0004944604411364475, 'samples': 514752, 'steps': 10723, 'loss/train': 2.118452548980713} +07/25/2024 12:15:57 - INFO - __main__ - Step 10725: {'lr': 0.0004944593398132832, 'samples': 514800, 'steps': 10724, 'loss/train': 0.15915346145629883} +07/25/2024 12:15:58 - INFO - __main__ - Step 10726: {'lr': 0.0004944582383818792, 'samples': 514848, 'steps': 10725, 'loss/train': 1.9744287729263306} +07/25/2024 12:15:58 - INFO - __main__ - Step 10727: {'lr': 0.0004944571368422359, 'samples': 514896, 'steps': 10726, 'loss/train': 2.242337465286255} +07/25/2024 12:15:58 - INFO - __main__ - Step 10728: {'lr': 0.0004944560351943539, 'samples': 514944, 'steps': 10727, 'loss/train': 1.3872114419937134} +07/25/2024 12:15:58 - INFO - __main__ - Step 10729: {'lr': 0.0004944549334382336, 'samples': 514992, 'steps': 10728, 'loss/train': 1.8341361284255981} +07/25/2024 12:15:59 - INFO - __main__ - Step 10730: {'lr': 0.0004944538315738754, 'samples': 515040, 'steps': 10729, 'loss/train': 1.6008737087249756} +07/25/2024 12:15:59 - INFO - __main__ - Step 10731: {'lr': 0.0004944527296012799, 'samples': 515088, 'steps': 10730, 'loss/train': 0.1696408987045288} +07/25/2024 12:15:59 - INFO - __main__ - Step 10732: {'lr': 0.0004944516275204477, 'samples': 515136, 'steps': 10731, 'loss/train': 1.8127268552780151} +07/25/2024 12:16:00 - INFO - __main__ - Step 10733: {'lr': 0.0004944505253313791, 'samples': 515184, 'steps': 10732, 'loss/train': 1.3351601362228394} +07/25/2024 12:16:00 - INFO - __main__ - Step 10734: {'lr': 0.0004944494230340747, 'samples': 515232, 'steps': 10733, 'loss/train': 2.0384161472320557} +07/25/2024 12:16:00 - INFO - __main__ - Step 10735: {'lr': 0.0004944483206285349, 'samples': 515280, 'steps': 10734, 'loss/train': 1.9399566650390625} +07/25/2024 12:16:00 - INFO - __main__ - Step 10736: {'lr': 0.0004944472181147602, 'samples': 515328, 'steps': 10735, 'loss/train': 1.6803802251815796} +07/25/2024 12:16:01 - INFO - __main__ - Step 10737: {'lr': 0.0004944461154927512, 'samples': 515376, 'steps': 10736, 'loss/train': 1.960642695426941} +07/25/2024 12:16:01 - INFO - __main__ - Step 10738: {'lr': 0.0004944450127625082, 'samples': 515424, 'steps': 10737, 'loss/train': 2.2502059936523438} +07/25/2024 12:16:01 - INFO - __main__ - Step 10739: {'lr': 0.0004944439099240319, 'samples': 515472, 'steps': 10738, 'loss/train': 2.1370999813079834} +07/25/2024 12:16:02 - INFO - __main__ - Step 10740: {'lr': 0.0004944428069773226, 'samples': 515520, 'steps': 10739, 'loss/train': 1.8609843254089355} +07/25/2024 12:16:02 - INFO - __main__ - Step 10741: {'lr': 0.000494441703922381, 'samples': 515568, 'steps': 10740, 'loss/train': 2.1154229640960693} +07/25/2024 12:16:02 - INFO - __main__ - Step 10742: {'lr': 0.0004944406007592074, 'samples': 515616, 'steps': 10741, 'loss/train': 1.7206803560256958} +07/25/2024 12:16:02 - INFO - __main__ - Step 10743: {'lr': 0.0004944394974878023, 'samples': 515664, 'steps': 10742, 'loss/train': 2.1957600116729736} +07/25/2024 12:16:03 - INFO - __main__ - Step 10744: {'lr': 0.0004944383941081663, 'samples': 515712, 'steps': 10743, 'loss/train': 1.5645267963409424} +07/25/2024 12:16:03 - INFO - __main__ - Step 10745: {'lr': 0.0004944372906202998, 'samples': 515760, 'steps': 10744, 'loss/train': 1.2086071968078613} +07/25/2024 12:16:03 - INFO - __main__ - Step 10746: {'lr': 0.0004944361870242033, 'samples': 515808, 'steps': 10745, 'loss/train': 2.1281533241271973} +07/25/2024 12:16:04 - INFO - __main__ - Step 10747: {'lr': 0.0004944350833198774, 'samples': 515856, 'steps': 10746, 'loss/train': 1.86782705783844} +07/25/2024 12:16:04 - INFO - __main__ - Step 10748: {'lr': 0.0004944339795073224, 'samples': 515904, 'steps': 10747, 'loss/train': 2.7357888221740723} +07/25/2024 12:16:04 - INFO - __main__ - Step 10749: {'lr': 0.000494432875586539, 'samples': 515952, 'steps': 10748, 'loss/train': 0.17896856367588043} +07/25/2024 12:16:04 - INFO - __main__ - Step 10750: {'lr': 0.0004944317715575274, 'samples': 516000, 'steps': 10749, 'loss/train': 2.399489402770996} +07/25/2024 12:16:05 - INFO - __main__ - Step 10751: {'lr': 0.0004944306674202883, 'samples': 516048, 'steps': 10750, 'loss/train': 2.076535701751709} +07/25/2024 12:16:05 - INFO - __main__ - Step 10752: {'lr': 0.0004944295631748222, 'samples': 516096, 'steps': 10751, 'loss/train': 1.8158656358718872} +07/25/2024 12:16:05 - INFO - __main__ - Step 10753: {'lr': 0.0004944284588211295, 'samples': 516144, 'steps': 10752, 'loss/train': 2.1199777126312256} +07/25/2024 12:16:06 - INFO - __main__ - Step 10754: {'lr': 0.0004944273543592108, 'samples': 516192, 'steps': 10753, 'loss/train': 1.9266585111618042} +07/25/2024 12:16:06 - INFO - __main__ - Step 10755: {'lr': 0.0004944262497890665, 'samples': 516240, 'steps': 10754, 'loss/train': 0.17531810700893402} +07/25/2024 12:16:06 - INFO - __main__ - Step 10756: {'lr': 0.0004944251451106971, 'samples': 516288, 'steps': 10755, 'loss/train': 2.4175310134887695} +07/25/2024 12:16:06 - INFO - __main__ - Step 10757: {'lr': 0.000494424040324103, 'samples': 516336, 'steps': 10756, 'loss/train': 2.3353114128112793} +07/25/2024 12:16:07 - INFO - __main__ - Step 10758: {'lr': 0.0004944229354292848, 'samples': 516384, 'steps': 10757, 'loss/train': 2.0668575763702393} +07/25/2024 12:16:07 - INFO - __main__ - Step 10759: {'lr': 0.0004944218304262432, 'samples': 516432, 'steps': 10758, 'loss/train': 1.9892024993896484} +07/25/2024 12:16:07 - INFO - __main__ - Step 10760: {'lr': 0.0004944207253149782, 'samples': 516480, 'steps': 10759, 'loss/train': 1.8826061487197876} +07/25/2024 12:16:08 - INFO - __main__ - Step 10761: {'lr': 0.0004944196200954906, 'samples': 516528, 'steps': 10760, 'loss/train': 2.1124517917633057} +07/25/2024 12:16:08 - INFO - __main__ - Step 10762: {'lr': 0.0004944185147677809, 'samples': 516576, 'steps': 10761, 'loss/train': 1.9834191799163818} +07/25/2024 12:16:08 - INFO - __main__ - Step 10763: {'lr': 0.0004944174093318495, 'samples': 516624, 'steps': 10762, 'loss/train': 1.6374009847640991} +07/25/2024 12:16:08 - INFO - __main__ - Step 10764: {'lr': 0.000494416303787697, 'samples': 516672, 'steps': 10763, 'loss/train': 1.928983449935913} +07/25/2024 12:16:09 - INFO - __main__ - Step 10765: {'lr': 0.0004944151981353237, 'samples': 516720, 'steps': 10764, 'loss/train': 2.5468380451202393} +07/25/2024 12:16:09 - INFO - __main__ - Step 10766: {'lr': 0.0004944140923747302, 'samples': 516768, 'steps': 10765, 'loss/train': 1.9341955184936523} +07/25/2024 12:16:09 - INFO - __main__ - Step 10767: {'lr': 0.0004944129865059171, 'samples': 516816, 'steps': 10766, 'loss/train': 2.214810848236084} +07/25/2024 12:16:10 - INFO - __main__ - Step 10768: {'lr': 0.0004944118805288847, 'samples': 516864, 'steps': 10767, 'loss/train': 2.0992212295532227} +07/25/2024 12:16:10 - INFO - __main__ - Step 10769: {'lr': 0.0004944107744436336, 'samples': 516912, 'steps': 10768, 'loss/train': 0.9145034551620483} +07/25/2024 12:16:10 - INFO - __main__ - Step 10770: {'lr': 0.0004944096682501642, 'samples': 516960, 'steps': 10769, 'loss/train': 2.278001070022583} +07/25/2024 12:16:10 - INFO - __main__ - Step 10771: {'lr': 0.000494408561948477, 'samples': 517008, 'steps': 10770, 'loss/train': 1.7317404747009277} +07/25/2024 12:16:11 - INFO - __main__ - Step 10772: {'lr': 0.0004944074555385727, 'samples': 517056, 'steps': 10771, 'loss/train': 2.3118226528167725} +07/25/2024 12:16:11 - INFO - __main__ - Step 10773: {'lr': 0.0004944063490204516, 'samples': 517104, 'steps': 10772, 'loss/train': 0.18612690269947052} +07/25/2024 12:16:11 - INFO - __main__ - Step 10774: {'lr': 0.0004944052423941141, 'samples': 517152, 'steps': 10773, 'loss/train': 1.9407124519348145} +07/25/2024 12:16:11 - INFO - __main__ - Step 10775: {'lr': 0.0004944041356595609, 'samples': 517200, 'steps': 10774, 'loss/train': 1.5867843627929688} +07/25/2024 12:16:12 - INFO - __main__ - Step 10776: {'lr': 0.0004944030288167923, 'samples': 517248, 'steps': 10775, 'loss/train': 1.5976862907409668} +07/25/2024 12:16:12 - INFO - __main__ - Step 10777: {'lr': 0.000494401921865809, 'samples': 517296, 'steps': 10776, 'loss/train': 1.3567839860916138} +07/25/2024 12:16:12 - INFO - __main__ - Step 10778: {'lr': 0.0004944008148066113, 'samples': 517344, 'steps': 10777, 'loss/train': 2.092169761657715} +07/25/2024 12:16:13 - INFO - __main__ - Step 10779: {'lr': 0.0004943997076391998, 'samples': 517392, 'steps': 10778, 'loss/train': 1.011039137840271} +07/25/2024 12:16:13 - INFO - __main__ - Step 10780: {'lr': 0.000494398600363575, 'samples': 517440, 'steps': 10779, 'loss/train': 1.9800052642822266} +07/25/2024 12:16:13 - INFO - __main__ - Step 10781: {'lr': 0.0004943974929797373, 'samples': 517488, 'steps': 10780, 'loss/train': 2.098480463027954} +07/25/2024 12:16:13 - INFO - __main__ - Step 10782: {'lr': 0.0004943963854876872, 'samples': 517536, 'steps': 10781, 'loss/train': 2.2716968059539795} +07/25/2024 12:16:14 - INFO - __main__ - Step 10783: {'lr': 0.0004943952778874252, 'samples': 517584, 'steps': 10782, 'loss/train': 2.12821102142334} +07/25/2024 12:16:14 - INFO - __main__ - Step 10784: {'lr': 0.000494394170178952, 'samples': 517632, 'steps': 10783, 'loss/train': 1.3999602794647217} +07/25/2024 12:16:14 - INFO - __main__ - Step 10785: {'lr': 0.0004943930623622677, 'samples': 517680, 'steps': 10784, 'loss/train': 2.205916404724121} +07/25/2024 12:16:15 - INFO - __main__ - Step 10786: {'lr': 0.0004943919544373732, 'samples': 517728, 'steps': 10785, 'loss/train': 1.7050352096557617} +07/25/2024 12:16:15 - INFO - __main__ - Step 10787: {'lr': 0.0004943908464042685, 'samples': 517776, 'steps': 10786, 'loss/train': 2.3651609420776367} +07/25/2024 12:16:15 - INFO - __main__ - Step 10788: {'lr': 0.0004943897382629546, 'samples': 517824, 'steps': 10787, 'loss/train': 1.3380608558654785} +07/25/2024 12:16:15 - INFO - __main__ - Step 10789: {'lr': 0.0004943886300134318, 'samples': 517872, 'steps': 10788, 'loss/train': 2.2609963417053223} +07/25/2024 12:16:16 - INFO - __main__ - Step 10790: {'lr': 0.0004943875216557005, 'samples': 517920, 'steps': 10789, 'loss/train': 1.2375895977020264} +07/25/2024 12:16:16 - INFO - __main__ - Step 10791: {'lr': 0.0004943864131897612, 'samples': 517968, 'steps': 10790, 'loss/train': 1.7051756381988525} +07/25/2024 12:16:16 - INFO - __main__ - Step 10792: {'lr': 0.0004943853046156145, 'samples': 518016, 'steps': 10791, 'loss/train': 1.8633464574813843} +07/25/2024 12:16:17 - INFO - __main__ - Step 10793: {'lr': 0.0004943841959332609, 'samples': 518064, 'steps': 10792, 'loss/train': 0.8647708892822266} +07/25/2024 12:16:17 - INFO - __main__ - Step 10794: {'lr': 0.0004943830871427008, 'samples': 518112, 'steps': 10793, 'loss/train': 1.977771520614624} +07/25/2024 12:16:17 - INFO - __main__ - Step 10795: {'lr': 0.0004943819782439347, 'samples': 518160, 'steps': 10794, 'loss/train': 1.227334976196289} +07/25/2024 12:16:17 - INFO - __main__ - Step 10796: {'lr': 0.000494380869236963, 'samples': 518208, 'steps': 10795, 'loss/train': 1.8483833074569702} +07/25/2024 12:16:18 - INFO - __main__ - Step 10797: {'lr': 0.0004943797601217863, 'samples': 518256, 'steps': 10796, 'loss/train': 0.2663266062736511} +07/25/2024 12:16:18 - INFO - __main__ - Step 10798: {'lr': 0.0004943786508984052, 'samples': 518304, 'steps': 10797, 'loss/train': 1.928139567375183} +07/25/2024 12:16:18 - INFO - __main__ - Step 10799: {'lr': 0.0004943775415668201, 'samples': 518352, 'steps': 10798, 'loss/train': 1.6184617280960083} +07/25/2024 12:16:19 - INFO - __main__ - Step 10800: {'lr': 0.0004943764321270315, 'samples': 518400, 'steps': 10799, 'loss/train': 1.558488130569458} +07/25/2024 12:16:19 - INFO - __main__ - Step 10801: {'lr': 0.0004943753225790397, 'samples': 518448, 'steps': 10800, 'loss/train': 1.938657283782959} +07/25/2024 12:16:19 - INFO - __main__ - Step 10802: {'lr': 0.0004943742129228455, 'samples': 518496, 'steps': 10801, 'loss/train': 2.124009370803833} +07/25/2024 12:16:19 - INFO - __main__ - Step 10803: {'lr': 0.0004943731031584491, 'samples': 518544, 'steps': 10802, 'loss/train': 1.8702892065048218} +07/25/2024 12:16:20 - INFO - __main__ - Step 10804: {'lr': 0.0004943719932858512, 'samples': 518592, 'steps': 10803, 'loss/train': 2.1629772186279297} +07/25/2024 12:16:20 - INFO - __main__ - Step 10805: {'lr': 0.0004943708833050523, 'samples': 518640, 'steps': 10804, 'loss/train': 1.8900455236434937} +07/25/2024 12:16:20 - INFO - __main__ - Step 10806: {'lr': 0.0004943697732160527, 'samples': 518688, 'steps': 10805, 'loss/train': 2.205090045928955} +07/25/2024 12:16:21 - INFO - __main__ - Step 10807: {'lr': 0.0004943686630188532, 'samples': 518736, 'steps': 10806, 'loss/train': 1.9096511602401733} +07/25/2024 12:16:21 - INFO - __main__ - Step 10808: {'lr': 0.0004943675527134539, 'samples': 518784, 'steps': 10807, 'loss/train': 2.050483465194702} +07/25/2024 12:16:21 - INFO - __main__ - Step 10809: {'lr': 0.0004943664422998556, 'samples': 518832, 'steps': 10808, 'loss/train': 2.0927679538726807} +07/25/2024 12:16:21 - INFO - __main__ - Step 10810: {'lr': 0.0004943653317780587, 'samples': 518880, 'steps': 10809, 'loss/train': 1.1171135902404785} +07/25/2024 12:16:22 - INFO - __main__ - Step 10811: {'lr': 0.0004943642211480636, 'samples': 518928, 'steps': 10810, 'loss/train': 2.005655288696289} +07/25/2024 12:16:22 - INFO - __main__ - Step 10812: {'lr': 0.000494363110409871, 'samples': 518976, 'steps': 10811, 'loss/train': 1.1271981000900269} +07/25/2024 12:16:22 - INFO - __main__ - Step 10813: {'lr': 0.000494361999563481, 'samples': 519024, 'steps': 10812, 'loss/train': 2.009021043777466} +07/25/2024 12:16:23 - INFO - __main__ - Step 10814: {'lr': 0.0004943608886088946, 'samples': 519072, 'steps': 10813, 'loss/train': 1.79841148853302} +07/25/2024 12:16:23 - INFO - __main__ - Step 10815: {'lr': 0.000494359777546112, 'samples': 519120, 'steps': 10814, 'loss/train': 1.4087233543395996} +07/25/2024 12:16:23 - INFO - __main__ - Step 10816: {'lr': 0.0004943586663751338, 'samples': 519168, 'steps': 10815, 'loss/train': 2.0006418228149414} +07/25/2024 12:16:23 - INFO - __main__ - Step 10817: {'lr': 0.0004943575550959603, 'samples': 519216, 'steps': 10816, 'loss/train': 0.9501753449440002} +07/25/2024 12:16:24 - INFO - __main__ - Step 10818: {'lr': 0.0004943564437085922, 'samples': 519264, 'steps': 10817, 'loss/train': 1.8021421432495117} +07/25/2024 12:16:24 - INFO - __main__ - Step 10819: {'lr': 0.0004943553322130298, 'samples': 519312, 'steps': 10818, 'loss/train': 1.8556525707244873} +07/25/2024 12:16:24 - INFO - __main__ - Step 10820: {'lr': 0.0004943542206092738, 'samples': 519360, 'steps': 10819, 'loss/train': 2.3368942737579346} +07/25/2024 12:16:25 - INFO - __main__ - Step 10821: {'lr': 0.0004943531088973245, 'samples': 519408, 'steps': 10820, 'loss/train': 0.1406421661376953} +07/25/2024 12:16:25 - INFO - __main__ - Step 10822: {'lr': 0.0004943519970771826, 'samples': 519456, 'steps': 10821, 'loss/train': 1.9369171857833862} +07/25/2024 12:16:25 - INFO - __main__ - Step 10823: {'lr': 0.0004943508851488485, 'samples': 519504, 'steps': 10822, 'loss/train': 1.7629103660583496} +07/25/2024 12:16:25 - INFO - __main__ - Step 10824: {'lr': 0.0004943497731123227, 'samples': 519552, 'steps': 10823, 'loss/train': 2.3464479446411133} +07/25/2024 12:16:26 - INFO - __main__ - Step 10825: {'lr': 0.0004943486609676055, 'samples': 519600, 'steps': 10824, 'loss/train': 1.5782010555267334} +07/25/2024 12:16:26 - INFO - __main__ - Step 10826: {'lr': 0.0004943475487146977, 'samples': 519648, 'steps': 10825, 'loss/train': 1.9291754961013794} +07/25/2024 12:16:26 - INFO - __main__ - Step 10827: {'lr': 0.0004943464363535995, 'samples': 519696, 'steps': 10826, 'loss/train': 2.1793274879455566} +07/25/2024 12:16:27 - INFO - __main__ - Step 10828: {'lr': 0.0004943453238843117, 'samples': 519744, 'steps': 10827, 'loss/train': 2.4827239513397217} +07/25/2024 12:16:27 - INFO - __main__ - Step 10829: {'lr': 0.0004943442113068346, 'samples': 519792, 'steps': 10828, 'loss/train': 1.775072455406189} +07/25/2024 12:16:27 - INFO - __main__ - Step 10830: {'lr': 0.0004943430986211688, 'samples': 519840, 'steps': 10829, 'loss/train': 1.5240813493728638} +07/25/2024 12:16:27 - INFO - __main__ - Step 10831: {'lr': 0.0004943419858273146, 'samples': 519888, 'steps': 10830, 'loss/train': 1.9194284677505493} +07/25/2024 12:16:28 - INFO - __main__ - Step 10832: {'lr': 0.0004943408729252727, 'samples': 519936, 'steps': 10831, 'loss/train': 1.9536528587341309} +07/25/2024 12:16:28 - INFO - __main__ - Step 10833: {'lr': 0.0004943397599150434, 'samples': 519984, 'steps': 10832, 'loss/train': 1.6187925338745117} +07/25/2024 12:16:28 - INFO - __main__ - Step 10834: {'lr': 0.0004943386467966273, 'samples': 520032, 'steps': 10833, 'loss/train': 1.6868306398391724} +07/25/2024 12:16:29 - INFO - __main__ - Step 10835: {'lr': 0.0004943375335700249, 'samples': 520080, 'steps': 10834, 'loss/train': 1.8497551679611206} +07/25/2024 12:16:29 - INFO - __main__ - Step 10836: {'lr': 0.0004943364202352369, 'samples': 520128, 'steps': 10835, 'loss/train': 1.3391863107681274} +07/25/2024 12:16:29 - INFO - __main__ - Step 10837: {'lr': 0.0004943353067922634, 'samples': 520176, 'steps': 10836, 'loss/train': 2.0916833877563477} +07/25/2024 12:16:29 - INFO - __main__ - Step 10838: {'lr': 0.0004943341932411051, 'samples': 520224, 'steps': 10837, 'loss/train': 2.0711727142333984} +07/25/2024 12:16:30 - INFO - __main__ - Step 10839: {'lr': 0.0004943330795817624, 'samples': 520272, 'steps': 10838, 'loss/train': 1.934058427810669} +07/25/2024 12:16:30 - INFO - __main__ - Step 10840: {'lr': 0.000494331965814236, 'samples': 520320, 'steps': 10839, 'loss/train': 1.5133798122406006} +07/25/2024 12:16:30 - INFO - __main__ - Step 10841: {'lr': 0.0004943308519385261, 'samples': 520368, 'steps': 10840, 'loss/train': 0.9480827450752258} +07/25/2024 12:16:31 - INFO - __main__ - Step 10842: {'lr': 0.0004943297379546334, 'samples': 520416, 'steps': 10841, 'loss/train': 1.514754056930542} +07/25/2024 12:16:31 - INFO - __main__ - Step 10843: {'lr': 0.0004943286238625584, 'samples': 520464, 'steps': 10842, 'loss/train': 0.9362345337867737} +07/25/2024 12:16:31 - INFO - __main__ - Step 10844: {'lr': 0.0004943275096623014, 'samples': 520512, 'steps': 10843, 'loss/train': 1.8835643529891968} +07/25/2024 12:16:31 - INFO - __main__ - Step 10845: {'lr': 0.0004943263953538632, 'samples': 520560, 'steps': 10844, 'loss/train': 0.14402058720588684} +07/25/2024 12:16:32 - INFO - __main__ - Step 10846: {'lr': 0.000494325280937244, 'samples': 520608, 'steps': 10845, 'loss/train': 2.215391159057617} +07/25/2024 12:16:32 - INFO - __main__ - Step 10847: {'lr': 0.0004943241664124445, 'samples': 520656, 'steps': 10846, 'loss/train': 1.390217661857605} +07/25/2024 12:16:32 - INFO - __main__ - Step 10848: {'lr': 0.000494323051779465, 'samples': 520704, 'steps': 10847, 'loss/train': 2.3909506797790527} +07/25/2024 12:16:33 - INFO - __main__ - Step 10849: {'lr': 0.0004943219370383061, 'samples': 520752, 'steps': 10848, 'loss/train': 1.7675435543060303} +07/25/2024 12:16:33 - INFO - __main__ - Step 10850: {'lr': 0.0004943208221889683, 'samples': 520800, 'steps': 10849, 'loss/train': 1.8489309549331665} +07/25/2024 12:16:33 - INFO - __main__ - Step 10851: {'lr': 0.0004943197072314522, 'samples': 520848, 'steps': 10850, 'loss/train': 2.31780743598938} +07/25/2024 12:16:33 - INFO - __main__ - Step 10852: {'lr': 0.000494318592165758, 'samples': 520896, 'steps': 10851, 'loss/train': 2.049370765686035} +07/25/2024 12:16:34 - INFO - __main__ - Step 10853: {'lr': 0.0004943174769918866, 'samples': 520944, 'steps': 10852, 'loss/train': 2.241844654083252} +07/25/2024 12:16:34 - INFO - __main__ - Step 10854: {'lr': 0.000494316361709838, 'samples': 520992, 'steps': 10853, 'loss/train': 1.468374490737915} +07/25/2024 12:16:34 - INFO - __main__ - Step 10855: {'lr': 0.0004943152463196132, 'samples': 521040, 'steps': 10854, 'loss/train': 2.330108404159546} +07/25/2024 12:16:35 - INFO - __main__ - Step 10856: {'lr': 0.0004943141308212123, 'samples': 521088, 'steps': 10855, 'loss/train': 1.8215571641921997} +07/25/2024 12:16:35 - INFO - __main__ - Step 10857: {'lr': 0.0004943130152146361, 'samples': 521136, 'steps': 10856, 'loss/train': 1.9035098552703857} +07/25/2024 12:16:35 - INFO - __main__ - Step 10858: {'lr': 0.0004943118994998848, 'samples': 521184, 'steps': 10857, 'loss/train': 1.8139357566833496} +07/25/2024 12:16:35 - INFO - __main__ - Step 10859: {'lr': 0.0004943107836769591, 'samples': 521232, 'steps': 10858, 'loss/train': 2.368330478668213} +07/25/2024 12:16:36 - INFO - __main__ - Step 10860: {'lr': 0.0004943096677458594, 'samples': 521280, 'steps': 10859, 'loss/train': 1.5967148542404175} +07/25/2024 12:16:36 - INFO - __main__ - Step 10861: {'lr': 0.0004943085517065862, 'samples': 521328, 'steps': 10860, 'loss/train': 2.140291690826416} +07/25/2024 12:16:36 - INFO - __main__ - Step 10862: {'lr': 0.0004943074355591401, 'samples': 521376, 'steps': 10861, 'loss/train': 1.6820398569107056} +07/25/2024 12:16:36 - INFO - __main__ - Step 10863: {'lr': 0.0004943063193035215, 'samples': 521424, 'steps': 10862, 'loss/train': 1.3505727052688599} +07/25/2024 12:16:37 - INFO - __main__ - Step 10864: {'lr': 0.0004943052029397308, 'samples': 521472, 'steps': 10863, 'loss/train': 1.8275089263916016} +07/25/2024 12:16:37 - INFO - __main__ - Step 10865: {'lr': 0.0004943040864677687, 'samples': 521520, 'steps': 10864, 'loss/train': 0.9576075077056885} +07/25/2024 12:16:37 - INFO - __main__ - Step 10866: {'lr': 0.0004943029698876355, 'samples': 521568, 'steps': 10865, 'loss/train': 2.0667269229888916} +07/25/2024 12:16:38 - INFO - __main__ - Step 10867: {'lr': 0.000494301853199332, 'samples': 521616, 'steps': 10866, 'loss/train': 2.2068233489990234} +07/25/2024 12:16:38 - INFO - __main__ - Step 10868: {'lr': 0.0004943007364028583, 'samples': 521664, 'steps': 10867, 'loss/train': 2.5233118534088135} +07/25/2024 12:16:38 - INFO - __main__ - Step 10869: {'lr': 0.0004942996194982151, 'samples': 521712, 'steps': 10868, 'loss/train': 0.15605337917804718} +07/25/2024 12:16:38 - INFO - __main__ - Step 10870: {'lr': 0.0004942985024854029, 'samples': 521760, 'steps': 10869, 'loss/train': 1.8604358434677124} +07/25/2024 12:16:39 - INFO - __main__ - Step 10871: {'lr': 0.0004942973853644221, 'samples': 521808, 'steps': 10870, 'loss/train': 1.7247920036315918} +07/25/2024 12:16:39 - INFO - __main__ - Step 10872: {'lr': 0.0004942962681352734, 'samples': 521856, 'steps': 10871, 'loss/train': 0.9504038691520691} +07/25/2024 12:16:39 - INFO - __main__ - Step 10873: {'lr': 0.0004942951507979571, 'samples': 521904, 'steps': 10872, 'loss/train': 1.9272383451461792} +07/25/2024 12:16:40 - INFO - __main__ - Step 10874: {'lr': 0.0004942940333524737, 'samples': 521952, 'steps': 10873, 'loss/train': 1.682752251625061} +07/25/2024 12:16:40 - INFO - __main__ - Step 10875: {'lr': 0.0004942929157988238, 'samples': 522000, 'steps': 10874, 'loss/train': 2.104543685913086} +07/25/2024 12:16:40 - INFO - __main__ - Step 10876: {'lr': 0.0004942917981370079, 'samples': 522048, 'steps': 10875, 'loss/train': 2.2463622093200684} +07/25/2024 12:16:40 - INFO - __main__ - Step 10877: {'lr': 0.0004942906803670264, 'samples': 522096, 'steps': 10876, 'loss/train': 2.1404049396514893} +07/25/2024 12:16:41 - INFO - __main__ - Step 10878: {'lr': 0.0004942895624888798, 'samples': 522144, 'steps': 10877, 'loss/train': 1.2204581499099731} +07/25/2024 12:16:41 - INFO - __main__ - Step 10879: {'lr': 0.0004942884445025685, 'samples': 522192, 'steps': 10878, 'loss/train': 1.9925023317337036} +07/25/2024 12:16:41 - INFO - __main__ - Step 10880: {'lr': 0.0004942873264080933, 'samples': 522240, 'steps': 10879, 'loss/train': 1.897085428237915} +07/25/2024 12:16:42 - INFO - __main__ - Step 10881: {'lr': 0.0004942862082054544, 'samples': 522288, 'steps': 10880, 'loss/train': 2.274078845977783} +07/25/2024 12:16:42 - INFO - __main__ - Step 10882: {'lr': 0.0004942850898946525, 'samples': 522336, 'steps': 10881, 'loss/train': 2.1869921684265137} +07/25/2024 12:16:42 - INFO - __main__ - Step 10883: {'lr': 0.0004942839714756881, 'samples': 522384, 'steps': 10882, 'loss/train': 2.3328299522399902} +07/25/2024 12:16:42 - INFO - __main__ - Step 10884: {'lr': 0.0004942828529485614, 'samples': 522432, 'steps': 10883, 'loss/train': 2.025468349456787} +07/25/2024 12:16:43 - INFO - __main__ - Step 10885: {'lr': 0.0004942817343132731, 'samples': 522480, 'steps': 10884, 'loss/train': 2.0812978744506836} +07/25/2024 12:16:43 - INFO - __main__ - Step 10886: {'lr': 0.0004942806155698239, 'samples': 522528, 'steps': 10885, 'loss/train': 1.578988790512085} +07/25/2024 12:16:43 - INFO - __main__ - Step 10887: {'lr': 0.0004942794967182139, 'samples': 522576, 'steps': 10886, 'loss/train': 1.848595142364502} +07/25/2024 12:16:44 - INFO - __main__ - Step 10888: {'lr': 0.0004942783777584439, 'samples': 522624, 'steps': 10887, 'loss/train': 1.2393238544464111} +07/25/2024 12:16:44 - INFO - __main__ - Step 10889: {'lr': 0.0004942772586905142, 'samples': 522672, 'steps': 10888, 'loss/train': 1.7952044010162354} +07/25/2024 12:16:44 - INFO - __main__ - Step 10890: {'lr': 0.0004942761395144254, 'samples': 522720, 'steps': 10889, 'loss/train': 2.065556526184082} +07/25/2024 12:16:44 - INFO - __main__ - Step 10891: {'lr': 0.000494275020230178, 'samples': 522768, 'steps': 10890, 'loss/train': 2.0082504749298096} +07/25/2024 12:16:45 - INFO - __main__ - Step 10892: {'lr': 0.0004942739008377724, 'samples': 522816, 'steps': 10891, 'loss/train': 2.619337797164917} +07/25/2024 12:16:45 - INFO - __main__ - Step 10893: {'lr': 0.0004942727813372093, 'samples': 522864, 'steps': 10892, 'loss/train': 0.19718915224075317} +07/25/2024 12:16:45 - INFO - __main__ - Step 10894: {'lr': 0.0004942716617284889, 'samples': 522912, 'steps': 10893, 'loss/train': 2.2517142295837402} +07/25/2024 12:16:46 - INFO - __main__ - Step 10895: {'lr': 0.0004942705420116119, 'samples': 522960, 'steps': 10894, 'loss/train': 2.0217137336730957} +07/25/2024 12:16:46 - INFO - __main__ - Step 10896: {'lr': 0.0004942694221865788, 'samples': 523008, 'steps': 10895, 'loss/train': 1.7834830284118652} +07/25/2024 12:16:46 - INFO - __main__ - Step 10897: {'lr': 0.0004942683022533899, 'samples': 523056, 'steps': 10896, 'loss/train': 1.6355338096618652} +07/25/2024 12:16:46 - INFO - __main__ - Step 10898: {'lr': 0.000494267182212046, 'samples': 523104, 'steps': 10897, 'loss/train': 2.1153483390808105} +07/25/2024 12:16:47 - INFO - __main__ - Step 10899: {'lr': 0.0004942660620625473, 'samples': 523152, 'steps': 10898, 'loss/train': 2.3533270359039307} +07/25/2024 12:16:47 - INFO - __main__ - Step 10900: {'lr': 0.0004942649418048946, 'samples': 523200, 'steps': 10899, 'loss/train': 1.8343913555145264} +07/25/2024 12:16:47 - INFO - __main__ - Step 10901: {'lr': 0.0004942638214390881, 'samples': 523248, 'steps': 10900, 'loss/train': 0.8187244534492493} +07/25/2024 12:16:48 - INFO - __main__ - Step 10902: {'lr': 0.0004942627009651285, 'samples': 523296, 'steps': 10901, 'loss/train': 2.1764931678771973} +07/25/2024 12:16:48 - INFO - __main__ - Step 10903: {'lr': 0.0004942615803830161, 'samples': 523344, 'steps': 10902, 'loss/train': 2.077648878097534} +07/25/2024 12:16:48 - INFO - __main__ - Step 10904: {'lr': 0.0004942604596927516, 'samples': 523392, 'steps': 10903, 'loss/train': 1.9266356229782104} +07/25/2024 12:16:48 - INFO - __main__ - Step 10905: {'lr': 0.0004942593388943354, 'samples': 523440, 'steps': 10904, 'loss/train': 1.8042383193969727} +07/25/2024 12:16:49 - INFO - __main__ - Step 10906: {'lr': 0.000494258217987768, 'samples': 523488, 'steps': 10905, 'loss/train': 1.9581727981567383} +07/25/2024 12:16:49 - INFO - __main__ - Step 10907: {'lr': 0.00049425709697305, 'samples': 523536, 'steps': 10906, 'loss/train': 2.1676743030548096} +07/25/2024 12:16:49 - INFO - __main__ - Step 10908: {'lr': 0.0004942559758501817, 'samples': 523584, 'steps': 10907, 'loss/train': 1.657022476196289} +07/25/2024 12:16:50 - INFO - __main__ - Step 10909: {'lr': 0.0004942548546191637, 'samples': 523632, 'steps': 10908, 'loss/train': 2.2956395149230957} +07/25/2024 12:16:50 - INFO - __main__ - Step 10910: {'lr': 0.0004942537332799965, 'samples': 523680, 'steps': 10909, 'loss/train': 2.3875603675842285} +07/25/2024 12:16:50 - INFO - __main__ - Step 10911: {'lr': 0.0004942526118326806, 'samples': 523728, 'steps': 10910, 'loss/train': 2.1901755332946777} +07/25/2024 12:16:50 - INFO - __main__ - Step 10912: {'lr': 0.0004942514902772165, 'samples': 523776, 'steps': 10911, 'loss/train': 1.0755927562713623} +07/25/2024 12:16:51 - INFO - __main__ - Step 10913: {'lr': 0.0004942503686136047, 'samples': 523824, 'steps': 10912, 'loss/train': 1.5890674591064453} +07/25/2024 12:16:51 - INFO - __main__ - Step 10914: {'lr': 0.0004942492468418457, 'samples': 523872, 'steps': 10913, 'loss/train': 1.9532102346420288} +07/25/2024 12:16:51 - INFO - __main__ - Step 10915: {'lr': 0.0004942481249619399, 'samples': 523920, 'steps': 10914, 'loss/train': 1.8128776550292969} +07/25/2024 12:16:52 - INFO - __main__ - Step 10916: {'lr': 0.0004942470029738879, 'samples': 523968, 'steps': 10915, 'loss/train': 2.1303606033325195} +07/25/2024 12:16:52 - INFO - __main__ - Step 10917: {'lr': 0.0004942458808776902, 'samples': 524016, 'steps': 10916, 'loss/train': 0.39800897240638733} +07/25/2024 12:16:52 - INFO - __main__ - Step 10918: {'lr': 0.0004942447586733473, 'samples': 524064, 'steps': 10917, 'loss/train': 1.843494176864624} +07/25/2024 12:16:52 - INFO - __main__ - Step 10919: {'lr': 0.0004942436363608595, 'samples': 524112, 'steps': 10918, 'loss/train': 2.1257495880126953} +07/25/2024 12:16:53 - INFO - __main__ - Step 10920: {'lr': 0.0004942425139402276, 'samples': 524160, 'steps': 10919, 'loss/train': 2.2486941814422607} +07/25/2024 12:16:53 - INFO - __main__ - Step 10921: {'lr': 0.0004942413914114519, 'samples': 524208, 'steps': 10920, 'loss/train': 1.7785993814468384} +07/25/2024 12:16:53 - INFO - __main__ - Step 10922: {'lr': 0.000494240268774533, 'samples': 524256, 'steps': 10921, 'loss/train': 2.075437545776367} +07/25/2024 12:16:54 - INFO - __main__ - Step 10923: {'lr': 0.0004942391460294714, 'samples': 524304, 'steps': 10922, 'loss/train': 2.058529853820801} +07/25/2024 12:16:54 - INFO - __main__ - Step 10924: {'lr': 0.0004942380231762675, 'samples': 524352, 'steps': 10923, 'loss/train': 2.122285842895508} +07/25/2024 12:16:54 - INFO - __main__ - Step 10925: {'lr': 0.0004942369002149218, 'samples': 524400, 'steps': 10924, 'loss/train': 1.3572453260421753} +07/25/2024 12:16:54 - INFO - __main__ - Step 10926: {'lr': 0.0004942357771454349, 'samples': 524448, 'steps': 10925, 'loss/train': 1.854659914970398} +07/25/2024 12:16:55 - INFO - __main__ - Step 10927: {'lr': 0.0004942346539678072, 'samples': 524496, 'steps': 10926, 'loss/train': 2.2425320148468018} +07/25/2024 12:16:55 - INFO - __main__ - Step 10928: {'lr': 0.0004942335306820393, 'samples': 524544, 'steps': 10927, 'loss/train': 2.0897250175476074} +07/25/2024 12:16:55 - INFO - __main__ - Step 10929: {'lr': 0.0004942324072881315, 'samples': 524592, 'steps': 10928, 'loss/train': 2.060620069503784} +07/25/2024 12:16:56 - INFO - __main__ - Step 10930: {'lr': 0.0004942312837860846, 'samples': 524640, 'steps': 10929, 'loss/train': 1.9906831979751587} +07/25/2024 12:16:56 - INFO - __main__ - Step 10931: {'lr': 0.0004942301601758988, 'samples': 524688, 'steps': 10930, 'loss/train': 2.0814833641052246} +07/25/2024 12:16:56 - INFO - __main__ - Step 10932: {'lr': 0.0004942290364575748, 'samples': 524736, 'steps': 10931, 'loss/train': 1.9995194673538208} +07/25/2024 12:16:56 - INFO - __main__ - Step 10933: {'lr': 0.000494227912631113, 'samples': 524784, 'steps': 10932, 'loss/train': 1.675217866897583} +07/25/2024 12:16:57 - INFO - __main__ - Step 10934: {'lr': 0.000494226788696514, 'samples': 524832, 'steps': 10933, 'loss/train': 2.1986262798309326} +07/25/2024 12:16:57 - INFO - __main__ - Step 10935: {'lr': 0.0004942256646537781, 'samples': 524880, 'steps': 10934, 'loss/train': 1.8321130275726318} +07/25/2024 12:16:57 - INFO - __main__ - Step 10936: {'lr': 0.000494224540502906, 'samples': 524928, 'steps': 10935, 'loss/train': 0.9576518535614014} +07/25/2024 12:16:57 - INFO - __main__ - Step 10937: {'lr': 0.0004942234162438981, 'samples': 524976, 'steps': 10936, 'loss/train': 1.6257655620574951} +07/25/2024 12:16:58 - INFO - __main__ - Step 10938: {'lr': 0.000494222291876755, 'samples': 525024, 'steps': 10937, 'loss/train': 1.91858971118927} +07/25/2024 12:16:58 - INFO - __main__ - Step 10939: {'lr': 0.0004942211674014769, 'samples': 525072, 'steps': 10938, 'loss/train': 1.5697178840637207} +07/25/2024 12:16:58 - INFO - __main__ - Step 10940: {'lr': 0.0004942200428180646, 'samples': 525120, 'steps': 10939, 'loss/train': 1.8340322971343994} +07/25/2024 12:16:59 - INFO - __main__ - Step 10941: {'lr': 0.0004942189181265186, 'samples': 525168, 'steps': 10940, 'loss/train': 2.2946810722351074} +07/25/2024 12:16:59 - INFO - __main__ - Step 10942: {'lr': 0.0004942177933268393, 'samples': 525216, 'steps': 10941, 'loss/train': 2.332473039627075} +07/25/2024 12:16:59 - INFO - __main__ - Step 10943: {'lr': 0.0004942166684190272, 'samples': 525264, 'steps': 10942, 'loss/train': 2.298499822616577} +07/25/2024 12:16:59 - INFO - __main__ - Step 10944: {'lr': 0.0004942155434030826, 'samples': 525312, 'steps': 10943, 'loss/train': 2.0134470462799072} +07/25/2024 12:17:00 - INFO - __main__ - Step 10945: {'lr': 0.0004942144182790064, 'samples': 525360, 'steps': 10944, 'loss/train': 1.5098040103912354} +07/25/2024 12:17:00 - INFO - __main__ - Step 10946: {'lr': 0.0004942132930467988, 'samples': 525408, 'steps': 10945, 'loss/train': 2.2010750770568848} +07/25/2024 12:17:00 - INFO - __main__ - Step 10947: {'lr': 0.0004942121677064605, 'samples': 525456, 'steps': 10946, 'loss/train': 1.5892326831817627} +07/25/2024 12:17:01 - INFO - __main__ - Step 10948: {'lr': 0.0004942110422579919, 'samples': 525504, 'steps': 10947, 'loss/train': 1.746505856513977} +07/25/2024 12:17:01 - INFO - __main__ - Step 10949: {'lr': 0.0004942099167013934, 'samples': 525552, 'steps': 10948, 'loss/train': 1.7180287837982178} +07/25/2024 12:17:01 - INFO - __main__ - Step 10950: {'lr': 0.0004942087910366656, 'samples': 525600, 'steps': 10949, 'loss/train': 2.021699905395508} +07/25/2024 12:17:01 - INFO - __main__ - Step 10951: {'lr': 0.000494207665263809, 'samples': 525648, 'steps': 10950, 'loss/train': 1.8676005601882935} +07/25/2024 12:17:02 - INFO - __main__ - Step 10952: {'lr': 0.0004942065393828241, 'samples': 525696, 'steps': 10951, 'loss/train': 1.9765464067459106} +07/25/2024 12:17:02 - INFO - __main__ - Step 10953: {'lr': 0.0004942054133937114, 'samples': 525744, 'steps': 10952, 'loss/train': 2.0608582496643066} +07/25/2024 12:17:02 - INFO - __main__ - Step 10954: {'lr': 0.0004942042872964714, 'samples': 525792, 'steps': 10953, 'loss/train': 2.1771020889282227} +07/25/2024 12:17:03 - INFO - __main__ - Step 10955: {'lr': 0.0004942031610911046, 'samples': 525840, 'steps': 10954, 'loss/train': 2.016275644302368} +07/25/2024 12:17:03 - INFO - __main__ - Step 10956: {'lr': 0.0004942020347776114, 'samples': 525888, 'steps': 10955, 'loss/train': 1.9866061210632324} +07/25/2024 12:17:03 - INFO - __main__ - Step 10957: {'lr': 0.0004942009083559924, 'samples': 525936, 'steps': 10956, 'loss/train': 2.422226905822754} +07/25/2024 12:17:03 - INFO - __main__ - Step 10958: {'lr': 0.0004941997818262481, 'samples': 525984, 'steps': 10957, 'loss/train': 1.6744134426116943} +07/25/2024 12:17:04 - INFO - __main__ - Step 10959: {'lr': 0.0004941986551883791, 'samples': 526032, 'steps': 10958, 'loss/train': 1.946379542350769} +07/25/2024 12:17:04 - INFO - __main__ - Step 10960: {'lr': 0.0004941975284423856, 'samples': 526080, 'steps': 10959, 'loss/train': 1.3383835554122925} +07/25/2024 12:17:04 - INFO - __main__ - Step 10961: {'lr': 0.0004941964015882683, 'samples': 526128, 'steps': 10960, 'loss/train': 1.965980887413025} +07/25/2024 12:17:05 - INFO - __main__ - Step 10962: {'lr': 0.0004941952746260278, 'samples': 526176, 'steps': 10961, 'loss/train': 2.1713614463806152} +07/25/2024 12:17:05 - INFO - __main__ - Step 10963: {'lr': 0.0004941941475556643, 'samples': 526224, 'steps': 10962, 'loss/train': 2.2099530696868896} +07/25/2024 12:17:05 - INFO - __main__ - Step 10964: {'lr': 0.0004941930203771787, 'samples': 526272, 'steps': 10963, 'loss/train': 2.155694007873535} +07/25/2024 12:17:05 - INFO - __main__ - Step 10965: {'lr': 0.000494191893090571, 'samples': 526320, 'steps': 10964, 'loss/train': 2.10321307182312} +07/25/2024 12:17:06 - INFO - __main__ - Step 10966: {'lr': 0.0004941907656958421, 'samples': 526368, 'steps': 10965, 'loss/train': 1.7889270782470703} +07/25/2024 12:17:06 - INFO - __main__ - Step 10967: {'lr': 0.0004941896381929924, 'samples': 526416, 'steps': 10966, 'loss/train': 2.1872925758361816} +07/25/2024 12:17:06 - INFO - __main__ - Step 10968: {'lr': 0.0004941885105820223, 'samples': 526464, 'steps': 10967, 'loss/train': 2.4274744987487793} +07/25/2024 12:17:07 - INFO - __main__ - Step 10969: {'lr': 0.0004941873828629324, 'samples': 526512, 'steps': 10968, 'loss/train': 1.8546252250671387} +07/25/2024 12:17:07 - INFO - __main__ - Step 10970: {'lr': 0.0004941862550357232, 'samples': 526560, 'steps': 10969, 'loss/train': 2.0360541343688965} +07/25/2024 12:17:07 - INFO - __main__ - Step 10971: {'lr': 0.0004941851271003952, 'samples': 526608, 'steps': 10970, 'loss/train': 2.1159112453460693} +07/25/2024 12:17:07 - INFO - __main__ - Step 10972: {'lr': 0.0004941839990569489, 'samples': 526656, 'steps': 10971, 'loss/train': 1.8544061183929443} +07/25/2024 12:17:08 - INFO - __main__ - Step 10973: {'lr': 0.0004941828709053846, 'samples': 526704, 'steps': 10972, 'loss/train': 2.0362424850463867} +07/25/2024 12:17:08 - INFO - __main__ - Step 10974: {'lr': 0.0004941817426457031, 'samples': 526752, 'steps': 10973, 'loss/train': 2.321603536605835} +07/25/2024 12:17:08 - INFO - __main__ - Step 10975: {'lr': 0.0004941806142779047, 'samples': 526800, 'steps': 10974, 'loss/train': 1.7283686399459839} +07/25/2024 12:17:09 - INFO - __main__ - Step 10976: {'lr': 0.00049417948580199, 'samples': 526848, 'steps': 10975, 'loss/train': 1.892826795578003} +07/25/2024 12:17:09 - INFO - __main__ - Step 10977: {'lr': 0.0004941783572179595, 'samples': 526896, 'steps': 10976, 'loss/train': 2.0692949295043945} +07/25/2024 12:17:09 - INFO - __main__ - Step 10978: {'lr': 0.0004941772285258136, 'samples': 526944, 'steps': 10977, 'loss/train': 2.2282357215881348} +07/25/2024 12:17:09 - INFO - __main__ - Step 10979: {'lr': 0.0004941760997255529, 'samples': 526992, 'steps': 10978, 'loss/train': 1.8373520374298096} +07/25/2024 12:17:10 - INFO - __main__ - Step 10980: {'lr': 0.0004941749708171777, 'samples': 527040, 'steps': 10979, 'loss/train': 2.2156195640563965} +07/25/2024 12:17:10 - INFO - __main__ - Step 10981: {'lr': 0.0004941738418006889, 'samples': 527088, 'steps': 10980, 'loss/train': 1.891074776649475} +07/25/2024 12:17:10 - INFO - __main__ - Step 10982: {'lr': 0.0004941727126760866, 'samples': 527136, 'steps': 10981, 'loss/train': 1.8317326307296753} +07/25/2024 12:17:11 - INFO - __main__ - Step 10983: {'lr': 0.0004941715834433717, 'samples': 527184, 'steps': 10982, 'loss/train': 1.9432045221328735} +07/25/2024 12:17:11 - INFO - __main__ - Step 10984: {'lr': 0.0004941704541025442, 'samples': 527232, 'steps': 10983, 'loss/train': 1.725831389427185} +07/25/2024 12:17:11 - INFO - __main__ - Step 10985: {'lr': 0.000494169324653605, 'samples': 527280, 'steps': 10984, 'loss/train': 2.047147750854492} +07/25/2024 12:17:11 - INFO - __main__ - Step 10986: {'lr': 0.0004941681950965544, 'samples': 527328, 'steps': 10985, 'loss/train': 1.9268382787704468} +07/25/2024 12:17:12 - INFO - __main__ - Step 10987: {'lr': 0.000494167065431393, 'samples': 527376, 'steps': 10986, 'loss/train': 1.6910209655761719} +07/25/2024 12:17:12 - INFO - __main__ - Step 10988: {'lr': 0.0004941659356581213, 'samples': 527424, 'steps': 10987, 'loss/train': 1.770988941192627} +07/25/2024 12:17:12 - INFO - __main__ - Step 10989: {'lr': 0.0004941648057767398, 'samples': 527472, 'steps': 10988, 'loss/train': 2.8008430004119873} +07/25/2024 12:17:13 - INFO - __main__ - Step 10990: {'lr': 0.0004941636757872489, 'samples': 527520, 'steps': 10989, 'loss/train': 1.1261793375015259} +07/25/2024 12:17:13 - INFO - __main__ - Step 10991: {'lr': 0.0004941625456896491, 'samples': 527568, 'steps': 10990, 'loss/train': 1.9542549848556519} +07/25/2024 12:17:13 - INFO - __main__ - Step 10992: {'lr': 0.0004941614154839411, 'samples': 527616, 'steps': 10991, 'loss/train': 2.282266616821289} +07/25/2024 12:17:13 - INFO - __main__ - Step 10993: {'lr': 0.0004941602851701251, 'samples': 527664, 'steps': 10992, 'loss/train': 2.040079116821289} +07/25/2024 12:17:14 - INFO - __main__ - Step 10994: {'lr': 0.0004941591547482019, 'samples': 527712, 'steps': 10993, 'loss/train': 2.430945634841919} +07/25/2024 12:17:14 - INFO - __main__ - Step 10995: {'lr': 0.0004941580242181718, 'samples': 527760, 'steps': 10994, 'loss/train': 1.5829135179519653} +07/25/2024 12:17:14 - INFO - __main__ - Step 10996: {'lr': 0.0004941568935800354, 'samples': 527808, 'steps': 10995, 'loss/train': 3.7309420108795166} +07/25/2024 12:17:15 - INFO - __main__ - Step 10997: {'lr': 0.0004941557628337931, 'samples': 527856, 'steps': 10996, 'loss/train': 2.2326271533966064} +07/25/2024 12:17:15 - INFO - __main__ - Step 10998: {'lr': 0.0004941546319794457, 'samples': 527904, 'steps': 10997, 'loss/train': 1.3745313882827759} +07/25/2024 12:17:15 - INFO - __main__ - Step 10999: {'lr': 0.0004941535010169933, 'samples': 527952, 'steps': 10998, 'loss/train': 2.1203839778900146} +07/25/2024 12:17:15 - INFO - __main__ - Step 11000: {'lr': 0.0004941523699464365, 'samples': 528000, 'steps': 10999, 'loss/train': 0.4669210910797119} +07/25/2024 12:17:16 - INFO - __main__ - Step 11001: {'lr': 0.0004941512387677759, 'samples': 528048, 'steps': 11000, 'loss/train': 2.2365989685058594} +07/25/2024 12:17:16 - INFO - __main__ - Step 11002: {'lr': 0.0004941501074810121, 'samples': 528096, 'steps': 11001, 'loss/train': 2.215866804122925} +07/25/2024 12:17:16 - INFO - __main__ - Step 11003: {'lr': 0.0004941489760861454, 'samples': 528144, 'steps': 11002, 'loss/train': 1.7045022249221802} +07/25/2024 12:17:17 - INFO - __main__ - Step 11004: {'lr': 0.0004941478445831763, 'samples': 528192, 'steps': 11003, 'loss/train': 1.9278703927993774} +07/25/2024 12:17:17 - INFO - __main__ - Step 11005: {'lr': 0.0004941467129721055, 'samples': 528240, 'steps': 11004, 'loss/train': 1.2434816360473633} +07/25/2024 12:17:17 - INFO - __main__ - Step 11006: {'lr': 0.0004941455812529332, 'samples': 528288, 'steps': 11005, 'loss/train': 2.1212122440338135} +07/25/2024 12:17:17 - INFO - __main__ - Step 11007: {'lr': 0.0004941444494256603, 'samples': 528336, 'steps': 11006, 'loss/train': 1.3933329582214355} +07/25/2024 12:17:18 - INFO - __main__ - Step 11008: {'lr': 0.0004941433174902869, 'samples': 528384, 'steps': 11007, 'loss/train': 1.647842288017273} +07/25/2024 12:17:18 - INFO - __main__ - Step 11009: {'lr': 0.0004941421854468139, 'samples': 528432, 'steps': 11008, 'loss/train': 2.494379997253418} +07/25/2024 12:17:18 - INFO - __main__ - Step 11010: {'lr': 0.0004941410532952414, 'samples': 528480, 'steps': 11009, 'loss/train': 2.3806769847869873} +07/25/2024 12:17:18 - INFO - __main__ - Step 11011: {'lr': 0.0004941399210355701, 'samples': 528528, 'steps': 11010, 'loss/train': 2.032423496246338} +07/25/2024 12:17:19 - INFO - __main__ - Step 11012: {'lr': 0.0004941387886678005, 'samples': 528576, 'steps': 11011, 'loss/train': 1.8437544107437134} +07/25/2024 12:17:19 - INFO - __main__ - Step 11013: {'lr': 0.0004941376561919331, 'samples': 528624, 'steps': 11012, 'loss/train': 1.9895398616790771} +07/25/2024 12:17:19 - INFO - __main__ - Step 11014: {'lr': 0.0004941365236079683, 'samples': 528672, 'steps': 11013, 'loss/train': 1.6780076026916504} +07/25/2024 12:17:20 - INFO - __main__ - Step 11015: {'lr': 0.0004941353909159069, 'samples': 528720, 'steps': 11014, 'loss/train': 1.9459400177001953} +07/25/2024 12:17:20 - INFO - __main__ - Step 11016: {'lr': 0.000494134258115749, 'samples': 528768, 'steps': 11015, 'loss/train': 1.963382601737976} +07/25/2024 12:17:20 - INFO - __main__ - Step 11017: {'lr': 0.0004941331252074953, 'samples': 528816, 'steps': 11016, 'loss/train': 1.8396741151809692} +07/25/2024 12:17:20 - INFO - __main__ - Step 11018: {'lr': 0.0004941319921911464, 'samples': 528864, 'steps': 11017, 'loss/train': 0.8884175419807434} +07/25/2024 12:17:21 - INFO - __main__ - Step 11019: {'lr': 0.0004941308590667026, 'samples': 528912, 'steps': 11018, 'loss/train': 1.754204273223877} +07/25/2024 12:17:21 - INFO - __main__ - Step 11020: {'lr': 0.0004941297258341645, 'samples': 528960, 'steps': 11019, 'loss/train': 2.636065721511841} +07/25/2024 12:17:21 - INFO - __main__ - Step 11021: {'lr': 0.0004941285924935327, 'samples': 529008, 'steps': 11020, 'loss/train': 2.3054428100585938} +07/25/2024 12:17:22 - INFO - __main__ - Step 11022: {'lr': 0.0004941274590448075, 'samples': 529056, 'steps': 11021, 'loss/train': 2.187495708465576} +07/25/2024 12:17:22 - INFO - __main__ - Step 11023: {'lr': 0.0004941263254879895, 'samples': 529104, 'steps': 11022, 'loss/train': 2.340292453765869} +07/25/2024 12:17:22 - INFO - __main__ - Step 11024: {'lr': 0.0004941251918230792, 'samples': 529152, 'steps': 11023, 'loss/train': 0.3857311010360718} +07/25/2024 12:17:22 - INFO - __main__ - Step 11025: {'lr': 0.0004941240580500772, 'samples': 529200, 'steps': 11024, 'loss/train': 2.4660983085632324} +07/25/2024 12:17:23 - INFO - __main__ - Step 11026: {'lr': 0.0004941229241689838, 'samples': 529248, 'steps': 11025, 'loss/train': 2.1743741035461426} +07/25/2024 12:17:23 - INFO - __main__ - Step 11027: {'lr': 0.0004941217901797997, 'samples': 529296, 'steps': 11026, 'loss/train': 2.2194998264312744} +07/25/2024 12:17:23 - INFO - __main__ - Step 11028: {'lr': 0.0004941206560825252, 'samples': 529344, 'steps': 11027, 'loss/train': 2.0461549758911133} +07/25/2024 12:17:24 - INFO - __main__ - Step 11029: {'lr': 0.0004941195218771611, 'samples': 529392, 'steps': 11028, 'loss/train': 1.9669698476791382} +07/25/2024 12:17:24 - INFO - __main__ - Step 11030: {'lr': 0.0004941183875637075, 'samples': 529440, 'steps': 11029, 'loss/train': 1.8199834823608398} +07/25/2024 12:17:24 - INFO - __main__ - Step 11031: {'lr': 0.0004941172531421652, 'samples': 529488, 'steps': 11030, 'loss/train': 2.5196681022644043} +07/25/2024 12:17:24 - INFO - __main__ - Step 11032: {'lr': 0.0004941161186125347, 'samples': 529536, 'steps': 11031, 'loss/train': 1.8083124160766602} +07/25/2024 12:17:25 - INFO - __main__ - Step 11033: {'lr': 0.0004941149839748163, 'samples': 529584, 'steps': 11032, 'loss/train': 2.3602609634399414} +07/25/2024 12:17:25 - INFO - __main__ - Step 11034: {'lr': 0.0004941138492290107, 'samples': 529632, 'steps': 11033, 'loss/train': 2.163944721221924} +07/25/2024 12:17:25 - INFO - __main__ - Step 11035: {'lr': 0.0004941127143751183, 'samples': 529680, 'steps': 11034, 'loss/train': 2.387847661972046} +07/25/2024 12:17:26 - INFO - __main__ - Step 11036: {'lr': 0.0004941115794131396, 'samples': 529728, 'steps': 11035, 'loss/train': 1.7407220602035522} +07/25/2024 12:17:26 - INFO - __main__ - Step 11037: {'lr': 0.0004941104443430752, 'samples': 529776, 'steps': 11036, 'loss/train': 1.8861863613128662} +07/25/2024 12:17:26 - INFO - __main__ - Step 11038: {'lr': 0.0004941093091649255, 'samples': 529824, 'steps': 11037, 'loss/train': 1.575600504875183} +07/25/2024 12:17:26 - INFO - __main__ - Step 11039: {'lr': 0.000494108173878691, 'samples': 529872, 'steps': 11038, 'loss/train': 1.2752052545547485} +07/25/2024 12:17:27 - INFO - __main__ - Step 11040: {'lr': 0.0004941070384843723, 'samples': 529920, 'steps': 11039, 'loss/train': 2.15793514251709} +07/25/2024 12:17:27 - INFO - __main__ - Step 11041: {'lr': 0.0004941059029819698, 'samples': 529968, 'steps': 11040, 'loss/train': 1.863427758216858} +07/25/2024 12:17:27 - INFO - __main__ - Step 11042: {'lr': 0.0004941047673714842, 'samples': 530016, 'steps': 11041, 'loss/train': 2.008603096008301} +07/25/2024 12:17:28 - INFO - __main__ - Step 11043: {'lr': 0.0004941036316529156, 'samples': 530064, 'steps': 11042, 'loss/train': 1.2353302240371704} +07/25/2024 12:17:28 - INFO - __main__ - Step 11044: {'lr': 0.0004941024958262649, 'samples': 530112, 'steps': 11043, 'loss/train': 1.6384902000427246} +07/25/2024 12:17:28 - INFO - __main__ - Step 11045: {'lr': 0.0004941013598915324, 'samples': 530160, 'steps': 11044, 'loss/train': 1.6947847604751587} +07/25/2024 12:17:28 - INFO - __main__ - Step 11046: {'lr': 0.0004941002238487187, 'samples': 530208, 'steps': 11045, 'loss/train': 1.8139610290527344} +07/25/2024 12:17:29 - INFO - __main__ - Step 11047: {'lr': 0.0004940990876978243, 'samples': 530256, 'steps': 11046, 'loss/train': 1.7904844284057617} +07/25/2024 12:17:29 - INFO - __main__ - Step 11048: {'lr': 0.0004940979514388495, 'samples': 530304, 'steps': 11047, 'loss/train': 0.18085071444511414} +07/25/2024 12:17:29 - INFO - __main__ - Step 11049: {'lr': 0.0004940968150717952, 'samples': 530352, 'steps': 11048, 'loss/train': 1.9936407804489136} +07/25/2024 12:17:30 - INFO - __main__ - Step 11050: {'lr': 0.0004940956785966615, 'samples': 530400, 'steps': 11049, 'loss/train': 2.128392457962036} +07/25/2024 12:17:30 - INFO - __main__ - Step 11051: {'lr': 0.000494094542013449, 'samples': 530448, 'steps': 11050, 'loss/train': 2.4070684909820557} +07/25/2024 12:17:30 - INFO - __main__ - Step 11052: {'lr': 0.0004940934053221585, 'samples': 530496, 'steps': 11051, 'loss/train': 1.9793055057525635} +07/25/2024 12:17:30 - INFO - __main__ - Step 11053: {'lr': 0.0004940922685227901, 'samples': 530544, 'steps': 11052, 'loss/train': 1.2335978746414185} +07/25/2024 12:17:31 - INFO - __main__ - Step 11054: {'lr': 0.0004940911316153446, 'samples': 530592, 'steps': 11053, 'loss/train': 2.283857583999634} +07/25/2024 12:17:31 - INFO - __main__ - Step 11055: {'lr': 0.0004940899945998224, 'samples': 530640, 'steps': 11054, 'loss/train': 2.42134690284729} +07/25/2024 12:17:31 - INFO - __main__ - Step 11056: {'lr': 0.0004940888574762239, 'samples': 530688, 'steps': 11055, 'loss/train': 2.410139322280884} +07/25/2024 12:17:32 - INFO - __main__ - Step 11057: {'lr': 0.0004940877202445497, 'samples': 530736, 'steps': 11056, 'loss/train': 1.732680082321167} +07/25/2024 12:17:32 - INFO - __main__ - Step 11058: {'lr': 0.0004940865829048003, 'samples': 530784, 'steps': 11057, 'loss/train': 2.217583417892456} +07/25/2024 12:17:32 - INFO - __main__ - Step 11059: {'lr': 0.0004940854454569762, 'samples': 530832, 'steps': 11058, 'loss/train': 1.865689754486084} +07/25/2024 12:17:32 - INFO - __main__ - Step 11060: {'lr': 0.0004940843079010779, 'samples': 530880, 'steps': 11059, 'loss/train': 2.5117576122283936} +07/25/2024 12:17:33 - INFO - __main__ - Step 11061: {'lr': 0.0004940831702371061, 'samples': 530928, 'steps': 11060, 'loss/train': 2.210150718688965} +07/25/2024 12:17:33 - INFO - __main__ - Step 11062: {'lr': 0.0004940820324650609, 'samples': 530976, 'steps': 11061, 'loss/train': 1.1806672811508179} +07/25/2024 12:17:33 - INFO - __main__ - Step 11063: {'lr': 0.000494080894584943, 'samples': 531024, 'steps': 11062, 'loss/train': 2.4916627407073975} +07/25/2024 12:17:34 - INFO - __main__ - Step 11064: {'lr': 0.0004940797565967529, 'samples': 531072, 'steps': 11063, 'loss/train': 2.1521568298339844} +07/25/2024 12:17:34 - INFO - __main__ - Step 11065: {'lr': 0.0004940786185004913, 'samples': 531120, 'steps': 11064, 'loss/train': 1.88032066822052} +07/25/2024 12:17:34 - INFO - __main__ - Step 11066: {'lr': 0.0004940774802961584, 'samples': 531168, 'steps': 11065, 'loss/train': 1.5940396785736084} +07/25/2024 12:17:34 - INFO - __main__ - Step 11067: {'lr': 0.0004940763419837549, 'samples': 531216, 'steps': 11066, 'loss/train': 1.5711712837219238} +07/25/2024 12:17:35 - INFO - __main__ - Step 11068: {'lr': 0.0004940752035632812, 'samples': 531264, 'steps': 11067, 'loss/train': 1.994676113128662} +07/25/2024 12:17:35 - INFO - __main__ - Step 11069: {'lr': 0.0004940740650347377, 'samples': 531312, 'steps': 11068, 'loss/train': 2.0779590606689453} +07/25/2024 12:17:35 - INFO - __main__ - Step 11070: {'lr': 0.0004940729263981253, 'samples': 531360, 'steps': 11069, 'loss/train': 1.5912785530090332} +07/25/2024 12:17:36 - INFO - __main__ - Step 11071: {'lr': 0.0004940717876534441, 'samples': 531408, 'steps': 11070, 'loss/train': 2.0026190280914307} +07/25/2024 12:17:36 - INFO - __main__ - Step 11072: {'lr': 0.0004940706488006947, 'samples': 531456, 'steps': 11071, 'loss/train': 0.2130386084318161} +07/25/2024 12:17:36 - INFO - __main__ - Step 11073: {'lr': 0.0004940695098398778, 'samples': 531504, 'steps': 11072, 'loss/train': 1.883116364479065} +07/25/2024 12:17:36 - INFO - __main__ - Step 11074: {'lr': 0.0004940683707709936, 'samples': 531552, 'steps': 11073, 'loss/train': 2.1242635250091553} +07/25/2024 12:17:37 - INFO - __main__ - Step 11075: {'lr': 0.0004940672315940429, 'samples': 531600, 'steps': 11074, 'loss/train': 1.3865950107574463} +07/25/2024 12:17:37 - INFO - __main__ - Step 11076: {'lr': 0.000494066092309026, 'samples': 531648, 'steps': 11075, 'loss/train': 1.3314718008041382} +07/25/2024 12:17:37 - INFO - __main__ - Step 11077: {'lr': 0.0004940649529159435, 'samples': 531696, 'steps': 11076, 'loss/train': 2.513625144958496} +07/25/2024 12:17:38 - INFO - __main__ - Step 11078: {'lr': 0.0004940638134147959, 'samples': 531744, 'steps': 11077, 'loss/train': 2.1485657691955566} +07/25/2024 12:17:38 - INFO - __main__ - Step 11079: {'lr': 0.0004940626738055837, 'samples': 531792, 'steps': 11078, 'loss/train': 1.8410251140594482} +07/25/2024 12:17:38 - INFO - __main__ - Step 11080: {'lr': 0.0004940615340883074, 'samples': 531840, 'steps': 11079, 'loss/train': 1.7597248554229736} +07/25/2024 12:17:38 - INFO - __main__ - Step 11081: {'lr': 0.0004940603942629674, 'samples': 531888, 'steps': 11080, 'loss/train': 2.32368803024292} +07/25/2024 12:17:39 - INFO - __main__ - Step 11082: {'lr': 0.0004940592543295643, 'samples': 531936, 'steps': 11081, 'loss/train': 1.9794179201126099} +07/25/2024 12:17:39 - INFO - __main__ - Step 11083: {'lr': 0.0004940581142880986, 'samples': 531984, 'steps': 11082, 'loss/train': 1.9802604913711548} +07/25/2024 12:17:39 - INFO - __main__ - Step 11084: {'lr': 0.0004940569741385709, 'samples': 532032, 'steps': 11083, 'loss/train': 2.15702748298645} +07/25/2024 12:17:39 - INFO - __main__ - Step 11085: {'lr': 0.0004940558338809815, 'samples': 532080, 'steps': 11084, 'loss/train': 2.387535572052002} +07/25/2024 12:17:40 - INFO - __main__ - Step 11086: {'lr': 0.0004940546935153311, 'samples': 532128, 'steps': 11085, 'loss/train': 2.1578140258789062} +07/25/2024 12:17:40 - INFO - __main__ - Step 11087: {'lr': 0.00049405355304162, 'samples': 532176, 'steps': 11086, 'loss/train': 2.0677311420440674} +07/25/2024 12:17:40 - INFO - __main__ - Step 11088: {'lr': 0.0004940524124598489, 'samples': 532224, 'steps': 11087, 'loss/train': 1.6481449604034424} +07/25/2024 12:17:41 - INFO - __main__ - Step 11089: {'lr': 0.0004940512717700183, 'samples': 532272, 'steps': 11088, 'loss/train': 1.896888017654419} +07/25/2024 12:17:41 - INFO - __main__ - Step 11090: {'lr': 0.0004940501309721285, 'samples': 532320, 'steps': 11089, 'loss/train': 1.649458408355713} +07/25/2024 12:17:41 - INFO - __main__ - Step 11091: {'lr': 0.0004940489900661802, 'samples': 532368, 'steps': 11090, 'loss/train': 2.0073153972625732} +07/25/2024 12:17:41 - INFO - __main__ - Step 11092: {'lr': 0.0004940478490521739, 'samples': 532416, 'steps': 11091, 'loss/train': 1.926943063735962} +07/25/2024 12:17:42 - INFO - __main__ - Step 11093: {'lr': 0.00049404670793011, 'samples': 532464, 'steps': 11092, 'loss/train': 1.1771160364151} +07/25/2024 12:17:42 - INFO - __main__ - Step 11094: {'lr': 0.000494045566699989, 'samples': 532512, 'steps': 11093, 'loss/train': 1.6209743022918701} +07/25/2024 12:17:42 - INFO - __main__ - Step 11095: {'lr': 0.0004940444253618117, 'samples': 532560, 'steps': 11094, 'loss/train': 2.1572327613830566} +07/25/2024 12:17:43 - INFO - __main__ - Step 11096: {'lr': 0.0004940432839155781, 'samples': 532608, 'steps': 11095, 'loss/train': 0.24691353738307953} +07/25/2024 12:17:43 - INFO - __main__ - Step 11097: {'lr': 0.000494042142361289, 'samples': 532656, 'steps': 11096, 'loss/train': 1.786273717880249} +07/25/2024 12:17:43 - INFO - __main__ - Step 11098: {'lr': 0.000494041000698945, 'samples': 532704, 'steps': 11097, 'loss/train': 2.201597213745117} +07/25/2024 12:17:43 - INFO - __main__ - Step 11099: {'lr': 0.0004940398589285464, 'samples': 532752, 'steps': 11098, 'loss/train': 0.956780195236206} +07/25/2024 12:17:44 - INFO - __main__ - Step 11100: {'lr': 0.0004940387170500938, 'samples': 532800, 'steps': 11099, 'loss/train': 2.2548346519470215} +07/25/2024 12:17:44 - INFO - __main__ - Step 11101: {'lr': 0.0004940375750635877, 'samples': 532848, 'steps': 11100, 'loss/train': 1.787450909614563} +07/25/2024 12:17:44 - INFO - __main__ - Step 11102: {'lr': 0.0004940364329690286, 'samples': 532896, 'steps': 11101, 'loss/train': 2.3395419120788574} +07/25/2024 12:17:45 - INFO - __main__ - Step 11103: {'lr': 0.000494035290766417, 'samples': 532944, 'steps': 11102, 'loss/train': 2.0245654582977295} +07/25/2024 12:17:45 - INFO - __main__ - Step 11104: {'lr': 0.0004940341484557534, 'samples': 532992, 'steps': 11103, 'loss/train': 2.1137242317199707} +07/25/2024 12:17:45 - INFO - __main__ - Step 11105: {'lr': 0.0004940330060370383, 'samples': 533040, 'steps': 11104, 'loss/train': 2.634561061859131} +07/25/2024 12:17:45 - INFO - __main__ - Step 11106: {'lr': 0.0004940318635102723, 'samples': 533088, 'steps': 11105, 'loss/train': 1.7644450664520264} +07/25/2024 12:17:46 - INFO - __main__ - Step 11107: {'lr': 0.0004940307208754558, 'samples': 533136, 'steps': 11106, 'loss/train': 2.883969783782959} +07/25/2024 12:17:46 - INFO - __main__ - Step 11108: {'lr': 0.0004940295781325893, 'samples': 533184, 'steps': 11107, 'loss/train': 2.378058433532715} +07/25/2024 12:17:46 - INFO - __main__ - Step 11109: {'lr': 0.0004940284352816734, 'samples': 533232, 'steps': 11108, 'loss/train': 1.7533351182937622} +07/25/2024 12:17:47 - INFO - __main__ - Step 11110: {'lr': 0.0004940272923227085, 'samples': 533280, 'steps': 11109, 'loss/train': 1.4923126697540283} +07/25/2024 12:17:47 - INFO - __main__ - Step 11111: {'lr': 0.0004940261492556952, 'samples': 533328, 'steps': 11110, 'loss/train': 1.8287996053695679} +07/25/2024 12:17:47 - INFO - __main__ - Step 11112: {'lr': 0.0004940250060806339, 'samples': 533376, 'steps': 11111, 'loss/train': 2.7436108589172363} +07/25/2024 12:17:47 - INFO - __main__ - Step 11113: {'lr': 0.0004940238627975252, 'samples': 533424, 'steps': 11112, 'loss/train': 1.5524568557739258} +07/25/2024 12:17:48 - INFO - __main__ - Step 11114: {'lr': 0.0004940227194063695, 'samples': 533472, 'steps': 11113, 'loss/train': 2.3043289184570312} +07/25/2024 12:17:48 - INFO - __main__ - Step 11115: {'lr': 0.0004940215759071674, 'samples': 533520, 'steps': 11114, 'loss/train': 2.4192543029785156} +07/25/2024 12:17:48 - INFO - __main__ - Step 11116: {'lr': 0.0004940204322999195, 'samples': 533568, 'steps': 11115, 'loss/train': 1.5547202825546265} +07/25/2024 12:17:49 - INFO - __main__ - Step 11117: {'lr': 0.0004940192885846261, 'samples': 533616, 'steps': 11116, 'loss/train': 2.3454196453094482} +07/25/2024 12:17:49 - INFO - __main__ - Step 11118: {'lr': 0.0004940181447612877, 'samples': 533664, 'steps': 11117, 'loss/train': 1.631461262702942} +07/25/2024 12:17:49 - INFO - __main__ - Step 11119: {'lr': 0.0004940170008299051, 'samples': 533712, 'steps': 11118, 'loss/train': 2.346558094024658} +07/25/2024 12:17:49 - INFO - __main__ - Step 11120: {'lr': 0.0004940158567904785, 'samples': 533760, 'steps': 11119, 'loss/train': 0.21697956323623657} +07/25/2024 12:17:50 - INFO - __main__ - Step 11121: {'lr': 0.0004940147126430085, 'samples': 533808, 'steps': 11120, 'loss/train': 1.8896774053573608} +07/25/2024 12:17:50 - INFO - __main__ - Step 11122: {'lr': 0.0004940135683874957, 'samples': 533856, 'steps': 11121, 'loss/train': 2.5173330307006836} +07/25/2024 12:17:50 - INFO - __main__ - Step 11123: {'lr': 0.0004940124240239405, 'samples': 533904, 'steps': 11122, 'loss/train': 1.6223692893981934} +07/25/2024 12:17:51 - INFO - __main__ - Step 11124: {'lr': 0.0004940112795523433, 'samples': 533952, 'steps': 11123, 'loss/train': 1.1867306232452393} +07/25/2024 12:17:51 - INFO - __main__ - Step 11125: {'lr': 0.000494010134972705, 'samples': 534000, 'steps': 11124, 'loss/train': 1.7128483057022095} +07/25/2024 12:17:51 - INFO - __main__ - Step 11126: {'lr': 0.0004940089902850256, 'samples': 534048, 'steps': 11125, 'loss/train': 1.9483802318572998} +07/25/2024 12:17:51 - INFO - __main__ - Step 11127: {'lr': 0.000494007845489306, 'samples': 534096, 'steps': 11126, 'loss/train': 1.7942711114883423} +07/25/2024 12:17:52 - INFO - __main__ - Step 11128: {'lr': 0.0004940067005855466, 'samples': 534144, 'steps': 11127, 'loss/train': 2.552264928817749} +07/25/2024 12:17:52 - INFO - __main__ - Step 11129: {'lr': 0.0004940055555737478, 'samples': 534192, 'steps': 11128, 'loss/train': 2.5458483695983887} +07/25/2024 12:17:52 - INFO - __main__ - Step 11130: {'lr': 0.00049400441045391, 'samples': 534240, 'steps': 11129, 'loss/train': 1.5750385522842407} +07/25/2024 12:17:53 - INFO - __main__ - Step 11131: {'lr': 0.0004940032652260342, 'samples': 534288, 'steps': 11130, 'loss/train': 2.292487859725952} +07/25/2024 12:17:53 - INFO - __main__ - Step 11132: {'lr': 0.0004940021198901204, 'samples': 534336, 'steps': 11131, 'loss/train': 1.7716193199157715} +07/25/2024 12:17:53 - INFO - __main__ - Step 11133: {'lr': 0.0004940009744461693, 'samples': 534384, 'steps': 11132, 'loss/train': 2.0290637016296387} +07/25/2024 12:17:53 - INFO - __main__ - Step 11134: {'lr': 0.0004939998288941814, 'samples': 534432, 'steps': 11133, 'loss/train': 1.7342015504837036} +07/25/2024 12:17:54 - INFO - __main__ - Step 11135: {'lr': 0.0004939986832341575, 'samples': 534480, 'steps': 11134, 'loss/train': 1.669790267944336} +07/25/2024 12:17:54 - INFO - __main__ - Step 11136: {'lr': 0.0004939975374660975, 'samples': 534528, 'steps': 11135, 'loss/train': 1.4805481433868408} +07/25/2024 12:17:54 - INFO - __main__ - Step 11137: {'lr': 0.0004939963915900023, 'samples': 534576, 'steps': 11136, 'loss/train': 1.6554687023162842} +07/25/2024 12:17:55 - INFO - __main__ - Step 11138: {'lr': 0.0004939952456058723, 'samples': 534624, 'steps': 11137, 'loss/train': 2.1292364597320557} +07/25/2024 12:17:55 - INFO - __main__ - Step 11139: {'lr': 0.0004939940995137081, 'samples': 534672, 'steps': 11138, 'loss/train': 2.546217679977417} +07/25/2024 12:17:55 - INFO - __main__ - Step 11140: {'lr': 0.0004939929533135101, 'samples': 534720, 'steps': 11139, 'loss/train': 2.0093677043914795} +07/25/2024 12:17:55 - INFO - __main__ - Step 11141: {'lr': 0.000493991807005279, 'samples': 534768, 'steps': 11140, 'loss/train': 1.9553842544555664} +07/25/2024 12:17:56 - INFO - __main__ - Step 11142: {'lr': 0.0004939906605890149, 'samples': 534816, 'steps': 11141, 'loss/train': 1.687152624130249} +07/25/2024 12:17:56 - INFO - __main__ - Step 11143: {'lr': 0.0004939895140647188, 'samples': 534864, 'steps': 11142, 'loss/train': 1.4182698726654053} +07/25/2024 12:17:56 - INFO - __main__ - Step 11144: {'lr': 0.0004939883674323909, 'samples': 534912, 'steps': 11143, 'loss/train': 0.2661270499229431} +07/25/2024 12:17:57 - INFO - __main__ - Step 11145: {'lr': 0.0004939872206920318, 'samples': 534960, 'steps': 11144, 'loss/train': 1.4068206548690796} +07/25/2024 12:17:57 - INFO - __main__ - Step 11146: {'lr': 0.000493986073843642, 'samples': 535008, 'steps': 11145, 'loss/train': 2.582219123840332} +07/25/2024 12:17:57 - INFO - __main__ - Step 11147: {'lr': 0.0004939849268872221, 'samples': 535056, 'steps': 11146, 'loss/train': 2.1724495887756348} +07/25/2024 12:17:57 - INFO - __main__ - Step 11148: {'lr': 0.0004939837798227724, 'samples': 535104, 'steps': 11147, 'loss/train': 1.30497145652771} +07/25/2024 12:17:58 - INFO - __main__ - Step 11149: {'lr': 0.0004939826326502935, 'samples': 535152, 'steps': 11148, 'loss/train': 2.4912445545196533} +07/25/2024 12:17:58 - INFO - __main__ - Step 11150: {'lr': 0.0004939814853697859, 'samples': 535200, 'steps': 11149, 'loss/train': 2.0347611904144287} +07/25/2024 12:17:58 - INFO - __main__ - Step 11151: {'lr': 0.0004939803379812502, 'samples': 535248, 'steps': 11150, 'loss/train': 2.228398323059082} +07/25/2024 12:17:59 - INFO - __main__ - Step 11152: {'lr': 0.0004939791904846869, 'samples': 535296, 'steps': 11151, 'loss/train': 2.355072498321533} +07/25/2024 12:17:59 - INFO - __main__ - Step 11153: {'lr': 0.0004939780428800963, 'samples': 535344, 'steps': 11152, 'loss/train': 2.3571126461029053} +07/25/2024 12:17:59 - INFO - __main__ - Step 11154: {'lr': 0.0004939768951674791, 'samples': 535392, 'steps': 11153, 'loss/train': 1.9850395917892456} +07/25/2024 12:17:59 - INFO - __main__ - Step 11155: {'lr': 0.0004939757473468359, 'samples': 535440, 'steps': 11154, 'loss/train': 1.9230278730392456} +07/25/2024 12:18:00 - INFO - __main__ - Step 11156: {'lr': 0.000493974599418167, 'samples': 535488, 'steps': 11155, 'loss/train': 1.8968149423599243} +07/25/2024 12:18:00 - INFO - __main__ - Step 11157: {'lr': 0.000493973451381473, 'samples': 535536, 'steps': 11156, 'loss/train': 2.0283751487731934} +07/25/2024 12:18:00 - INFO - __main__ - Step 11158: {'lr': 0.0004939723032367542, 'samples': 535584, 'steps': 11157, 'loss/train': 1.6847418546676636} +07/25/2024 12:18:01 - INFO - __main__ - Step 11159: {'lr': 0.0004939711549840116, 'samples': 535632, 'steps': 11158, 'loss/train': 1.8825677633285522} +07/25/2024 12:18:01 - INFO - __main__ - Step 11160: {'lr': 0.0004939700066232452, 'samples': 535680, 'steps': 11159, 'loss/train': 2.0585250854492188} +07/25/2024 12:18:01 - INFO - __main__ - Step 11161: {'lr': 0.0004939688581544558, 'samples': 535728, 'steps': 11160, 'loss/train': 2.286466360092163} +07/25/2024 12:18:01 - INFO - __main__ - Step 11162: {'lr': 0.0004939677095776438, 'samples': 535776, 'steps': 11161, 'loss/train': 2.238456964492798} +07/25/2024 12:18:02 - INFO - __main__ - Step 11163: {'lr': 0.0004939665608928098, 'samples': 535824, 'steps': 11162, 'loss/train': 2.0783050060272217} +07/25/2024 12:18:02 - INFO - __main__ - Step 11164: {'lr': 0.0004939654120999542, 'samples': 535872, 'steps': 11163, 'loss/train': 2.19620943069458} +07/25/2024 12:18:02 - INFO - __main__ - Step 11165: {'lr': 0.0004939642631990775, 'samples': 535920, 'steps': 11164, 'loss/train': 2.1588361263275146} +07/25/2024 12:18:03 - INFO - __main__ - Step 11166: {'lr': 0.0004939631141901802, 'samples': 535968, 'steps': 11165, 'loss/train': 0.7321003079414368} +07/25/2024 12:18:03 - INFO - __main__ - Step 11167: {'lr': 0.0004939619650732631, 'samples': 536016, 'steps': 11166, 'loss/train': 2.558594226837158} +07/25/2024 12:18:03 - INFO - __main__ - Step 11168: {'lr': 0.0004939608158483263, 'samples': 536064, 'steps': 11167, 'loss/train': 0.16024799644947052} +07/25/2024 12:18:03 - INFO - __main__ - Step 11169: {'lr': 0.0004939596665153705, 'samples': 536112, 'steps': 11168, 'loss/train': 1.6303497552871704} +07/25/2024 12:18:04 - INFO - __main__ - Step 11170: {'lr': 0.0004939585170743962, 'samples': 536160, 'steps': 11169, 'loss/train': 2.766242742538452} +07/25/2024 12:18:04 - INFO - __main__ - Step 11171: {'lr': 0.0004939573675254039, 'samples': 536208, 'steps': 11170, 'loss/train': 2.2239925861358643} +07/25/2024 12:18:04 - INFO - __main__ - Step 11172: {'lr': 0.0004939562178683942, 'samples': 536256, 'steps': 11171, 'loss/train': 1.4924607276916504} +07/25/2024 12:18:05 - INFO - __main__ - Step 11173: {'lr': 0.0004939550681033675, 'samples': 536304, 'steps': 11172, 'loss/train': 1.734104037284851} +07/25/2024 12:18:05 - INFO - __main__ - Step 11174: {'lr': 0.0004939539182303242, 'samples': 536352, 'steps': 11173, 'loss/train': 1.5817432403564453} +07/25/2024 12:18:05 - INFO - __main__ - Step 11175: {'lr': 0.0004939527682492651, 'samples': 536400, 'steps': 11174, 'loss/train': 1.7067803144454956} +07/25/2024 12:18:05 - INFO - __main__ - Step 11176: {'lr': 0.0004939516181601905, 'samples': 536448, 'steps': 11175, 'loss/train': 1.9235401153564453} +07/25/2024 12:18:06 - INFO - __main__ - Step 11177: {'lr': 0.0004939504679631009, 'samples': 536496, 'steps': 11176, 'loss/train': 2.3516435623168945} +07/25/2024 12:18:06 - INFO - __main__ - Step 11178: {'lr': 0.000493949317657997, 'samples': 536544, 'steps': 11177, 'loss/train': 1.4569456577301025} +07/25/2024 12:18:06 - INFO - __main__ - Step 11179: {'lr': 0.000493948167244879, 'samples': 536592, 'steps': 11178, 'loss/train': 2.3289477825164795} +07/25/2024 12:18:07 - INFO - __main__ - Step 11180: {'lr': 0.0004939470167237477, 'samples': 536640, 'steps': 11179, 'loss/train': 1.9129916429519653} +07/25/2024 12:18:07 - INFO - __main__ - Step 11181: {'lr': 0.0004939458660946035, 'samples': 536688, 'steps': 11180, 'loss/train': 1.6204886436462402} +07/25/2024 12:18:07 - INFO - __main__ - Step 11182: {'lr': 0.0004939447153574468, 'samples': 536736, 'steps': 11181, 'loss/train': 1.7451698780059814} +07/25/2024 12:18:07 - INFO - __main__ - Step 11183: {'lr': 0.0004939435645122784, 'samples': 536784, 'steps': 11182, 'loss/train': 1.9382216930389404} +07/25/2024 12:18:08 - INFO - __main__ - Step 11184: {'lr': 0.0004939424135590986, 'samples': 536832, 'steps': 11183, 'loss/train': 2.364257574081421} +07/25/2024 12:18:08 - INFO - __main__ - Step 11185: {'lr': 0.0004939412624979078, 'samples': 536880, 'steps': 11184, 'loss/train': 1.6980420351028442} +07/25/2024 12:18:08 - INFO - __main__ - Step 11186: {'lr': 0.0004939401113287067, 'samples': 536928, 'steps': 11185, 'loss/train': 2.1975083351135254} +07/25/2024 12:18:08 - INFO - __main__ - Step 11187: {'lr': 0.0004939389600514957, 'samples': 536976, 'steps': 11186, 'loss/train': 1.5594329833984375} +07/25/2024 12:18:09 - INFO - __main__ - Step 11188: {'lr': 0.0004939378086662754, 'samples': 537024, 'steps': 11187, 'loss/train': 2.608205556869507} +07/25/2024 12:18:09 - INFO - __main__ - Step 11189: {'lr': 0.0004939366571730463, 'samples': 537072, 'steps': 11188, 'loss/train': 2.638375997543335} +07/25/2024 12:18:09 - INFO - __main__ - Step 11190: {'lr': 0.0004939355055718088, 'samples': 537120, 'steps': 11189, 'loss/train': 1.1372358798980713} +07/25/2024 12:18:10 - INFO - __main__ - Step 11191: {'lr': 0.0004939343538625636, 'samples': 537168, 'steps': 11190, 'loss/train': 1.8769723176956177} +07/25/2024 12:18:10 - INFO - __main__ - Step 11192: {'lr': 0.0004939332020453111, 'samples': 537216, 'steps': 11191, 'loss/train': 0.13528381288051605} +07/25/2024 12:18:10 - INFO - __main__ - Step 11193: {'lr': 0.0004939320501200518, 'samples': 537264, 'steps': 11192, 'loss/train': 1.3429185152053833} +07/25/2024 12:18:10 - INFO - __main__ - Step 11194: {'lr': 0.0004939308980867861, 'samples': 537312, 'steps': 11193, 'loss/train': 2.657897710800171} +07/25/2024 12:18:11 - INFO - __main__ - Step 11195: {'lr': 0.0004939297459455148, 'samples': 537360, 'steps': 11194, 'loss/train': 1.8271701335906982} +07/25/2024 12:18:11 - INFO - __main__ - Step 11196: {'lr': 0.0004939285936962383, 'samples': 537408, 'steps': 11195, 'loss/train': 1.7921470403671265} +07/25/2024 12:18:11 - INFO - __main__ - Step 11197: {'lr': 0.0004939274413389568, 'samples': 537456, 'steps': 11196, 'loss/train': 1.554137945175171} +07/25/2024 12:18:12 - INFO - __main__ - Step 11198: {'lr': 0.0004939262888736713, 'samples': 537504, 'steps': 11197, 'loss/train': 2.340303421020508} +07/25/2024 12:18:12 - INFO - __main__ - Step 11199: {'lr': 0.000493925136300382, 'samples': 537552, 'steps': 11198, 'loss/train': 2.016481637954712} +07/25/2024 12:18:12 - INFO - __main__ - Step 11200: {'lr': 0.0004939239836190893, 'samples': 537600, 'steps': 11199, 'loss/train': 1.9752498865127563} +07/25/2024 12:18:12 - INFO - __main__ - Step 11201: {'lr': 0.0004939228308297941, 'samples': 537648, 'steps': 11200, 'loss/train': 1.740605115890503} +07/25/2024 12:18:13 - INFO - __main__ - Step 11202: {'lr': 0.0004939216779324966, 'samples': 537696, 'steps': 11201, 'loss/train': 1.6322177648544312} +07/25/2024 12:18:13 - INFO - __main__ - Step 11203: {'lr': 0.0004939205249271975, 'samples': 537744, 'steps': 11202, 'loss/train': 1.938724398612976} +07/25/2024 12:18:13 - INFO - __main__ - Step 11204: {'lr': 0.0004939193718138973, 'samples': 537792, 'steps': 11203, 'loss/train': 2.1328158378601074} +07/25/2024 12:18:14 - INFO - __main__ - Step 11205: {'lr': 0.0004939182185925963, 'samples': 537840, 'steps': 11204, 'loss/train': 2.5045509338378906} +07/25/2024 12:18:14 - INFO - __main__ - Step 11206: {'lr': 0.0004939170652632952, 'samples': 537888, 'steps': 11205, 'loss/train': 1.5182703733444214} +07/25/2024 12:18:14 - INFO - __main__ - Step 11207: {'lr': 0.0004939159118259944, 'samples': 537936, 'steps': 11206, 'loss/train': 2.081024408340454} +07/25/2024 12:18:14 - INFO - __main__ - Step 11208: {'lr': 0.0004939147582806946, 'samples': 537984, 'steps': 11207, 'loss/train': 1.9017603397369385} +07/25/2024 12:18:15 - INFO - __main__ - Step 11209: {'lr': 0.000493913604627396, 'samples': 538032, 'steps': 11208, 'loss/train': 2.240799903869629} +07/25/2024 12:18:15 - INFO - __main__ - Step 11210: {'lr': 0.0004939124508660993, 'samples': 538080, 'steps': 11209, 'loss/train': 2.070603609085083} +07/25/2024 12:18:15 - INFO - __main__ - Step 11211: {'lr': 0.0004939112969968052, 'samples': 538128, 'steps': 11210, 'loss/train': 1.7874374389648438} +07/25/2024 12:18:16 - INFO - __main__ - Step 11212: {'lr': 0.0004939101430195138, 'samples': 538176, 'steps': 11211, 'loss/train': 1.7288645505905151} +07/25/2024 12:18:16 - INFO - __main__ - Step 11213: {'lr': 0.000493908988934226, 'samples': 538224, 'steps': 11212, 'loss/train': 1.923672080039978} +07/25/2024 12:18:16 - INFO - __main__ - Step 11214: {'lr': 0.000493907834740942, 'samples': 538272, 'steps': 11213, 'loss/train': 1.6012094020843506} +07/25/2024 12:18:16 - INFO - __main__ - Step 11215: {'lr': 0.0004939066804396624, 'samples': 538320, 'steps': 11214, 'loss/train': 2.2210328578948975} +07/25/2024 12:18:17 - INFO - __main__ - Step 11216: {'lr': 0.000493905526030388, 'samples': 538368, 'steps': 11215, 'loss/train': 0.13964740931987762} +07/25/2024 12:18:17 - INFO - __main__ - Step 11217: {'lr': 0.0004939043715131189, 'samples': 538416, 'steps': 11216, 'loss/train': 1.6132009029388428} +07/25/2024 12:18:17 - INFO - __main__ - Step 11218: {'lr': 0.0004939032168878557, 'samples': 538464, 'steps': 11217, 'loss/train': 2.6997196674346924} +07/25/2024 12:18:18 - INFO - __main__ - Step 11219: {'lr': 0.0004939020621545991, 'samples': 538512, 'steps': 11218, 'loss/train': 1.287286639213562} +07/25/2024 12:18:18 - INFO - __main__ - Step 11220: {'lr': 0.0004939009073133496, 'samples': 538560, 'steps': 11219, 'loss/train': 1.420274257659912} +07/25/2024 12:18:18 - INFO - __main__ - Step 11221: {'lr': 0.0004938997523641074, 'samples': 538608, 'steps': 11220, 'loss/train': 1.5114881992340088} +07/25/2024 12:18:18 - INFO - __main__ - Step 11222: {'lr': 0.0004938985973068733, 'samples': 538656, 'steps': 11221, 'loss/train': 1.8476295471191406} +07/25/2024 12:18:19 - INFO - __main__ - Step 11223: {'lr': 0.0004938974421416478, 'samples': 538704, 'steps': 11222, 'loss/train': 2.2166378498077393} +07/25/2024 12:18:19 - INFO - __main__ - Step 11224: {'lr': 0.0004938962868684313, 'samples': 538752, 'steps': 11223, 'loss/train': 2.2164273262023926} +07/25/2024 12:18:19 - INFO - __main__ - Step 11225: {'lr': 0.0004938951314872244, 'samples': 538800, 'steps': 11224, 'loss/train': 2.1800599098205566} +07/25/2024 12:18:20 - INFO - __main__ - Step 11226: {'lr': 0.0004938939759980275, 'samples': 538848, 'steps': 11225, 'loss/train': 1.3282963037490845} +07/25/2024 12:18:20 - INFO - __main__ - Step 11227: {'lr': 0.0004938928204008412, 'samples': 538896, 'steps': 11226, 'loss/train': 2.286886215209961} +07/25/2024 12:18:20 - INFO - __main__ - Step 11228: {'lr': 0.000493891664695666, 'samples': 538944, 'steps': 11227, 'loss/train': 2.0690057277679443} +07/25/2024 12:18:20 - INFO - __main__ - Step 11229: {'lr': 0.0004938905088825024, 'samples': 538992, 'steps': 11228, 'loss/train': 1.6296566724777222} +07/25/2024 12:18:21 - INFO - __main__ - Step 11230: {'lr': 0.000493889352961351, 'samples': 539040, 'steps': 11229, 'loss/train': 1.8720885515213013} +07/25/2024 12:18:21 - INFO - __main__ - Step 11231: {'lr': 0.0004938881969322121, 'samples': 539088, 'steps': 11230, 'loss/train': 1.899421215057373} +07/25/2024 12:18:21 - INFO - __main__ - Step 11232: {'lr': 0.0004938870407950864, 'samples': 539136, 'steps': 11231, 'loss/train': 2.023919105529785} +07/25/2024 12:18:22 - INFO - __main__ - Step 11233: {'lr': 0.0004938858845499744, 'samples': 539184, 'steps': 11232, 'loss/train': 1.7559974193572998} +07/25/2024 12:18:22 - INFO - __main__ - Step 11234: {'lr': 0.0004938847281968765, 'samples': 539232, 'steps': 11233, 'loss/train': 2.1308703422546387} +07/25/2024 12:18:22 - INFO - __main__ - Step 11235: {'lr': 0.0004938835717357932, 'samples': 539280, 'steps': 11234, 'loss/train': 1.9712105989456177} +07/25/2024 12:18:22 - INFO - __main__ - Step 11236: {'lr': 0.0004938824151667253, 'samples': 539328, 'steps': 11235, 'loss/train': 1.6524748802185059} +07/25/2024 12:18:23 - INFO - __main__ - Step 11237: {'lr': 0.0004938812584896729, 'samples': 539376, 'steps': 11236, 'loss/train': 1.5367014408111572} +07/25/2024 12:18:23 - INFO - __main__ - Step 11238: {'lr': 0.0004938801017046368, 'samples': 539424, 'steps': 11237, 'loss/train': 1.6327712535858154} +07/25/2024 12:18:23 - INFO - __main__ - Step 11239: {'lr': 0.0004938789448116174, 'samples': 539472, 'steps': 11238, 'loss/train': 1.409110426902771} +07/25/2024 12:18:24 - INFO - __main__ - Step 11240: {'lr': 0.0004938777878106152, 'samples': 539520, 'steps': 11239, 'loss/train': 0.18029096722602844} +07/25/2024 12:18:24 - INFO - __main__ - Step 11241: {'lr': 0.0004938766307016309, 'samples': 539568, 'steps': 11240, 'loss/train': 1.8078538179397583} +07/25/2024 12:18:24 - INFO - __main__ - Step 11242: {'lr': 0.0004938754734846647, 'samples': 539616, 'steps': 11241, 'loss/train': 2.242309093475342} +07/25/2024 12:18:24 - INFO - __main__ - Step 11243: {'lr': 0.0004938743161597174, 'samples': 539664, 'steps': 11242, 'loss/train': 1.9524524211883545} +07/25/2024 12:18:25 - INFO - __main__ - Step 11244: {'lr': 0.0004938731587267892, 'samples': 539712, 'steps': 11243, 'loss/train': 1.705798864364624} +07/25/2024 12:18:25 - INFO - __main__ - Step 11245: {'lr': 0.000493872001185881, 'samples': 539760, 'steps': 11244, 'loss/train': 1.4067718982696533} +07/25/2024 12:18:25 - INFO - __main__ - Step 11246: {'lr': 0.000493870843536993, 'samples': 539808, 'steps': 11245, 'loss/train': 0.6428976655006409} +07/25/2024 12:18:26 - INFO - __main__ - Step 11247: {'lr': 0.0004938696857801259, 'samples': 539856, 'steps': 11246, 'loss/train': 1.766912579536438} +07/25/2024 12:18:26 - INFO - __main__ - Step 11248: {'lr': 0.00049386852791528, 'samples': 539904, 'steps': 11247, 'loss/train': 2.4994211196899414} +07/25/2024 12:18:26 - INFO - __main__ - Step 11249: {'lr': 0.0004938673699424561, 'samples': 539952, 'steps': 11248, 'loss/train': 1.9705965518951416} +07/25/2024 12:18:26 - INFO - __main__ - Step 11250: {'lr': 0.0004938662118616544, 'samples': 540000, 'steps': 11249, 'loss/train': 1.6491063833236694} +07/25/2024 12:18:27 - INFO - __main__ - Step 11251: {'lr': 0.0004938650536728757, 'samples': 540048, 'steps': 11250, 'loss/train': 2.4677045345306396} +07/25/2024 12:18:27 - INFO - __main__ - Step 11252: {'lr': 0.0004938638953761205, 'samples': 540096, 'steps': 11251, 'loss/train': 0.4364350438117981} +07/25/2024 12:18:27 - INFO - __main__ - Step 11253: {'lr': 0.0004938627369713889, 'samples': 540144, 'steps': 11252, 'loss/train': 2.139087438583374} +07/25/2024 12:18:28 - INFO - __main__ - Step 11254: {'lr': 0.000493861578458682, 'samples': 540192, 'steps': 11253, 'loss/train': 1.5520453453063965} +07/25/2024 12:18:28 - INFO - __main__ - Step 11255: {'lr': 0.0004938604198379998, 'samples': 540240, 'steps': 11254, 'loss/train': 2.0559933185577393} +07/25/2024 12:18:28 - INFO - __main__ - Step 11256: {'lr': 0.0004938592611093432, 'samples': 540288, 'steps': 11255, 'loss/train': 1.6636881828308105} +07/25/2024 12:18:28 - INFO - __main__ - Step 11257: {'lr': 0.0004938581022727124, 'samples': 540336, 'steps': 11256, 'loss/train': 1.6668787002563477} +07/25/2024 12:18:29 - INFO - __main__ - Step 11258: {'lr': 0.0004938569433281082, 'samples': 540384, 'steps': 11257, 'loss/train': 2.0356414318084717} +07/25/2024 12:18:29 - INFO - __main__ - Step 11259: {'lr': 0.000493855784275531, 'samples': 540432, 'steps': 11258, 'loss/train': 2.136620044708252} +07/25/2024 12:18:29 - INFO - __main__ - Step 11260: {'lr': 0.0004938546251149811, 'samples': 540480, 'steps': 11259, 'loss/train': 2.1132748126983643} +07/25/2024 12:18:29 - INFO - __main__ - Step 11261: {'lr': 0.0004938534658464594, 'samples': 540528, 'steps': 11260, 'loss/train': 1.9148128032684326} +07/25/2024 12:18:30 - INFO - __main__ - Step 11262: {'lr': 0.0004938523064699662, 'samples': 540576, 'steps': 11261, 'loss/train': 2.018207311630249} +07/25/2024 12:18:30 - INFO - __main__ - Step 11263: {'lr': 0.0004938511469855019, 'samples': 540624, 'steps': 11262, 'loss/train': 1.337515950202942} +07/25/2024 12:18:30 - INFO - __main__ - Step 11264: {'lr': 0.0004938499873930672, 'samples': 540672, 'steps': 11263, 'loss/train': 2.4021360874176025} +07/25/2024 12:18:31 - INFO - __main__ - Step 11265: {'lr': 0.0004938488276926626, 'samples': 540720, 'steps': 11264, 'loss/train': 1.879702091217041} +07/25/2024 12:18:31 - INFO - __main__ - Step 11266: {'lr': 0.0004938476678842886, 'samples': 540768, 'steps': 11265, 'loss/train': 1.6222965717315674} +07/25/2024 12:18:31 - INFO - __main__ - Step 11267: {'lr': 0.0004938465079679456, 'samples': 540816, 'steps': 11266, 'loss/train': 1.7289425134658813} +07/25/2024 12:18:31 - INFO - __main__ - Step 11268: {'lr': 0.0004938453479436342, 'samples': 540864, 'steps': 11267, 'loss/train': 2.28914737701416} +07/25/2024 12:18:32 - INFO - __main__ - Step 11269: {'lr': 0.000493844187811355, 'samples': 540912, 'steps': 11268, 'loss/train': 1.3716524839401245} +07/25/2024 12:18:32 - INFO - __main__ - Step 11270: {'lr': 0.0004938430275711084, 'samples': 540960, 'steps': 11269, 'loss/train': 1.7861014604568481} +07/25/2024 12:18:32 - INFO - __main__ - Step 11271: {'lr': 0.0004938418672228949, 'samples': 541008, 'steps': 11270, 'loss/train': 2.6115076541900635} +07/25/2024 12:18:33 - INFO - __main__ - Step 11272: {'lr': 0.0004938407067667151, 'samples': 541056, 'steps': 11271, 'loss/train': 1.0153385400772095} +07/25/2024 12:18:33 - INFO - __main__ - Step 11273: {'lr': 0.0004938395462025694, 'samples': 541104, 'steps': 11272, 'loss/train': 2.057879686355591} +07/25/2024 12:18:33 - INFO - __main__ - Step 11274: {'lr': 0.0004938383855304585, 'samples': 541152, 'steps': 11273, 'loss/train': 1.2963775396347046} +07/25/2024 12:18:33 - INFO - __main__ - Step 11275: {'lr': 0.0004938372247503826, 'samples': 541200, 'steps': 11274, 'loss/train': 2.414553642272949} +07/25/2024 12:18:34 - INFO - __main__ - Step 11276: {'lr': 0.0004938360638623426, 'samples': 541248, 'steps': 11275, 'loss/train': 1.996977686882019} +07/25/2024 12:18:34 - INFO - __main__ - Step 11277: {'lr': 0.0004938349028663387, 'samples': 541296, 'steps': 11276, 'loss/train': 1.9721992015838623} +07/25/2024 12:18:34 - INFO - __main__ - Step 11278: {'lr': 0.0004938337417623716, 'samples': 541344, 'steps': 11277, 'loss/train': 1.6805622577667236} +07/25/2024 12:18:35 - INFO - __main__ - Step 11279: {'lr': 0.0004938325805504417, 'samples': 541392, 'steps': 11278, 'loss/train': 2.068690538406372} +07/25/2024 12:18:35 - INFO - __main__ - Step 11280: {'lr': 0.0004938314192305496, 'samples': 541440, 'steps': 11279, 'loss/train': 2.0915932655334473} +07/25/2024 12:18:35 - INFO - __main__ - Step 11281: {'lr': 0.0004938302578026958, 'samples': 541488, 'steps': 11280, 'loss/train': 2.081768035888672} +07/25/2024 12:18:35 - INFO - __main__ - Step 11282: {'lr': 0.0004938290962668807, 'samples': 541536, 'steps': 11281, 'loss/train': 2.1377458572387695} +07/25/2024 12:18:36 - INFO - __main__ - Step 11283: {'lr': 0.000493827934623105, 'samples': 541584, 'steps': 11282, 'loss/train': 1.6987193822860718} +07/25/2024 12:18:36 - INFO - __main__ - Step 11284: {'lr': 0.0004938267728713692, 'samples': 541632, 'steps': 11283, 'loss/train': 2.136579751968384} +07/25/2024 12:18:36 - INFO - __main__ - Step 11285: {'lr': 0.0004938256110116736, 'samples': 541680, 'steps': 11284, 'loss/train': 1.366882562637329} +07/25/2024 12:18:37 - INFO - __main__ - Step 11286: {'lr': 0.0004938244490440189, 'samples': 541728, 'steps': 11285, 'loss/train': 2.249403476715088} +07/25/2024 12:18:37 - INFO - __main__ - Step 11287: {'lr': 0.0004938232869684055, 'samples': 541776, 'steps': 11286, 'loss/train': 2.167147397994995} +07/25/2024 12:18:37 - INFO - __main__ - Step 11288: {'lr': 0.000493822124784834, 'samples': 541824, 'steps': 11287, 'loss/train': 2.7649073600769043} +07/25/2024 12:18:37 - INFO - __main__ - Step 11289: {'lr': 0.000493820962493305, 'samples': 541872, 'steps': 11288, 'loss/train': 1.7968878746032715} +07/25/2024 12:18:38 - INFO - __main__ - Step 11290: {'lr': 0.0004938198000938188, 'samples': 541920, 'steps': 11289, 'loss/train': 2.0280370712280273} +07/25/2024 12:18:38 - INFO - __main__ - Step 11291: {'lr': 0.0004938186375863761, 'samples': 541968, 'steps': 11290, 'loss/train': 2.039571523666382} +07/25/2024 12:18:38 - INFO - __main__ - Step 11292: {'lr': 0.0004938174749709773, 'samples': 542016, 'steps': 11291, 'loss/train': 1.611053705215454} +07/25/2024 12:18:39 - INFO - __main__ - Step 11293: {'lr': 0.0004938163122476229, 'samples': 542064, 'steps': 11292, 'loss/train': 1.3033862113952637} +07/25/2024 12:18:39 - INFO - __main__ - Step 11294: {'lr': 0.0004938151494163136, 'samples': 542112, 'steps': 11293, 'loss/train': 1.6299281120300293} +07/25/2024 12:18:39 - INFO - __main__ - Step 11295: {'lr': 0.0004938139864770497, 'samples': 542160, 'steps': 11294, 'loss/train': 1.8221169710159302} +07/25/2024 12:18:39 - INFO - __main__ - Step 11296: {'lr': 0.0004938128234298319, 'samples': 542208, 'steps': 11295, 'loss/train': 1.7045968770980835} +07/25/2024 12:18:40 - INFO - __main__ - Step 11297: {'lr': 0.0004938116602746605, 'samples': 542256, 'steps': 11296, 'loss/train': 1.6589305400848389} +07/25/2024 12:18:40 - INFO - __main__ - Step 11298: {'lr': 0.0004938104970115361, 'samples': 542304, 'steps': 11297, 'loss/train': 1.6735807657241821} +07/25/2024 12:18:40 - INFO - __main__ - Step 11299: {'lr': 0.0004938093336404593, 'samples': 542352, 'steps': 11298, 'loss/train': 2.018564462661743} +07/25/2024 12:18:41 - INFO - __main__ - Step 11300: {'lr': 0.0004938081701614305, 'samples': 542400, 'steps': 11299, 'loss/train': 2.114964008331299} +07/25/2024 12:18:41 - INFO - __main__ - Step 11301: {'lr': 0.0004938070065744504, 'samples': 542448, 'steps': 11300, 'loss/train': 1.896474003791809} +07/25/2024 12:18:41 - INFO - __main__ - Step 11302: {'lr': 0.0004938058428795193, 'samples': 542496, 'steps': 11301, 'loss/train': 2.192704916000366} +07/25/2024 12:18:41 - INFO - __main__ - Step 11303: {'lr': 0.0004938046790766378, 'samples': 542544, 'steps': 11302, 'loss/train': 2.261615753173828} +07/25/2024 12:18:42 - INFO - __main__ - Step 11304: {'lr': 0.0004938035151658065, 'samples': 542592, 'steps': 11303, 'loss/train': 2.039691209793091} +07/25/2024 12:18:42 - INFO - __main__ - Step 11305: {'lr': 0.0004938023511470257, 'samples': 542640, 'steps': 11304, 'loss/train': 1.7673522233963013} +07/25/2024 12:18:42 - INFO - __main__ - Step 11306: {'lr': 0.0004938011870202962, 'samples': 542688, 'steps': 11305, 'loss/train': 1.7503504753112793} +07/25/2024 12:18:43 - INFO - __main__ - Step 11307: {'lr': 0.0004938000227856182, 'samples': 542736, 'steps': 11306, 'loss/train': 2.2599213123321533} +07/25/2024 12:18:43 - INFO - __main__ - Step 11308: {'lr': 0.0004937988584429925, 'samples': 542784, 'steps': 11307, 'loss/train': 1.7256592512130737} +07/25/2024 12:18:43 - INFO - __main__ - Step 11309: {'lr': 0.0004937976939924194, 'samples': 542832, 'steps': 11308, 'loss/train': 1.5586026906967163} +07/25/2024 12:18:43 - INFO - __main__ - Step 11310: {'lr': 0.0004937965294338996, 'samples': 542880, 'steps': 11309, 'loss/train': 2.08494234085083} +07/25/2024 12:18:44 - INFO - __main__ - Step 11311: {'lr': 0.0004937953647674336, 'samples': 542928, 'steps': 11310, 'loss/train': 1.3150947093963623} +07/25/2024 12:18:44 - INFO - __main__ - Step 11312: {'lr': 0.0004937941999930217, 'samples': 542976, 'steps': 11311, 'loss/train': 2.2682948112487793} +07/25/2024 12:18:44 - INFO - __main__ - Step 11313: {'lr': 0.0004937930351106647, 'samples': 543024, 'steps': 11312, 'loss/train': 1.936286211013794} +07/25/2024 12:18:45 - INFO - __main__ - Step 11314: {'lr': 0.0004937918701203628, 'samples': 543072, 'steps': 11313, 'loss/train': 1.5892784595489502} +07/25/2024 12:18:45 - INFO - __main__ - Step 11315: {'lr': 0.0004937907050221168, 'samples': 543120, 'steps': 11314, 'loss/train': 1.904233694076538} +07/25/2024 12:18:45 - INFO - __main__ - Step 11316: {'lr': 0.0004937895398159271, 'samples': 543168, 'steps': 11315, 'loss/train': 2.1952149868011475} +07/25/2024 12:18:45 - INFO - __main__ - Step 11317: {'lr': 0.0004937883745017942, 'samples': 543216, 'steps': 11316, 'loss/train': 1.405128002166748} +07/25/2024 12:18:46 - INFO - __main__ - Step 11318: {'lr': 0.0004937872090797187, 'samples': 543264, 'steps': 11317, 'loss/train': 1.798761010169983} +07/25/2024 12:18:46 - INFO - __main__ - Step 11319: {'lr': 0.0004937860435497009, 'samples': 543312, 'steps': 11318, 'loss/train': 2.305211305618286} +07/25/2024 12:18:46 - INFO - __main__ - Step 11320: {'lr': 0.0004937848779117415, 'samples': 543360, 'steps': 11319, 'loss/train': 1.9729121923446655} +07/25/2024 12:18:47 - INFO - __main__ - Step 11321: {'lr': 0.0004937837121658411, 'samples': 543408, 'steps': 11320, 'loss/train': 2.1090941429138184} +07/25/2024 12:18:47 - INFO - __main__ - Step 11322: {'lr': 0.0004937825463120001, 'samples': 543456, 'steps': 11321, 'loss/train': 1.2783584594726562} +07/25/2024 12:18:47 - INFO - __main__ - Step 11323: {'lr': 0.000493781380350219, 'samples': 543504, 'steps': 11322, 'loss/train': 1.6968837976455688} +07/25/2024 12:18:47 - INFO - __main__ - Step 11324: {'lr': 0.0004937802142804983, 'samples': 543552, 'steps': 11323, 'loss/train': 2.779417037963867} +07/25/2024 12:18:48 - INFO - __main__ - Step 11325: {'lr': 0.0004937790481028386, 'samples': 543600, 'steps': 11324, 'loss/train': 1.4067745208740234} +07/25/2024 12:18:48 - INFO - __main__ - Step 11326: {'lr': 0.0004937778818172403, 'samples': 543648, 'steps': 11325, 'loss/train': 1.8646095991134644} +07/25/2024 12:18:48 - INFO - __main__ - Step 11327: {'lr': 0.000493776715423704, 'samples': 543696, 'steps': 11326, 'loss/train': 2.321410655975342} +07/25/2024 12:18:49 - INFO - __main__ - Step 11328: {'lr': 0.0004937755489222302, 'samples': 543744, 'steps': 11327, 'loss/train': 2.009248971939087} +07/25/2024 12:18:49 - INFO - __main__ - Step 11329: {'lr': 0.0004937743823128194, 'samples': 543792, 'steps': 11328, 'loss/train': 0.9436272382736206} +07/25/2024 12:18:49 - INFO - __main__ - Step 11330: {'lr': 0.0004937732155954722, 'samples': 543840, 'steps': 11329, 'loss/train': 1.5741463899612427} +07/25/2024 12:18:49 - INFO - __main__ - Step 11331: {'lr': 0.000493772048770189, 'samples': 543888, 'steps': 11330, 'loss/train': 1.4232029914855957} +07/25/2024 12:18:50 - INFO - __main__ - Step 11332: {'lr': 0.0004937708818369705, 'samples': 543936, 'steps': 11331, 'loss/train': 2.1782760620117188} +07/25/2024 12:18:50 - INFO - __main__ - Step 11333: {'lr': 0.0004937697147958169, 'samples': 543984, 'steps': 11332, 'loss/train': 2.1663808822631836} +07/25/2024 12:18:50 - INFO - __main__ - Step 11334: {'lr': 0.000493768547646729, 'samples': 544032, 'steps': 11333, 'loss/train': 1.7653319835662842} +07/25/2024 12:18:50 - INFO - __main__ - Step 11335: {'lr': 0.0004937673803897071, 'samples': 544080, 'steps': 11334, 'loss/train': 1.87458336353302} +07/25/2024 12:18:51 - INFO - __main__ - Step 11336: {'lr': 0.000493766213024752, 'samples': 544128, 'steps': 11335, 'loss/train': 1.7755753993988037} +07/25/2024 12:18:51 - INFO - __main__ - Step 11337: {'lr': 0.000493765045551864, 'samples': 544176, 'steps': 11336, 'loss/train': 1.6669493913650513} +07/25/2024 12:18:51 - INFO - __main__ - Step 11338: {'lr': 0.0004937638779710436, 'samples': 544224, 'steps': 11337, 'loss/train': 1.90084707736969} +07/25/2024 12:18:52 - INFO - __main__ - Step 11339: {'lr': 0.0004937627102822914, 'samples': 544272, 'steps': 11338, 'loss/train': 1.8408366441726685} +07/25/2024 12:18:52 - INFO - __main__ - Step 11340: {'lr': 0.000493761542485608, 'samples': 544320, 'steps': 11339, 'loss/train': 2.0289595127105713} +07/25/2024 12:18:52 - INFO - __main__ - Step 11341: {'lr': 0.0004937603745809937, 'samples': 544368, 'steps': 11340, 'loss/train': 1.2152073383331299} +07/25/2024 12:18:52 - INFO - __main__ - Step 11342: {'lr': 0.0004937592065684492, 'samples': 544416, 'steps': 11341, 'loss/train': 2.095066547393799} +07/25/2024 12:18:53 - INFO - __main__ - Step 11343: {'lr': 0.0004937580384479749, 'samples': 544464, 'steps': 11342, 'loss/train': 2.363121747970581} +07/25/2024 12:18:53 - INFO - __main__ - Step 11344: {'lr': 0.0004937568702195715, 'samples': 544512, 'steps': 11343, 'loss/train': 1.8198999166488647} +07/25/2024 12:18:53 - INFO - __main__ - Step 11345: {'lr': 0.0004937557018832393, 'samples': 544560, 'steps': 11344, 'loss/train': 1.944460153579712} +07/25/2024 12:18:54 - INFO - __main__ - Step 11346: {'lr': 0.0004937545334389789, 'samples': 544608, 'steps': 11345, 'loss/train': 1.6881794929504395} +07/25/2024 12:18:54 - INFO - __main__ - Step 11347: {'lr': 0.0004937533648867908, 'samples': 544656, 'steps': 11346, 'loss/train': 2.063854694366455} +07/25/2024 12:18:54 - INFO - __main__ - Step 11348: {'lr': 0.0004937521962266755, 'samples': 544704, 'steps': 11347, 'loss/train': 2.9250905513763428} +07/25/2024 12:18:54 - INFO - __main__ - Step 11349: {'lr': 0.0004937510274586336, 'samples': 544752, 'steps': 11348, 'loss/train': 2.421271800994873} +07/25/2024 12:18:55 - INFO - __main__ - Step 11350: {'lr': 0.0004937498585826656, 'samples': 544800, 'steps': 11349, 'loss/train': 1.9533405303955078} +07/25/2024 12:18:55 - INFO - __main__ - Step 11351: {'lr': 0.0004937486895987721, 'samples': 544848, 'steps': 11350, 'loss/train': 2.26395320892334} +07/25/2024 12:18:55 - INFO - __main__ - Step 11352: {'lr': 0.0004937475205069534, 'samples': 544896, 'steps': 11351, 'loss/train': 2.365250587463379} +07/25/2024 12:18:56 - INFO - __main__ - Step 11353: {'lr': 0.0004937463513072101, 'samples': 544944, 'steps': 11352, 'loss/train': 1.8226351737976074} +07/25/2024 12:18:56 - INFO - __main__ - Step 11354: {'lr': 0.0004937451819995428, 'samples': 544992, 'steps': 11353, 'loss/train': 1.6270108222961426} +07/25/2024 12:18:56 - INFO - __main__ - Step 11355: {'lr': 0.0004937440125839518, 'samples': 545040, 'steps': 11354, 'loss/train': 1.6366270780563354} +07/25/2024 12:18:56 - INFO - __main__ - Step 11356: {'lr': 0.000493742843060438, 'samples': 545088, 'steps': 11355, 'loss/train': 2.0177931785583496} +07/25/2024 12:18:57 - INFO - __main__ - Step 11357: {'lr': 0.0004937416734290016, 'samples': 545136, 'steps': 11356, 'loss/train': 1.7789409160614014} +07/25/2024 12:18:57 - INFO - __main__ - Step 11358: {'lr': 0.0004937405036896433, 'samples': 545184, 'steps': 11357, 'loss/train': 1.6621798276901245} +07/25/2024 12:18:57 - INFO - __main__ - Step 11359: {'lr': 0.0004937393338423635, 'samples': 545232, 'steps': 11358, 'loss/train': 0.9189757704734802} +07/25/2024 12:18:58 - INFO - __main__ - Step 11360: {'lr': 0.0004937381638871626, 'samples': 545280, 'steps': 11359, 'loss/train': 2.0776326656341553} +07/25/2024 12:18:58 - INFO - __main__ - Step 11361: {'lr': 0.0004937369938240414, 'samples': 545328, 'steps': 11360, 'loss/train': 1.4830015897750854} +07/25/2024 12:18:58 - INFO - __main__ - Step 11362: {'lr': 0.0004937358236530003, 'samples': 545376, 'steps': 11361, 'loss/train': 1.9360533952713013} +07/25/2024 12:18:58 - INFO - __main__ - Step 11363: {'lr': 0.0004937346533740398, 'samples': 545424, 'steps': 11362, 'loss/train': 2.0074222087860107} +07/25/2024 12:18:59 - INFO - __main__ - Step 11364: {'lr': 0.0004937334829871604, 'samples': 545472, 'steps': 11363, 'loss/train': 2.2951250076293945} +07/25/2024 12:18:59 - INFO - __main__ - Step 11365: {'lr': 0.0004937323124923626, 'samples': 545520, 'steps': 11364, 'loss/train': 2.039980411529541} +07/25/2024 12:18:59 - INFO - __main__ - Step 11366: {'lr': 0.000493731141889647, 'samples': 545568, 'steps': 11365, 'loss/train': 2.300882339477539} +07/25/2024 12:19:00 - INFO - __main__ - Step 11367: {'lr': 0.0004937299711790142, 'samples': 545616, 'steps': 11366, 'loss/train': 1.8776781558990479} +07/25/2024 12:19:00 - INFO - __main__ - Step 11368: {'lr': 0.0004937288003604645, 'samples': 545664, 'steps': 11367, 'loss/train': 1.3106247186660767} +07/25/2024 12:19:00 - INFO - __main__ - Step 11369: {'lr': 0.0004937276294339984, 'samples': 545712, 'steps': 11368, 'loss/train': 1.978468418121338} +07/25/2024 12:19:00 - INFO - __main__ - Step 11370: {'lr': 0.0004937264583996168, 'samples': 545760, 'steps': 11369, 'loss/train': 1.8036857843399048} +07/25/2024 12:19:01 - INFO - __main__ - Step 11371: {'lr': 0.0004937252872573197, 'samples': 545808, 'steps': 11370, 'loss/train': 2.114375114440918} +07/25/2024 12:19:01 - INFO - __main__ - Step 11372: {'lr': 0.000493724116007108, 'samples': 545856, 'steps': 11371, 'loss/train': 2.845255136489868} +07/25/2024 12:19:01 - INFO - __main__ - Step 11373: {'lr': 0.000493722944648982, 'samples': 545904, 'steps': 11372, 'loss/train': 1.6736140251159668} +07/25/2024 12:19:02 - INFO - __main__ - Step 11374: {'lr': 0.0004937217731829424, 'samples': 545952, 'steps': 11373, 'loss/train': 2.163414478302002} +07/25/2024 12:19:02 - INFO - __main__ - Step 11375: {'lr': 0.0004937206016089895, 'samples': 546000, 'steps': 11374, 'loss/train': 1.796369194984436} +07/25/2024 12:19:02 - INFO - __main__ - Step 11376: {'lr': 0.0004937194299271242, 'samples': 546048, 'steps': 11375, 'loss/train': 1.9171572923660278} +07/25/2024 12:19:02 - INFO - __main__ - Step 11377: {'lr': 0.0004937182581373465, 'samples': 546096, 'steps': 11376, 'loss/train': 1.5838407278060913} +07/25/2024 12:19:03 - INFO - __main__ - Step 11378: {'lr': 0.0004937170862396573, 'samples': 546144, 'steps': 11377, 'loss/train': 1.3011294603347778} +07/25/2024 12:19:03 - INFO - __main__ - Step 11379: {'lr': 0.000493715914234057, 'samples': 546192, 'steps': 11378, 'loss/train': 1.7442233562469482} +07/25/2024 12:19:03 - INFO - __main__ - Step 11380: {'lr': 0.0004937147421205461, 'samples': 546240, 'steps': 11379, 'loss/train': 1.9949910640716553} +07/25/2024 12:19:04 - INFO - __main__ - Step 11381: {'lr': 0.0004937135698991252, 'samples': 546288, 'steps': 11380, 'loss/train': 1.9484808444976807} +07/25/2024 12:19:04 - INFO - __main__ - Step 11382: {'lr': 0.0004937123975697947, 'samples': 546336, 'steps': 11381, 'loss/train': 1.4659135341644287} +07/25/2024 12:19:04 - INFO - __main__ - Step 11383: {'lr': 0.0004937112251325552, 'samples': 546384, 'steps': 11382, 'loss/train': 1.599247932434082} +07/25/2024 12:19:04 - INFO - __main__ - Step 11384: {'lr': 0.0004937100525874072, 'samples': 546432, 'steps': 11383, 'loss/train': 2.034899950027466} +07/25/2024 12:19:05 - INFO - __main__ - Step 11385: {'lr': 0.0004937088799343512, 'samples': 546480, 'steps': 11384, 'loss/train': 2.267782688140869} +07/25/2024 12:19:05 - INFO - __main__ - Step 11386: {'lr': 0.0004937077071733878, 'samples': 546528, 'steps': 11385, 'loss/train': 1.892149806022644} +07/25/2024 12:19:05 - INFO - __main__ - Step 11387: {'lr': 0.0004937065343045174, 'samples': 546576, 'steps': 11386, 'loss/train': 1.9898631572723389} +07/25/2024 12:19:06 - INFO - __main__ - Step 11388: {'lr': 0.0004937053613277406, 'samples': 546624, 'steps': 11387, 'loss/train': 2.0295610427856445} +07/25/2024 12:19:06 - INFO - __main__ - Step 11389: {'lr': 0.0004937041882430578, 'samples': 546672, 'steps': 11388, 'loss/train': 1.72440767288208} +07/25/2024 12:19:06 - INFO - __main__ - Step 11390: {'lr': 0.0004937030150504697, 'samples': 546720, 'steps': 11389, 'loss/train': 1.222494125366211} +07/25/2024 12:19:06 - INFO - __main__ - Step 11391: {'lr': 0.0004937018417499767, 'samples': 546768, 'steps': 11390, 'loss/train': 1.5319151878356934} +07/25/2024 12:19:07 - INFO - __main__ - Step 11392: {'lr': 0.0004937006683415794, 'samples': 546816, 'steps': 11391, 'loss/train': 2.0525963306427} +07/25/2024 12:19:07 - INFO - __main__ - Step 11393: {'lr': 0.0004936994948252783, 'samples': 546864, 'steps': 11392, 'loss/train': 2.411499261856079} +07/25/2024 12:19:07 - INFO - __main__ - Step 11394: {'lr': 0.0004936983212010739, 'samples': 546912, 'steps': 11393, 'loss/train': 2.002727746963501} +07/25/2024 12:19:08 - INFO - __main__ - Step 11395: {'lr': 0.0004936971474689666, 'samples': 546960, 'steps': 11394, 'loss/train': 1.977837085723877} +07/25/2024 12:19:08 - INFO - __main__ - Step 11396: {'lr': 0.000493695973628957, 'samples': 547008, 'steps': 11395, 'loss/train': 2.3727142810821533} +07/25/2024 12:19:08 - INFO - __main__ - Step 11397: {'lr': 0.0004936947996810457, 'samples': 547056, 'steps': 11396, 'loss/train': 1.9858113527297974} +07/25/2024 12:19:08 - INFO - __main__ - Step 11398: {'lr': 0.0004936936256252333, 'samples': 547104, 'steps': 11397, 'loss/train': 2.61946964263916} +07/25/2024 12:19:09 - INFO - __main__ - Step 11399: {'lr': 0.0004936924514615201, 'samples': 547152, 'steps': 11398, 'loss/train': 2.017848253250122} +07/25/2024 12:19:09 - INFO - __main__ - Step 11400: {'lr': 0.0004936912771899066, 'samples': 547200, 'steps': 11399, 'loss/train': 1.834221601486206} +07/25/2024 12:19:09 - INFO - __main__ - Step 11401: {'lr': 0.0004936901028103936, 'samples': 547248, 'steps': 11400, 'loss/train': 1.8728052377700806} +07/25/2024 12:19:10 - INFO - __main__ - Step 11402: {'lr': 0.0004936889283229814, 'samples': 547296, 'steps': 11401, 'loss/train': 1.8117210865020752} +07/25/2024 12:19:10 - INFO - __main__ - Step 11403: {'lr': 0.0004936877537276706, 'samples': 547344, 'steps': 11402, 'loss/train': 2.1811037063598633} +07/25/2024 12:19:10 - INFO - __main__ - Step 11404: {'lr': 0.0004936865790244615, 'samples': 547392, 'steps': 11403, 'loss/train': 1.6521364450454712} +07/25/2024 12:19:10 - INFO - __main__ - Step 11405: {'lr': 0.0004936854042133551, 'samples': 547440, 'steps': 11404, 'loss/train': 1.8783137798309326} +07/25/2024 12:19:11 - INFO - __main__ - Step 11406: {'lr': 0.0004936842292943514, 'samples': 547488, 'steps': 11405, 'loss/train': 1.7807278633117676} +07/25/2024 12:19:11 - INFO - __main__ - Step 11407: {'lr': 0.0004936830542674513, 'samples': 547536, 'steps': 11406, 'loss/train': 2.266333818435669} +07/25/2024 12:19:11 - INFO - __main__ - Step 11408: {'lr': 0.0004936818791326551, 'samples': 547584, 'steps': 11407, 'loss/train': 1.9050532579421997} +07/25/2024 12:19:12 - INFO - __main__ - Step 11409: {'lr': 0.0004936807038899634, 'samples': 547632, 'steps': 11408, 'loss/train': 1.4031178951263428} +07/25/2024 12:19:12 - INFO - __main__ - Step 11410: {'lr': 0.0004936795285393767, 'samples': 547680, 'steps': 11409, 'loss/train': 1.7609165906906128} +07/25/2024 12:19:12 - INFO - __main__ - Step 11411: {'lr': 0.0004936783530808956, 'samples': 547728, 'steps': 11410, 'loss/train': 1.7017890214920044} +07/25/2024 12:19:12 - INFO - __main__ - Step 11412: {'lr': 0.0004936771775145204, 'samples': 547776, 'steps': 11411, 'loss/train': 1.1491578817367554} +07/25/2024 12:19:13 - INFO - __main__ - Step 11413: {'lr': 0.000493676001840252, 'samples': 547824, 'steps': 11412, 'loss/train': 1.8621301651000977} +07/25/2024 12:19:13 - INFO - __main__ - Step 11414: {'lr': 0.0004936748260580905, 'samples': 547872, 'steps': 11413, 'loss/train': 1.5440137386322021} +07/25/2024 12:19:13 - INFO - __main__ - Step 11415: {'lr': 0.0004936736501680366, 'samples': 547920, 'steps': 11414, 'loss/train': 1.5233442783355713} +07/25/2024 12:19:13 - INFO - __main__ - Step 11416: {'lr': 0.000493672474170091, 'samples': 547968, 'steps': 11415, 'loss/train': 1.8369948863983154} +07/25/2024 12:19:14 - INFO - __main__ - Step 11417: {'lr': 0.0004936712980642539, 'samples': 548016, 'steps': 11416, 'loss/train': 2.0837762355804443} +07/25/2024 12:19:14 - INFO - __main__ - Step 11418: {'lr': 0.0004936701218505262, 'samples': 548064, 'steps': 11417, 'loss/train': 2.046025276184082} +07/25/2024 12:19:14 - INFO - __main__ - Step 11419: {'lr': 0.000493668945528908, 'samples': 548112, 'steps': 11418, 'loss/train': 2.0421085357666016} +07/25/2024 12:19:15 - INFO - __main__ - Step 11420: {'lr': 0.0004936677690994, 'samples': 548160, 'steps': 11419, 'loss/train': 2.5586130619049072} +07/25/2024 12:19:15 - INFO - __main__ - Step 11421: {'lr': 0.0004936665925620029, 'samples': 548208, 'steps': 11420, 'loss/train': 1.807647466659546} +07/25/2024 12:19:15 - INFO - __main__ - Step 11422: {'lr': 0.000493665415916717, 'samples': 548256, 'steps': 11421, 'loss/train': 1.5379160642623901} +07/25/2024 12:19:15 - INFO - __main__ - Step 11423: {'lr': 0.0004936642391635428, 'samples': 548304, 'steps': 11422, 'loss/train': 1.5242081880569458} +07/25/2024 12:19:16 - INFO - __main__ - Step 11424: {'lr': 0.000493663062302481, 'samples': 548352, 'steps': 11423, 'loss/train': 2.034515619277954} +07/25/2024 12:19:16 - INFO - __main__ - Step 11425: {'lr': 0.0004936618853335319, 'samples': 548400, 'steps': 11424, 'loss/train': 2.1371612548828125} +07/25/2024 12:19:16 - INFO - __main__ - Step 11426: {'lr': 0.0004936607082566963, 'samples': 548448, 'steps': 11425, 'loss/train': 1.9939656257629395} +07/25/2024 12:19:17 - INFO - __main__ - Step 11427: {'lr': 0.0004936595310719744, 'samples': 548496, 'steps': 11426, 'loss/train': 1.834157109260559} +07/25/2024 12:19:17 - INFO - __main__ - Step 11428: {'lr': 0.0004936583537793669, 'samples': 548544, 'steps': 11427, 'loss/train': 1.7631651163101196} +07/25/2024 12:19:17 - INFO - __main__ - Step 11429: {'lr': 0.0004936571763788745, 'samples': 548592, 'steps': 11428, 'loss/train': 1.905903935432434} +07/25/2024 12:19:17 - INFO - __main__ - Step 11430: {'lr': 0.0004936559988704973, 'samples': 548640, 'steps': 11429, 'loss/train': 1.7944068908691406} +07/25/2024 12:19:18 - INFO - __main__ - Step 11431: {'lr': 0.0004936548212542362, 'samples': 548688, 'steps': 11430, 'loss/train': 2.447518825531006} +07/25/2024 12:19:18 - INFO - __main__ - Step 11432: {'lr': 0.0004936536435300916, 'samples': 548736, 'steps': 11431, 'loss/train': 1.636064887046814} +07/25/2024 12:19:18 - INFO - __main__ - Step 11433: {'lr': 0.0004936524656980639, 'samples': 548784, 'steps': 11432, 'loss/train': 1.7438642978668213} +07/25/2024 12:19:19 - INFO - __main__ - Step 11434: {'lr': 0.0004936512877581537, 'samples': 548832, 'steps': 11433, 'loss/train': 2.2386536598205566} +07/25/2024 12:19:19 - INFO - __main__ - Step 11435: {'lr': 0.0004936501097103617, 'samples': 548880, 'steps': 11434, 'loss/train': 1.9790852069854736} +07/25/2024 12:19:19 - INFO - __main__ - Step 11436: {'lr': 0.0004936489315546881, 'samples': 548928, 'steps': 11435, 'loss/train': 2.188676357269287} +07/25/2024 12:19:19 - INFO - __main__ - Step 11437: {'lr': 0.0004936477532911336, 'samples': 548976, 'steps': 11436, 'loss/train': 1.8733073472976685} +07/25/2024 12:19:20 - INFO - __main__ - Step 11438: {'lr': 0.0004936465749196987, 'samples': 549024, 'steps': 11437, 'loss/train': 2.1320831775665283} +07/25/2024 12:19:20 - INFO - __main__ - Step 11439: {'lr': 0.000493645396440384, 'samples': 549072, 'steps': 11438, 'loss/train': 1.5574579238891602} +07/25/2024 12:19:20 - INFO - __main__ - Step 11440: {'lr': 0.0004936442178531898, 'samples': 549120, 'steps': 11439, 'loss/train': 1.7829835414886475} +07/25/2024 12:19:21 - INFO - __main__ - Step 11441: {'lr': 0.000493643039158117, 'samples': 549168, 'steps': 11440, 'loss/train': 1.9888579845428467} +07/25/2024 12:19:21 - INFO - __main__ - Step 11442: {'lr': 0.0004936418603551656, 'samples': 549216, 'steps': 11441, 'loss/train': 2.4953513145446777} +07/25/2024 12:19:21 - INFO - __main__ - Step 11443: {'lr': 0.0004936406814443366, 'samples': 549264, 'steps': 11442, 'loss/train': 2.2150137424468994} +07/25/2024 12:19:21 - INFO - __main__ - Step 11444: {'lr': 0.0004936395024256303, 'samples': 549312, 'steps': 11443, 'loss/train': 1.8369436264038086} +07/25/2024 12:19:22 - INFO - __main__ - Step 11445: {'lr': 0.0004936383232990473, 'samples': 549360, 'steps': 11444, 'loss/train': 2.449014186859131} +07/25/2024 12:19:22 - INFO - __main__ - Step 11446: {'lr': 0.000493637144064588, 'samples': 549408, 'steps': 11445, 'loss/train': 1.9757732152938843} +07/25/2024 12:19:22 - INFO - __main__ - Step 11447: {'lr': 0.000493635964722253, 'samples': 549456, 'steps': 11446, 'loss/train': 1.5389013290405273} +07/25/2024 12:19:23 - INFO - __main__ - Step 11448: {'lr': 0.0004936347852720428, 'samples': 549504, 'steps': 11447, 'loss/train': 2.2359023094177246} +07/25/2024 12:19:23 - INFO - __main__ - Step 11449: {'lr': 0.000493633605713958, 'samples': 549552, 'steps': 11448, 'loss/train': 1.4614065885543823} +07/25/2024 12:19:23 - INFO - __main__ - Step 11450: {'lr': 0.0004936324260479991, 'samples': 549600, 'steps': 11449, 'loss/train': 1.95305335521698} +07/25/2024 12:19:23 - INFO - __main__ - Step 11451: {'lr': 0.0004936312462741664, 'samples': 549648, 'steps': 11450, 'loss/train': 1.1188896894454956} +07/25/2024 12:19:24 - INFO - __main__ - Step 11452: {'lr': 0.0004936300663924609, 'samples': 549696, 'steps': 11451, 'loss/train': 2.1169674396514893} +07/25/2024 12:19:24 - INFO - __main__ - Step 11453: {'lr': 0.0004936288864028826, 'samples': 549744, 'steps': 11452, 'loss/train': 1.8876780271530151} +07/25/2024 12:19:24 - INFO - __main__ - Step 11454: {'lr': 0.0004936277063054324, 'samples': 549792, 'steps': 11453, 'loss/train': 1.8622674942016602} +07/25/2024 12:19:25 - INFO - __main__ - Step 11455: {'lr': 0.0004936265261001106, 'samples': 549840, 'steps': 11454, 'loss/train': 1.8998181819915771} +07/25/2024 12:19:25 - INFO - __main__ - Step 11456: {'lr': 0.0004936253457869178, 'samples': 549888, 'steps': 11455, 'loss/train': 1.8915834426879883} +07/25/2024 12:19:25 - INFO - __main__ - Step 11457: {'lr': 0.0004936241653658545, 'samples': 549936, 'steps': 11456, 'loss/train': 1.8591737747192383} +07/25/2024 12:19:25 - INFO - __main__ - Step 11458: {'lr': 0.0004936229848369213, 'samples': 549984, 'steps': 11457, 'loss/train': 1.9906646013259888} +07/25/2024 12:19:26 - INFO - __main__ - Step 11459: {'lr': 0.0004936218042001187, 'samples': 550032, 'steps': 11458, 'loss/train': 1.7640403509140015} +07/25/2024 12:19:26 - INFO - __main__ - Step 11460: {'lr': 0.0004936206234554472, 'samples': 550080, 'steps': 11459, 'loss/train': 2.4021828174591064} +07/25/2024 12:19:26 - INFO - __main__ - Step 11461: {'lr': 0.0004936194426029072, 'samples': 550128, 'steps': 11460, 'loss/train': 2.119253635406494} +07/25/2024 12:19:27 - INFO - __main__ - Step 11462: {'lr': 0.0004936182616424994, 'samples': 550176, 'steps': 11461, 'loss/train': 1.4630980491638184} +07/25/2024 12:19:27 - INFO - __main__ - Step 11463: {'lr': 0.0004936170805742242, 'samples': 550224, 'steps': 11462, 'loss/train': 2.0112051963806152} +07/25/2024 12:19:27 - INFO - __main__ - Step 11464: {'lr': 0.0004936158993980823, 'samples': 550272, 'steps': 11463, 'loss/train': 1.388871431350708} +07/25/2024 12:19:27 - INFO - __main__ - Step 11465: {'lr': 0.000493614718114074, 'samples': 550320, 'steps': 11464, 'loss/train': 1.7643309831619263} +07/25/2024 12:19:28 - INFO - __main__ - Step 11466: {'lr': 0.0004936135367222, 'samples': 550368, 'steps': 11465, 'loss/train': 1.8127995729446411} +07/25/2024 12:19:28 - INFO - __main__ - Step 11467: {'lr': 0.0004936123552224608, 'samples': 550416, 'steps': 11466, 'loss/train': 2.091418504714966} +07/25/2024 12:19:28 - INFO - __main__ - Step 11468: {'lr': 0.0004936111736148567, 'samples': 550464, 'steps': 11467, 'loss/train': 2.0242221355438232} +07/25/2024 12:19:29 - INFO - __main__ - Step 11469: {'lr': 0.0004936099918993885, 'samples': 550512, 'steps': 11468, 'loss/train': 1.4457868337631226} +07/25/2024 12:19:29 - INFO - __main__ - Step 11470: {'lr': 0.0004936088100760567, 'samples': 550560, 'steps': 11469, 'loss/train': 1.5582902431488037} +07/25/2024 12:19:29 - INFO - __main__ - Step 11471: {'lr': 0.0004936076281448616, 'samples': 550608, 'steps': 11470, 'loss/train': 2.058232545852661} +07/25/2024 12:19:29 - INFO - __main__ - Step 11472: {'lr': 0.0004936064461058039, 'samples': 550656, 'steps': 11471, 'loss/train': 1.913331151008606} +07/25/2024 12:19:30 - INFO - __main__ - Step 11473: {'lr': 0.0004936052639588842, 'samples': 550704, 'steps': 11472, 'loss/train': 1.5006604194641113} +07/25/2024 12:19:30 - INFO - __main__ - Step 11474: {'lr': 0.0004936040817041027, 'samples': 550752, 'steps': 11473, 'loss/train': 1.8061128854751587} +07/25/2024 12:19:30 - INFO - __main__ - Step 11475: {'lr': 0.0004936028993414603, 'samples': 550800, 'steps': 11474, 'loss/train': 1.2631653547286987} +07/25/2024 12:19:31 - INFO - __main__ - Step 11476: {'lr': 0.0004936017168709574, 'samples': 550848, 'steps': 11475, 'loss/train': 2.2126920223236084} +07/25/2024 12:19:31 - INFO - __main__ - Step 11477: {'lr': 0.0004936005342925944, 'samples': 550896, 'steps': 11476, 'loss/train': 1.817429542541504} +07/25/2024 12:19:31 - INFO - __main__ - Step 11478: {'lr': 0.0004935993516063719, 'samples': 550944, 'steps': 11477, 'loss/train': 2.2216813564300537} +07/25/2024 12:19:31 - INFO - __main__ - Step 11479: {'lr': 0.0004935981688122905, 'samples': 550992, 'steps': 11478, 'loss/train': 2.379631757736206} +07/25/2024 12:19:32 - INFO - __main__ - Step 11480: {'lr': 0.0004935969859103507, 'samples': 551040, 'steps': 11479, 'loss/train': 2.2061080932617188} +07/25/2024 12:19:32 - INFO - __main__ - Step 11481: {'lr': 0.0004935958029005528, 'samples': 551088, 'steps': 11480, 'loss/train': 1.878494381904602} +07/25/2024 12:19:32 - INFO - __main__ - Step 11482: {'lr': 0.0004935946197828976, 'samples': 551136, 'steps': 11481, 'loss/train': 1.0899230241775513} +07/25/2024 12:19:33 - INFO - __main__ - Step 11483: {'lr': 0.0004935934365573856, 'samples': 551184, 'steps': 11482, 'loss/train': 1.931835412979126} +07/25/2024 12:19:33 - INFO - __main__ - Step 11484: {'lr': 0.0004935922532240171, 'samples': 551232, 'steps': 11483, 'loss/train': 2.2115442752838135} +07/25/2024 12:19:33 - INFO - __main__ - Step 11485: {'lr': 0.0004935910697827929, 'samples': 551280, 'steps': 11484, 'loss/train': 2.0761218070983887} +07/25/2024 12:19:33 - INFO - __main__ - Step 11486: {'lr': 0.0004935898862337134, 'samples': 551328, 'steps': 11485, 'loss/train': 1.4813406467437744} +07/25/2024 12:19:34 - INFO - __main__ - Step 11487: {'lr': 0.0004935887025767791, 'samples': 551376, 'steps': 11486, 'loss/train': 1.911556601524353} +07/25/2024 12:19:34 - INFO - __main__ - Step 11488: {'lr': 0.0004935875188119905, 'samples': 551424, 'steps': 11487, 'loss/train': 1.719336748123169} +07/25/2024 12:19:34 - INFO - __main__ - Step 11489: {'lr': 0.0004935863349393482, 'samples': 551472, 'steps': 11488, 'loss/train': 1.7489458322525024} +07/25/2024 12:19:35 - INFO - __main__ - Step 11490: {'lr': 0.0004935851509588527, 'samples': 551520, 'steps': 11489, 'loss/train': 1.5939843654632568} +07/25/2024 12:19:35 - INFO - __main__ - Step 11491: {'lr': 0.0004935839668705046, 'samples': 551568, 'steps': 11490, 'loss/train': 2.3921210765838623} +07/25/2024 12:19:35 - INFO - __main__ - Step 11492: {'lr': 0.0004935827826743043, 'samples': 551616, 'steps': 11491, 'loss/train': 2.2711119651794434} +07/25/2024 12:19:35 - INFO - __main__ - Step 11493: {'lr': 0.0004935815983702522, 'samples': 551664, 'steps': 11492, 'loss/train': 1.9103944301605225} +07/25/2024 12:19:36 - INFO - __main__ - Step 11494: {'lr': 0.0004935804139583492, 'samples': 551712, 'steps': 11493, 'loss/train': 1.2937101125717163} +07/25/2024 12:19:36 - INFO - __main__ - Step 11495: {'lr': 0.0004935792294385955, 'samples': 551760, 'steps': 11494, 'loss/train': 2.1804897785186768} +07/25/2024 12:19:36 - INFO - __main__ - Step 11496: {'lr': 0.0004935780448109918, 'samples': 551808, 'steps': 11495, 'loss/train': 1.9628902673721313} +07/25/2024 12:19:37 - INFO - __main__ - Step 11497: {'lr': 0.0004935768600755386, 'samples': 551856, 'steps': 11496, 'loss/train': 1.7200100421905518} +07/25/2024 12:19:37 - INFO - __main__ - Step 11498: {'lr': 0.0004935756752322363, 'samples': 551904, 'steps': 11497, 'loss/train': 2.068880796432495} +07/25/2024 12:19:37 - INFO - __main__ - Step 11499: {'lr': 0.0004935744902810856, 'samples': 551952, 'steps': 11498, 'loss/train': 0.9772506356239319} +07/25/2024 12:19:37 - INFO - __main__ - Step 11500: {'lr': 0.0004935733052220868, 'samples': 552000, 'steps': 11499, 'loss/train': 1.6423685550689697} +07/25/2024 12:19:38 - INFO - __main__ - Step 11501: {'lr': 0.0004935721200552408, 'samples': 552048, 'steps': 11500, 'loss/train': 2.2700278759002686} +07/25/2024 12:19:38 - INFO - __main__ - Step 11502: {'lr': 0.0004935709347805477, 'samples': 552096, 'steps': 11501, 'loss/train': 1.7871429920196533} +07/25/2024 12:19:38 - INFO - __main__ - Step 11503: {'lr': 0.0004935697493980083, 'samples': 552144, 'steps': 11502, 'loss/train': 2.094240427017212} +07/25/2024 12:19:38 - INFO - __main__ - Step 11504: {'lr': 0.000493568563907623, 'samples': 552192, 'steps': 11503, 'loss/train': 2.0258543491363525} +07/25/2024 12:19:39 - INFO - __main__ - Step 11505: {'lr': 0.0004935673783093924, 'samples': 552240, 'steps': 11504, 'loss/train': 1.584610104560852} +07/25/2024 12:19:39 - INFO - __main__ - Step 11506: {'lr': 0.000493566192603317, 'samples': 552288, 'steps': 11505, 'loss/train': 1.8211909532546997} +07/25/2024 12:19:39 - INFO - __main__ - Step 11507: {'lr': 0.0004935650067893972, 'samples': 552336, 'steps': 11506, 'loss/train': 2.166975975036621} +07/25/2024 12:19:40 - INFO - __main__ - Step 11508: {'lr': 0.0004935638208676338, 'samples': 552384, 'steps': 11507, 'loss/train': 1.9941383600234985} +07/25/2024 12:19:40 - INFO - __main__ - Step 11509: {'lr': 0.000493562634838027, 'samples': 552432, 'steps': 11508, 'loss/train': 1.6023955345153809} +07/25/2024 12:19:40 - INFO - __main__ - Step 11510: {'lr': 0.0004935614487005777, 'samples': 552480, 'steps': 11509, 'loss/train': 1.6510518789291382} +07/25/2024 12:19:40 - INFO - __main__ - Step 11511: {'lr': 0.000493560262455286, 'samples': 552528, 'steps': 11510, 'loss/train': 1.1409803628921509} +07/25/2024 12:19:41 - INFO - __main__ - Step 11512: {'lr': 0.0004935590761021527, 'samples': 552576, 'steps': 11511, 'loss/train': 1.8193949460983276} +07/25/2024 12:19:41 - INFO - __main__ - Step 11513: {'lr': 0.0004935578896411783, 'samples': 552624, 'steps': 11512, 'loss/train': 0.7636163234710693} +07/25/2024 12:19:41 - INFO - __main__ - Step 11514: {'lr': 0.0004935567030723633, 'samples': 552672, 'steps': 11513, 'loss/train': 1.5657488107681274} +07/25/2024 12:19:42 - INFO - __main__ - Step 11515: {'lr': 0.0004935555163957082, 'samples': 552720, 'steps': 11514, 'loss/train': 2.230672836303711} +07/25/2024 12:19:42 - INFO - __main__ - Step 11516: {'lr': 0.0004935543296112135, 'samples': 552768, 'steps': 11515, 'loss/train': 2.3082897663116455} +07/25/2024 12:19:42 - INFO - __main__ - Step 11517: {'lr': 0.0004935531427188799, 'samples': 552816, 'steps': 11516, 'loss/train': 1.8439428806304932} +07/25/2024 12:19:42 - INFO - __main__ - Step 11518: {'lr': 0.0004935519557187077, 'samples': 552864, 'steps': 11517, 'loss/train': 1.8047584295272827} +07/25/2024 12:19:43 - INFO - __main__ - Step 11519: {'lr': 0.0004935507686106975, 'samples': 552912, 'steps': 11518, 'loss/train': 2.1127257347106934} +07/25/2024 12:19:43 - INFO - __main__ - Step 11520: {'lr': 0.0004935495813948498, 'samples': 552960, 'steps': 11519, 'loss/train': 2.081296443939209} +07/25/2024 12:19:43 - INFO - __main__ - Step 11521: {'lr': 0.0004935483940711653, 'samples': 553008, 'steps': 11520, 'loss/train': 1.7987462282180786} +07/25/2024 12:19:44 - INFO - __main__ - Step 11522: {'lr': 0.0004935472066396443, 'samples': 553056, 'steps': 11521, 'loss/train': 1.846293568611145} +07/25/2024 12:19:44 - INFO - __main__ - Step 11523: {'lr': 0.0004935460191002874, 'samples': 553104, 'steps': 11522, 'loss/train': 2.125113010406494} +07/25/2024 12:19:44 - INFO - __main__ - Step 11524: {'lr': 0.0004935448314530952, 'samples': 553152, 'steps': 11523, 'loss/train': 2.1316165924072266} +07/25/2024 12:19:44 - INFO - __main__ - Step 11525: {'lr': 0.0004935436436980683, 'samples': 553200, 'steps': 11524, 'loss/train': 1.6517608165740967} +07/25/2024 12:19:45 - INFO - __main__ - Step 11526: {'lr': 0.000493542455835207, 'samples': 553248, 'steps': 11525, 'loss/train': 1.1710606813430786} +07/25/2024 12:19:45 - INFO - __main__ - Step 11527: {'lr': 0.0004935412678645118, 'samples': 553296, 'steps': 11526, 'loss/train': 2.5775442123413086} +07/25/2024 12:19:45 - INFO - __main__ - Step 11528: {'lr': 0.0004935400797859835, 'samples': 553344, 'steps': 11527, 'loss/train': 1.8457286357879639} +07/25/2024 12:19:46 - INFO - __main__ - Step 11529: {'lr': 0.0004935388915996224, 'samples': 553392, 'steps': 11528, 'loss/train': 1.4722824096679688} +07/25/2024 12:19:46 - INFO - __main__ - Step 11530: {'lr': 0.0004935377033054291, 'samples': 553440, 'steps': 11529, 'loss/train': 2.078334331512451} +07/25/2024 12:19:46 - INFO - __main__ - Step 11531: {'lr': 0.0004935365149034041, 'samples': 553488, 'steps': 11530, 'loss/train': 1.8314266204833984} +07/25/2024 12:19:46 - INFO - __main__ - Step 11532: {'lr': 0.0004935353263935481, 'samples': 553536, 'steps': 11531, 'loss/train': 2.257777452468872} +07/25/2024 12:19:47 - INFO - __main__ - Step 11533: {'lr': 0.0004935341377758614, 'samples': 553584, 'steps': 11532, 'loss/train': 1.9264758825302124} +07/25/2024 12:19:47 - INFO - __main__ - Step 11534: {'lr': 0.0004935329490503445, 'samples': 553632, 'steps': 11533, 'loss/train': 1.9527242183685303} +07/25/2024 12:19:47 - INFO - __main__ - Step 11535: {'lr': 0.0004935317602169981, 'samples': 553680, 'steps': 11534, 'loss/train': 2.0268776416778564} +07/25/2024 12:19:48 - INFO - __main__ - Step 11536: {'lr': 0.0004935305712758228, 'samples': 553728, 'steps': 11535, 'loss/train': 2.1884448528289795} +07/25/2024 12:19:48 - INFO - __main__ - Step 11537: {'lr': 0.0004935293822268189, 'samples': 553776, 'steps': 11536, 'loss/train': 2.4256911277770996} +07/25/2024 12:19:48 - INFO - __main__ - Step 11538: {'lr': 0.0004935281930699869, 'samples': 553824, 'steps': 11537, 'loss/train': 1.5896888971328735} +07/25/2024 12:19:48 - INFO - __main__ - Step 11539: {'lr': 0.0004935270038053276, 'samples': 553872, 'steps': 11538, 'loss/train': 1.5409096479415894} +07/25/2024 12:19:49 - INFO - __main__ - Step 11540: {'lr': 0.0004935258144328412, 'samples': 553920, 'steps': 11539, 'loss/train': 2.0535337924957275} +07/25/2024 12:19:49 - INFO - __main__ - Step 11541: {'lr': 0.0004935246249525286, 'samples': 553968, 'steps': 11540, 'loss/train': 1.8960891962051392} +07/25/2024 12:19:49 - INFO - __main__ - Step 11542: {'lr': 0.0004935234353643899, 'samples': 554016, 'steps': 11541, 'loss/train': 1.7234206199645996} +07/25/2024 12:19:50 - INFO - __main__ - Step 11543: {'lr': 0.0004935222456684261, 'samples': 554064, 'steps': 11542, 'loss/train': 2.027841329574585} +07/25/2024 12:19:50 - INFO - __main__ - Step 11544: {'lr': 0.0004935210558646373, 'samples': 554112, 'steps': 11543, 'loss/train': 1.7364318370819092} +07/25/2024 12:19:50 - INFO - __main__ - Step 11545: {'lr': 0.0004935198659530241, 'samples': 554160, 'steps': 11544, 'loss/train': 2.1207401752471924} +07/25/2024 12:19:50 - INFO - __main__ - Step 11546: {'lr': 0.0004935186759335874, 'samples': 554208, 'steps': 11545, 'loss/train': 1.6801273822784424} +07/25/2024 12:19:51 - INFO - __main__ - Step 11547: {'lr': 0.0004935174858063273, 'samples': 554256, 'steps': 11546, 'loss/train': 2.3526976108551025} +07/25/2024 12:19:51 - INFO - __main__ - Step 11548: {'lr': 0.0004935162955712444, 'samples': 554304, 'steps': 11547, 'loss/train': 2.644277572631836} +07/25/2024 12:19:51 - INFO - __main__ - Step 11549: {'lr': 0.0004935151052283394, 'samples': 554352, 'steps': 11548, 'loss/train': 1.9676655530929565} +07/25/2024 12:19:52 - INFO - __main__ - Step 11550: {'lr': 0.0004935139147776127, 'samples': 554400, 'steps': 11549, 'loss/train': 1.9222230911254883} +07/25/2024 12:19:52 - INFO - __main__ - Step 11551: {'lr': 0.0004935127242190649, 'samples': 554448, 'steps': 11550, 'loss/train': 1.7834514379501343} +07/25/2024 12:19:52 - INFO - __main__ - Step 11552: {'lr': 0.0004935115335526965, 'samples': 554496, 'steps': 11551, 'loss/train': 1.7242457866668701} +07/25/2024 12:19:52 - INFO - __main__ - Step 11553: {'lr': 0.000493510342778508, 'samples': 554544, 'steps': 11552, 'loss/train': 1.2084940671920776} +07/25/2024 12:19:53 - INFO - __main__ - Step 11554: {'lr': 0.0004935091518965, 'samples': 554592, 'steps': 11553, 'loss/train': 2.3988089561462402} +07/25/2024 12:19:53 - INFO - __main__ - Step 11555: {'lr': 0.0004935079609066729, 'samples': 554640, 'steps': 11554, 'loss/train': 1.7331902980804443} +07/25/2024 12:19:53 - INFO - __main__ - Step 11556: {'lr': 0.0004935067698090273, 'samples': 554688, 'steps': 11555, 'loss/train': 2.290447235107422} +07/25/2024 12:19:54 - INFO - __main__ - Step 11557: {'lr': 0.0004935055786035637, 'samples': 554736, 'steps': 11556, 'loss/train': 2.035874128341675} +07/25/2024 12:19:54 - INFO - __main__ - Step 11558: {'lr': 0.0004935043872902827, 'samples': 554784, 'steps': 11557, 'loss/train': 2.218093156814575} +07/25/2024 12:19:54 - INFO - __main__ - Step 11559: {'lr': 0.0004935031958691848, 'samples': 554832, 'steps': 11558, 'loss/train': 1.9359960556030273} +07/25/2024 12:19:54 - INFO - __main__ - Step 11560: {'lr': 0.0004935020043402705, 'samples': 554880, 'steps': 11559, 'loss/train': 1.3856117725372314} +07/25/2024 12:19:55 - INFO - __main__ - Step 11561: {'lr': 0.0004935008127035403, 'samples': 554928, 'steps': 11560, 'loss/train': 2.628851890563965} +07/25/2024 12:19:55 - INFO - __main__ - Step 11562: {'lr': 0.0004934996209589947, 'samples': 554976, 'steps': 11561, 'loss/train': 1.8776038885116577} +07/25/2024 12:19:55 - INFO - __main__ - Step 11563: {'lr': 0.0004934984291066344, 'samples': 555024, 'steps': 11562, 'loss/train': 1.8977824449539185} +07/25/2024 12:19:56 - INFO - __main__ - Step 11564: {'lr': 0.0004934972371464598, 'samples': 555072, 'steps': 11563, 'loss/train': 0.747747540473938} +07/25/2024 12:19:56 - INFO - __main__ - Step 11565: {'lr': 0.0004934960450784715, 'samples': 555120, 'steps': 11564, 'loss/train': 2.016387462615967} +07/25/2024 12:19:56 - INFO - __main__ - Step 11566: {'lr': 0.0004934948529026699, 'samples': 555168, 'steps': 11565, 'loss/train': 2.3074400424957275} +07/25/2024 12:19:56 - INFO - __main__ - Step 11567: {'lr': 0.0004934936606190556, 'samples': 555216, 'steps': 11566, 'loss/train': 1.7984976768493652} +07/25/2024 12:19:57 - INFO - __main__ - Step 11568: {'lr': 0.0004934924682276291, 'samples': 555264, 'steps': 11567, 'loss/train': 1.6622976064682007} +07/25/2024 12:19:57 - INFO - __main__ - Step 11569: {'lr': 0.000493491275728391, 'samples': 555312, 'steps': 11568, 'loss/train': 2.086601495742798} +07/25/2024 12:19:57 - INFO - __main__ - Step 11570: {'lr': 0.0004934900831213417, 'samples': 555360, 'steps': 11569, 'loss/train': 2.089937210083008} +07/25/2024 12:19:57 - INFO - __main__ - Step 11571: {'lr': 0.0004934888904064819, 'samples': 555408, 'steps': 11570, 'loss/train': 1.9320058822631836} +07/25/2024 12:19:58 - INFO - __main__ - Step 11572: {'lr': 0.000493487697583812, 'samples': 555456, 'steps': 11571, 'loss/train': 2.0946571826934814} +07/25/2024 12:19:58 - INFO - __main__ - Step 11573: {'lr': 0.0004934865046533327, 'samples': 555504, 'steps': 11572, 'loss/train': 1.8905000686645508} +07/25/2024 12:19:58 - INFO - __main__ - Step 11574: {'lr': 0.0004934853116150443, 'samples': 555552, 'steps': 11573, 'loss/train': 1.783522129058838} +07/25/2024 12:19:59 - INFO - __main__ - Step 11575: {'lr': 0.0004934841184689474, 'samples': 555600, 'steps': 11574, 'loss/train': 1.7310491800308228} +07/25/2024 12:19:59 - INFO - __main__ - Step 11576: {'lr': 0.0004934829252150425, 'samples': 555648, 'steps': 11575, 'loss/train': 2.242832660675049} +07/25/2024 12:19:59 - INFO - __main__ - Step 11577: {'lr': 0.0004934817318533304, 'samples': 555696, 'steps': 11576, 'loss/train': 1.823548674583435} +07/25/2024 12:19:59 - INFO - __main__ - Step 11578: {'lr': 0.0004934805383838112, 'samples': 555744, 'steps': 11577, 'loss/train': 2.234703302383423} +07/25/2024 12:20:00 - INFO - __main__ - Step 11579: {'lr': 0.0004934793448064857, 'samples': 555792, 'steps': 11578, 'loss/train': 0.9859910011291504} +07/25/2024 12:20:00 - INFO - __main__ - Step 11580: {'lr': 0.0004934781511213545, 'samples': 555840, 'steps': 11579, 'loss/train': 2.0937440395355225} +07/25/2024 12:20:00 - INFO - __main__ - Step 11581: {'lr': 0.0004934769573284178, 'samples': 555888, 'steps': 11580, 'loss/train': 1.7604568004608154} +07/25/2024 12:20:01 - INFO - __main__ - Step 11582: {'lr': 0.0004934757634276764, 'samples': 555936, 'steps': 11581, 'loss/train': 2.435167074203491} +07/25/2024 12:20:01 - INFO - __main__ - Step 11583: {'lr': 0.0004934745694191308, 'samples': 555984, 'steps': 11582, 'loss/train': 2.407585859298706} +07/25/2024 12:20:01 - INFO - __main__ - Step 11584: {'lr': 0.0004934733753027815, 'samples': 556032, 'steps': 11583, 'loss/train': 2.090940475463867} +07/25/2024 12:20:01 - INFO - __main__ - Step 11585: {'lr': 0.0004934721810786289, 'samples': 556080, 'steps': 11584, 'loss/train': 1.7240545749664307} +07/25/2024 12:20:02 - INFO - __main__ - Step 11586: {'lr': 0.0004934709867466737, 'samples': 556128, 'steps': 11585, 'loss/train': 1.3721801042556763} +07/25/2024 12:20:02 - INFO - __main__ - Step 11587: {'lr': 0.0004934697923069163, 'samples': 556176, 'steps': 11586, 'loss/train': 2.0079851150512695} +07/25/2024 12:20:02 - INFO - __main__ - Step 11588: {'lr': 0.0004934685977593574, 'samples': 556224, 'steps': 11587, 'loss/train': 0.14597785472869873} +07/25/2024 12:20:03 - INFO - __main__ - Step 11589: {'lr': 0.0004934674031039974, 'samples': 556272, 'steps': 11588, 'loss/train': 1.432761788368225} +07/25/2024 12:20:03 - INFO - __main__ - Step 11590: {'lr': 0.0004934662083408368, 'samples': 556320, 'steps': 11589, 'loss/train': 2.248730421066284} +07/25/2024 12:20:03 - INFO - __main__ - Step 11591: {'lr': 0.0004934650134698763, 'samples': 556368, 'steps': 11590, 'loss/train': 1.5910556316375732} +07/25/2024 12:20:03 - INFO - __main__ - Step 11592: {'lr': 0.0004934638184911162, 'samples': 556416, 'steps': 11591, 'loss/train': 1.0446290969848633} +07/25/2024 12:20:04 - INFO - __main__ - Step 11593: {'lr': 0.0004934626234045572, 'samples': 556464, 'steps': 11592, 'loss/train': 1.9613277912139893} +07/25/2024 12:20:04 - INFO - __main__ - Step 11594: {'lr': 0.0004934614282101998, 'samples': 556512, 'steps': 11593, 'loss/train': 1.9899922609329224} +07/25/2024 12:20:04 - INFO - __main__ - Step 11595: {'lr': 0.0004934602329080444, 'samples': 556560, 'steps': 11594, 'loss/train': 2.4176859855651855} +07/25/2024 12:20:05 - INFO - __main__ - Step 11596: {'lr': 0.0004934590374980918, 'samples': 556608, 'steps': 11595, 'loss/train': 1.5496816635131836} +07/25/2024 12:20:05 - INFO - __main__ - Step 11597: {'lr': 0.0004934578419803422, 'samples': 556656, 'steps': 11596, 'loss/train': 1.0048480033874512} +07/25/2024 12:20:05 - INFO - __main__ - Step 11598: {'lr': 0.0004934566463547963, 'samples': 556704, 'steps': 11597, 'loss/train': 2.0770113468170166} +07/25/2024 12:20:05 - INFO - __main__ - Step 11599: {'lr': 0.0004934554506214547, 'samples': 556752, 'steps': 11598, 'loss/train': 1.7456821203231812} +07/25/2024 12:20:06 - INFO - __main__ - Step 11600: {'lr': 0.0004934542547803178, 'samples': 556800, 'steps': 11599, 'loss/train': 2.3160901069641113} +07/25/2024 12:20:06 - INFO - __main__ - Step 11601: {'lr': 0.0004934530588313861, 'samples': 556848, 'steps': 11600, 'loss/train': 1.2564717531204224} +07/25/2024 12:20:06 - INFO - __main__ - Step 11602: {'lr': 0.0004934518627746603, 'samples': 556896, 'steps': 11601, 'loss/train': 1.5571565628051758} +07/25/2024 12:20:07 - INFO - __main__ - Step 11603: {'lr': 0.0004934506666101409, 'samples': 556944, 'steps': 11602, 'loss/train': 1.3976211547851562} +07/25/2024 12:20:07 - INFO - __main__ - Step 11604: {'lr': 0.0004934494703378282, 'samples': 556992, 'steps': 11603, 'loss/train': 1.7465300559997559} +07/25/2024 12:20:07 - INFO - __main__ - Step 11605: {'lr': 0.0004934482739577231, 'samples': 557040, 'steps': 11604, 'loss/train': 1.895577073097229} +07/25/2024 12:20:07 - INFO - __main__ - Step 11606: {'lr': 0.0004934470774698257, 'samples': 557088, 'steps': 11605, 'loss/train': 3.1126487255096436} +07/25/2024 12:20:08 - INFO - __main__ - Step 11607: {'lr': 0.000493445880874137, 'samples': 557136, 'steps': 11606, 'loss/train': 1.7292015552520752} +07/25/2024 12:20:08 - INFO - __main__ - Step 11608: {'lr': 0.0004934446841706571, 'samples': 557184, 'steps': 11607, 'loss/train': 1.7084710597991943} +07/25/2024 12:20:08 - INFO - __main__ - Step 11609: {'lr': 0.0004934434873593868, 'samples': 557232, 'steps': 11608, 'loss/train': 1.4122064113616943} +07/25/2024 12:20:09 - INFO - __main__ - Step 11610: {'lr': 0.0004934422904403265, 'samples': 557280, 'steps': 11609, 'loss/train': 1.8389180898666382} +07/25/2024 12:20:09 - INFO - __main__ - Step 11611: {'lr': 0.0004934410934134768, 'samples': 557328, 'steps': 11610, 'loss/train': 2.2953336238861084} +07/25/2024 12:20:09 - INFO - __main__ - Step 11612: {'lr': 0.0004934398962788382, 'samples': 557376, 'steps': 11611, 'loss/train': 0.1840335875749588} +07/25/2024 12:20:09 - INFO - __main__ - Step 11613: {'lr': 0.0004934386990364112, 'samples': 557424, 'steps': 11612, 'loss/train': 2.139627456665039} +07/25/2024 12:20:10 - INFO - __main__ - Step 11614: {'lr': 0.0004934375016861964, 'samples': 557472, 'steps': 11613, 'loss/train': 2.078068256378174} +07/25/2024 12:20:10 - INFO - __main__ - Step 11615: {'lr': 0.0004934363042281943, 'samples': 557520, 'steps': 11614, 'loss/train': 1.987134575843811} +07/25/2024 12:20:10 - INFO - __main__ - Step 11616: {'lr': 0.0004934351066624054, 'samples': 557568, 'steps': 11615, 'loss/train': 1.7299238443374634} +07/25/2024 12:20:11 - INFO - __main__ - Step 11617: {'lr': 0.0004934339089888303, 'samples': 557616, 'steps': 11616, 'loss/train': 1.9277135133743286} +07/25/2024 12:20:11 - INFO - __main__ - Step 11618: {'lr': 0.0004934327112074695, 'samples': 557664, 'steps': 11617, 'loss/train': 1.9752689599990845} +07/25/2024 12:20:11 - INFO - __main__ - Step 11619: {'lr': 0.0004934315133183234, 'samples': 557712, 'steps': 11618, 'loss/train': 2.2924227714538574} +07/25/2024 12:20:11 - INFO - __main__ - Step 11620: {'lr': 0.0004934303153213927, 'samples': 557760, 'steps': 11619, 'loss/train': 1.737365484237671} +07/25/2024 12:20:12 - INFO - __main__ - Step 11621: {'lr': 0.000493429117216678, 'samples': 557808, 'steps': 11620, 'loss/train': 1.8264704942703247} +07/25/2024 12:20:12 - INFO - __main__ - Step 11622: {'lr': 0.0004934279190041796, 'samples': 557856, 'steps': 11621, 'loss/train': 2.470993757247925} +07/25/2024 12:20:12 - INFO - __main__ - Step 11623: {'lr': 0.0004934267206838981, 'samples': 557904, 'steps': 11622, 'loss/train': 2.070197105407715} +07/25/2024 12:20:13 - INFO - __main__ - Step 11624: {'lr': 0.0004934255222558342, 'samples': 557952, 'steps': 11623, 'loss/train': 1.9606229066848755} +07/25/2024 12:20:13 - INFO - __main__ - Step 11625: {'lr': 0.0004934243237199881, 'samples': 558000, 'steps': 11624, 'loss/train': 1.5811574459075928} +07/25/2024 12:20:13 - INFO - __main__ - Step 11626: {'lr': 0.0004934231250763607, 'samples': 558048, 'steps': 11625, 'loss/train': 2.0547800064086914} +07/25/2024 12:20:13 - INFO - __main__ - Step 11627: {'lr': 0.0004934219263249523, 'samples': 558096, 'steps': 11626, 'loss/train': 1.616385817527771} +07/25/2024 12:20:14 - INFO - __main__ - Step 11628: {'lr': 0.0004934207274657635, 'samples': 558144, 'steps': 11627, 'loss/train': 2.0102005004882812} +07/25/2024 12:20:14 - INFO - __main__ - Step 11629: {'lr': 0.0004934195284987948, 'samples': 558192, 'steps': 11628, 'loss/train': 2.037097454071045} +07/25/2024 12:20:14 - INFO - __main__ - Step 11630: {'lr': 0.0004934183294240468, 'samples': 558240, 'steps': 11629, 'loss/train': 2.4113616943359375} +07/25/2024 12:20:15 - INFO - __main__ - Step 11631: {'lr': 0.0004934171302415199, 'samples': 558288, 'steps': 11630, 'loss/train': 1.5031622648239136} +07/25/2024 12:20:15 - INFO - __main__ - Step 11632: {'lr': 0.0004934159309512147, 'samples': 558336, 'steps': 11631, 'loss/train': 1.4314186573028564} +07/25/2024 12:20:15 - INFO - __main__ - Step 11633: {'lr': 0.0004934147315531318, 'samples': 558384, 'steps': 11632, 'loss/train': 1.779997706413269} +07/25/2024 12:20:15 - INFO - __main__ - Step 11634: {'lr': 0.0004934135320472717, 'samples': 558432, 'steps': 11633, 'loss/train': 1.872788667678833} +07/25/2024 12:20:16 - INFO - __main__ - Step 11635: {'lr': 0.000493412332433635, 'samples': 558480, 'steps': 11634, 'loss/train': 2.044440269470215} +07/25/2024 12:20:16 - INFO - __main__ - Step 11636: {'lr': 0.0004934111327122221, 'samples': 558528, 'steps': 11635, 'loss/train': 0.218181774020195} +07/25/2024 12:20:16 - INFO - __main__ - Step 11637: {'lr': 0.0004934099328830335, 'samples': 558576, 'steps': 11636, 'loss/train': 2.505239248275757} +07/25/2024 12:20:17 - INFO - __main__ - Step 11638: {'lr': 0.0004934087329460698, 'samples': 558624, 'steps': 11637, 'loss/train': 2.1681511402130127} +07/25/2024 12:20:17 - INFO - __main__ - Step 11639: {'lr': 0.0004934075329013315, 'samples': 558672, 'steps': 11638, 'loss/train': 1.9089761972427368} +07/25/2024 12:20:17 - INFO - __main__ - Step 11640: {'lr': 0.0004934063327488192, 'samples': 558720, 'steps': 11639, 'loss/train': 1.7041376829147339} +07/25/2024 12:20:17 - INFO - __main__ - Step 11641: {'lr': 0.0004934051324885334, 'samples': 558768, 'steps': 11640, 'loss/train': 1.7749614715576172} +07/25/2024 12:20:18 - INFO - __main__ - Step 11642: {'lr': 0.0004934039321204747, 'samples': 558816, 'steps': 11641, 'loss/train': 2.136859178543091} +07/25/2024 12:20:18 - INFO - __main__ - Step 11643: {'lr': 0.0004934027316446434, 'samples': 558864, 'steps': 11642, 'loss/train': 1.8910276889801025} +07/25/2024 12:20:18 - INFO - __main__ - Step 11644: {'lr': 0.0004934015310610404, 'samples': 558912, 'steps': 11643, 'loss/train': 1.884437084197998} +07/25/2024 12:20:19 - INFO - __main__ - Step 11645: {'lr': 0.0004934003303696658, 'samples': 558960, 'steps': 11644, 'loss/train': 1.702440857887268} +07/25/2024 12:20:19 - INFO - __main__ - Step 11646: {'lr': 0.0004933991295705205, 'samples': 559008, 'steps': 11645, 'loss/train': 1.863425374031067} +07/25/2024 12:20:19 - INFO - __main__ - Step 11647: {'lr': 0.0004933979286636049, 'samples': 559056, 'steps': 11646, 'loss/train': 2.1120810508728027} +07/25/2024 12:20:19 - INFO - __main__ - Step 11648: {'lr': 0.0004933967276489194, 'samples': 559104, 'steps': 11647, 'loss/train': 1.9051841497421265} +07/25/2024 12:20:20 - INFO - __main__ - Step 11649: {'lr': 0.0004933955265264648, 'samples': 559152, 'steps': 11648, 'loss/train': 1.4765657186508179} +07/25/2024 12:20:20 - INFO - __main__ - Step 11650: {'lr': 0.0004933943252962413, 'samples': 559200, 'steps': 11649, 'loss/train': 1.9459609985351562} +07/25/2024 12:20:20 - INFO - __main__ - Step 11651: {'lr': 0.0004933931239582498, 'samples': 559248, 'steps': 11650, 'loss/train': 1.9808423519134521} +07/25/2024 12:20:21 - INFO - __main__ - Step 11652: {'lr': 0.0004933919225124905, 'samples': 559296, 'steps': 11651, 'loss/train': 1.9170893430709839} +07/25/2024 12:20:21 - INFO - __main__ - Step 11653: {'lr': 0.0004933907209589641, 'samples': 559344, 'steps': 11652, 'loss/train': 1.6707991361618042} +07/25/2024 12:20:21 - INFO - __main__ - Step 11654: {'lr': 0.0004933895192976712, 'samples': 559392, 'steps': 11653, 'loss/train': 1.945453405380249} +07/25/2024 12:20:21 - INFO - __main__ - Step 11655: {'lr': 0.0004933883175286121, 'samples': 559440, 'steps': 11654, 'loss/train': 2.109267234802246} +07/25/2024 12:20:22 - INFO - __main__ - Step 11656: {'lr': 0.0004933871156517876, 'samples': 559488, 'steps': 11655, 'loss/train': 1.0416288375854492} +07/25/2024 12:20:22 - INFO - __main__ - Step 11657: {'lr': 0.000493385913667198, 'samples': 559536, 'steps': 11656, 'loss/train': 1.9704835414886475} +07/25/2024 12:20:22 - INFO - __main__ - Step 11658: {'lr': 0.000493384711574844, 'samples': 559584, 'steps': 11657, 'loss/train': 2.058689594268799} +07/25/2024 12:20:22 - INFO - __main__ - Step 11659: {'lr': 0.0004933835093747261, 'samples': 559632, 'steps': 11658, 'loss/train': 2.1815972328186035} +07/25/2024 12:20:23 - INFO - __main__ - Step 11660: {'lr': 0.0004933823070668447, 'samples': 559680, 'steps': 11659, 'loss/train': 0.9232041835784912} +07/25/2024 12:20:23 - INFO - __main__ - Step 11661: {'lr': 0.0004933811046512006, 'samples': 559728, 'steps': 11660, 'loss/train': 2.0500526428222656} +07/25/2024 12:20:23 - INFO - __main__ - Step 11662: {'lr': 0.0004933799021277941, 'samples': 559776, 'steps': 11661, 'loss/train': 2.1430158615112305} +07/25/2024 12:20:24 - INFO - __main__ - Step 11663: {'lr': 0.0004933786994966257, 'samples': 559824, 'steps': 11662, 'loss/train': 1.8849858045578003} +07/25/2024 12:20:24 - INFO - __main__ - Step 11664: {'lr': 0.0004933774967576961, 'samples': 559872, 'steps': 11663, 'loss/train': 1.8267370462417603} +07/25/2024 12:20:24 - INFO - __main__ - Step 11665: {'lr': 0.0004933762939110057, 'samples': 559920, 'steps': 11664, 'loss/train': 2.2442049980163574} +07/25/2024 12:20:24 - INFO - __main__ - Step 11666: {'lr': 0.0004933750909565552, 'samples': 559968, 'steps': 11665, 'loss/train': 2.239074468612671} +07/25/2024 12:20:25 - INFO - __main__ - Step 11667: {'lr': 0.0004933738878943449, 'samples': 560016, 'steps': 11666, 'loss/train': 2.0483546257019043} +07/25/2024 12:20:25 - INFO - __main__ - Step 11668: {'lr': 0.0004933726847243756, 'samples': 560064, 'steps': 11667, 'loss/train': 1.6363673210144043} +07/25/2024 12:20:25 - INFO - __main__ - Step 11669: {'lr': 0.0004933714814466476, 'samples': 560112, 'steps': 11668, 'loss/train': 0.9356234669685364} +07/25/2024 12:20:26 - INFO - __main__ - Step 11670: {'lr': 0.0004933702780611616, 'samples': 560160, 'steps': 11669, 'loss/train': 1.906383752822876} +07/25/2024 12:20:26 - INFO - __main__ - Step 11671: {'lr': 0.000493369074567918, 'samples': 560208, 'steps': 11670, 'loss/train': 1.6060512065887451} +07/25/2024 12:20:26 - INFO - __main__ - Step 11672: {'lr': 0.0004933678709669174, 'samples': 560256, 'steps': 11671, 'loss/train': 1.695281744003296} +07/25/2024 12:20:26 - INFO - __main__ - Step 11673: {'lr': 0.0004933666672581604, 'samples': 560304, 'steps': 11672, 'loss/train': 1.2743679285049438} +07/25/2024 12:20:27 - INFO - __main__ - Step 11674: {'lr': 0.0004933654634416473, 'samples': 560352, 'steps': 11673, 'loss/train': 1.7924357652664185} +07/25/2024 12:20:27 - INFO - __main__ - Step 11675: {'lr': 0.000493364259517379, 'samples': 560400, 'steps': 11674, 'loss/train': 1.941179871559143} +07/25/2024 12:20:27 - INFO - __main__ - Step 11676: {'lr': 0.0004933630554853557, 'samples': 560448, 'steps': 11675, 'loss/train': 1.9713743925094604} +07/25/2024 12:20:28 - INFO - __main__ - Step 11677: {'lr': 0.0004933618513455781, 'samples': 560496, 'steps': 11676, 'loss/train': 1.3170006275177002} +07/25/2024 12:20:28 - INFO - __main__ - Step 11678: {'lr': 0.0004933606470980467, 'samples': 560544, 'steps': 11677, 'loss/train': 1.5962544679641724} +07/25/2024 12:20:28 - INFO - __main__ - Step 11679: {'lr': 0.0004933594427427619, 'samples': 560592, 'steps': 11678, 'loss/train': 2.006566047668457} +07/25/2024 12:20:28 - INFO - __main__ - Step 11680: {'lr': 0.0004933582382797245, 'samples': 560640, 'steps': 11679, 'loss/train': 1.2030633687973022} +07/25/2024 12:20:29 - INFO - __main__ - Step 11681: {'lr': 0.0004933570337089348, 'samples': 560688, 'steps': 11680, 'loss/train': 1.722786545753479} +07/25/2024 12:20:29 - INFO - __main__ - Step 11682: {'lr': 0.0004933558290303935, 'samples': 560736, 'steps': 11681, 'loss/train': 1.8823671340942383} +07/25/2024 12:20:29 - INFO - __main__ - Step 11683: {'lr': 0.000493354624244101, 'samples': 560784, 'steps': 11682, 'loss/train': 1.9875080585479736} +07/25/2024 12:20:30 - INFO - __main__ - Step 11684: {'lr': 0.0004933534193500579, 'samples': 560832, 'steps': 11683, 'loss/train': 1.9359339475631714} +07/25/2024 12:20:30 - INFO - __main__ - Step 11685: {'lr': 0.0004933522143482647, 'samples': 560880, 'steps': 11684, 'loss/train': 1.9455584287643433} +07/25/2024 12:20:30 - INFO - __main__ - Step 11686: {'lr': 0.000493351009238722, 'samples': 560928, 'steps': 11685, 'loss/train': 1.51952064037323} +07/25/2024 12:20:30 - INFO - __main__ - Step 11687: {'lr': 0.0004933498040214304, 'samples': 560976, 'steps': 11686, 'loss/train': 1.3148829936981201} +07/25/2024 12:20:31 - INFO - __main__ - Step 11688: {'lr': 0.0004933485986963901, 'samples': 561024, 'steps': 11687, 'loss/train': 1.8762527704238892} +07/25/2024 12:20:31 - INFO - __main__ - Step 11689: {'lr': 0.000493347393263602, 'samples': 561072, 'steps': 11688, 'loss/train': 1.8810557126998901} +07/25/2024 12:20:31 - INFO - __main__ - Step 11690: {'lr': 0.0004933461877230665, 'samples': 561120, 'steps': 11689, 'loss/train': 1.8961321115493774} +07/25/2024 12:20:32 - INFO - __main__ - Step 11691: {'lr': 0.0004933449820747841, 'samples': 561168, 'steps': 11690, 'loss/train': 2.0593817234039307} +07/25/2024 12:20:32 - INFO - __main__ - Step 11692: {'lr': 0.0004933437763187553, 'samples': 561216, 'steps': 11691, 'loss/train': 1.9393413066864014} +07/25/2024 12:20:32 - INFO - __main__ - Step 11693: {'lr': 0.0004933425704549808, 'samples': 561264, 'steps': 11692, 'loss/train': 2.7830803394317627} +07/25/2024 12:20:32 - INFO - __main__ - Step 11694: {'lr': 0.000493341364483461, 'samples': 561312, 'steps': 11693, 'loss/train': 1.7647173404693604} +07/25/2024 12:20:33 - INFO - __main__ - Step 11695: {'lr': 0.0004933401584041964, 'samples': 561360, 'steps': 11694, 'loss/train': 1.293666124343872} +07/25/2024 12:20:33 - INFO - __main__ - Step 11696: {'lr': 0.0004933389522171877, 'samples': 561408, 'steps': 11695, 'loss/train': 1.0786195993423462} +07/25/2024 12:20:33 - INFO - __main__ - Step 11697: {'lr': 0.0004933377459224352, 'samples': 561456, 'steps': 11696, 'loss/train': 1.6217014789581299} +07/25/2024 12:20:34 - INFO - __main__ - Step 11698: {'lr': 0.0004933365395199396, 'samples': 561504, 'steps': 11697, 'loss/train': 2.32440447807312} +07/25/2024 12:20:34 - INFO - __main__ - Step 11699: {'lr': 0.0004933353330097014, 'samples': 561552, 'steps': 11698, 'loss/train': 1.7443633079528809} +07/25/2024 12:20:34 - INFO - __main__ - Step 11700: {'lr': 0.0004933341263917212, 'samples': 561600, 'steps': 11699, 'loss/train': 2.1745171546936035} +07/25/2024 12:20:34 - INFO - __main__ - Step 11701: {'lr': 0.0004933329196659994, 'samples': 561648, 'steps': 11700, 'loss/train': 1.4917067289352417} +07/25/2024 12:20:35 - INFO - __main__ - Step 11702: {'lr': 0.0004933317128325367, 'samples': 561696, 'steps': 11701, 'loss/train': 1.9013423919677734} +07/25/2024 12:20:35 - INFO - __main__ - Step 11703: {'lr': 0.0004933305058913334, 'samples': 561744, 'steps': 11702, 'loss/train': 2.1482176780700684} +07/25/2024 12:20:35 - INFO - __main__ - Step 11704: {'lr': 0.0004933292988423902, 'samples': 561792, 'steps': 11703, 'loss/train': 1.9341962337493896} +07/25/2024 12:20:36 - INFO - __main__ - Step 11705: {'lr': 0.0004933280916857077, 'samples': 561840, 'steps': 11704, 'loss/train': 2.3034324645996094} +07/25/2024 12:20:36 - INFO - __main__ - Step 11706: {'lr': 0.0004933268844212863, 'samples': 561888, 'steps': 11705, 'loss/train': 2.072694778442383} +07/25/2024 12:20:36 - INFO - __main__ - Step 11707: {'lr': 0.0004933256770491265, 'samples': 561936, 'steps': 11706, 'loss/train': 2.6834046840667725} +07/25/2024 12:20:36 - INFO - __main__ - Step 11708: {'lr': 0.0004933244695692289, 'samples': 561984, 'steps': 11707, 'loss/train': 1.7831860780715942} +07/25/2024 12:20:37 - INFO - __main__ - Step 11709: {'lr': 0.0004933232619815941, 'samples': 562032, 'steps': 11708, 'loss/train': 2.16441011428833} +07/25/2024 12:20:37 - INFO - __main__ - Step 11710: {'lr': 0.0004933220542862226, 'samples': 562080, 'steps': 11709, 'loss/train': 1.4670487642288208} +07/25/2024 12:20:37 - INFO - __main__ - Step 11711: {'lr': 0.0004933208464831149, 'samples': 562128, 'steps': 11710, 'loss/train': 1.7923321723937988} +07/25/2024 12:20:38 - INFO - __main__ - Step 11712: {'lr': 0.0004933196385722715, 'samples': 562176, 'steps': 11711, 'loss/train': 1.964859962463379} +07/25/2024 12:20:38 - INFO - __main__ - Step 11713: {'lr': 0.000493318430553693, 'samples': 562224, 'steps': 11712, 'loss/train': 1.9454923868179321} +07/25/2024 12:20:38 - INFO - __main__ - Step 11714: {'lr': 0.00049331722242738, 'samples': 562272, 'steps': 11713, 'loss/train': 1.7717987298965454} +07/25/2024 12:20:38 - INFO - __main__ - Step 11715: {'lr': 0.000493316014193333, 'samples': 562320, 'steps': 11714, 'loss/train': 1.9090923070907593} +07/25/2024 12:20:39 - INFO - __main__ - Step 11716: {'lr': 0.0004933148058515523, 'samples': 562368, 'steps': 11715, 'loss/train': 1.9971967935562134} +07/25/2024 12:20:39 - INFO - __main__ - Step 11717: {'lr': 0.0004933135974020386, 'samples': 562416, 'steps': 11716, 'loss/train': 1.8738969564437866} +07/25/2024 12:20:39 - INFO - __main__ - Step 11718: {'lr': 0.0004933123888447926, 'samples': 562464, 'steps': 11717, 'loss/train': 1.6283029317855835} +07/25/2024 12:20:40 - INFO - __main__ - Step 11719: {'lr': 0.0004933111801798147, 'samples': 562512, 'steps': 11718, 'loss/train': 1.072256326675415} +07/25/2024 12:20:40 - INFO - __main__ - Step 11720: {'lr': 0.0004933099714071053, 'samples': 562560, 'steps': 11719, 'loss/train': 1.4312635660171509} +07/25/2024 12:20:40 - INFO - __main__ - Step 11721: {'lr': 0.0004933087625266651, 'samples': 562608, 'steps': 11720, 'loss/train': 1.798698902130127} +07/25/2024 12:20:40 - INFO - __main__ - Step 11722: {'lr': 0.0004933075535384947, 'samples': 562656, 'steps': 11721, 'loss/train': 2.3788840770721436} +07/25/2024 12:20:41 - INFO - __main__ - Step 11723: {'lr': 0.0004933063444425945, 'samples': 562704, 'steps': 11722, 'loss/train': 1.7611626386642456} +07/25/2024 12:20:41 - INFO - __main__ - Step 11724: {'lr': 0.000493305135238965, 'samples': 562752, 'steps': 11723, 'loss/train': 2.2833900451660156} +07/25/2024 12:20:41 - INFO - __main__ - Step 11725: {'lr': 0.0004933039259276068, 'samples': 562800, 'steps': 11724, 'loss/train': 1.8610329627990723} +07/25/2024 12:20:42 - INFO - __main__ - Step 11726: {'lr': 0.0004933027165085205, 'samples': 562848, 'steps': 11725, 'loss/train': 2.129030466079712} +07/25/2024 12:20:42 - INFO - __main__ - Step 11727: {'lr': 0.0004933015069817065, 'samples': 562896, 'steps': 11726, 'loss/train': 1.7210073471069336} +07/25/2024 12:20:42 - INFO - __main__ - Step 11728: {'lr': 0.0004933002973471656, 'samples': 562944, 'steps': 11727, 'loss/train': 1.6450912952423096} +07/25/2024 12:20:42 - INFO - __main__ - Step 11729: {'lr': 0.0004932990876048979, 'samples': 562992, 'steps': 11728, 'loss/train': 1.7139614820480347} +07/25/2024 12:20:43 - INFO - __main__ - Step 11730: {'lr': 0.0004932978777549043, 'samples': 563040, 'steps': 11729, 'loss/train': 1.523999810218811} +07/25/2024 12:20:43 - INFO - __main__ - Step 11731: {'lr': 0.0004932966677971852, 'samples': 563088, 'steps': 11730, 'loss/train': 2.288360595703125} +07/25/2024 12:20:43 - INFO - __main__ - Step 11732: {'lr': 0.0004932954577317411, 'samples': 563136, 'steps': 11731, 'loss/train': 2.1483397483825684} +07/25/2024 12:20:44 - INFO - __main__ - Step 11733: {'lr': 0.0004932942475585726, 'samples': 563184, 'steps': 11732, 'loss/train': 1.8836345672607422} +07/25/2024 12:20:44 - INFO - __main__ - Step 11734: {'lr': 0.0004932930372776804, 'samples': 563232, 'steps': 11733, 'loss/train': 1.912014126777649} +07/25/2024 12:20:44 - INFO - __main__ - Step 11735: {'lr': 0.0004932918268890647, 'samples': 563280, 'steps': 11734, 'loss/train': 1.64262056350708} +07/25/2024 12:20:44 - INFO - __main__ - Step 11736: {'lr': 0.0004932906163927262, 'samples': 563328, 'steps': 11735, 'loss/train': 1.789346694946289} +07/25/2024 12:20:45 - INFO - __main__ - Step 11737: {'lr': 0.0004932894057886654, 'samples': 563376, 'steps': 11736, 'loss/train': 2.5699124336242676} +07/25/2024 12:20:45 - INFO - __main__ - Step 11738: {'lr': 0.000493288195076883, 'samples': 563424, 'steps': 11737, 'loss/train': 1.3427056074142456} +07/25/2024 12:20:45 - INFO - __main__ - Step 11739: {'lr': 0.0004932869842573793, 'samples': 563472, 'steps': 11738, 'loss/train': 1.97315514087677} +07/25/2024 12:20:46 - INFO - __main__ - Step 11740: {'lr': 0.000493285773330155, 'samples': 563520, 'steps': 11739, 'loss/train': 1.4474904537200928} +07/25/2024 12:20:46 - INFO - __main__ - Step 11741: {'lr': 0.0004932845622952105, 'samples': 563568, 'steps': 11740, 'loss/train': 2.0461299419403076} +07/25/2024 12:20:46 - INFO - __main__ - Step 11742: {'lr': 0.0004932833511525465, 'samples': 563616, 'steps': 11741, 'loss/train': 1.950208067893982} +07/25/2024 12:20:46 - INFO - __main__ - Step 11743: {'lr': 0.0004932821399021634, 'samples': 563664, 'steps': 11742, 'loss/train': 1.9698882102966309} +07/25/2024 12:20:47 - INFO - __main__ - Step 11744: {'lr': 0.0004932809285440618, 'samples': 563712, 'steps': 11743, 'loss/train': 2.400278329849243} +07/25/2024 12:20:47 - INFO - __main__ - Step 11745: {'lr': 0.0004932797170782422, 'samples': 563760, 'steps': 11744, 'loss/train': 2.1245675086975098} +07/25/2024 12:20:47 - INFO - __main__ - Step 11746: {'lr': 0.0004932785055047051, 'samples': 563808, 'steps': 11745, 'loss/train': 1.988987684249878} +07/25/2024 12:20:48 - INFO - __main__ - Step 11747: {'lr': 0.0004932772938234512, 'samples': 563856, 'steps': 11746, 'loss/train': 2.2349579334259033} +07/25/2024 12:20:48 - INFO - __main__ - Step 11748: {'lr': 0.0004932760820344808, 'samples': 563904, 'steps': 11747, 'loss/train': 2.101663827896118} +07/25/2024 12:20:48 - INFO - __main__ - Step 11749: {'lr': 0.0004932748701377948, 'samples': 563952, 'steps': 11748, 'loss/train': 1.6588469743728638} +07/25/2024 12:20:48 - INFO - __main__ - Step 11750: {'lr': 0.0004932736581333933, 'samples': 564000, 'steps': 11749, 'loss/train': 1.7149235010147095} +07/25/2024 12:20:49 - INFO - __main__ - Step 11751: {'lr': 0.0004932724460212771, 'samples': 564048, 'steps': 11750, 'loss/train': 2.0009241104125977} +07/25/2024 12:20:49 - INFO - __main__ - Step 11752: {'lr': 0.0004932712338014467, 'samples': 564096, 'steps': 11751, 'loss/train': 2.2881133556365967} +07/25/2024 12:20:49 - INFO - __main__ - Step 11753: {'lr': 0.0004932700214739025, 'samples': 564144, 'steps': 11752, 'loss/train': 2.2417540550231934} +07/25/2024 12:20:49 - INFO - __main__ - Step 11754: {'lr': 0.0004932688090386454, 'samples': 564192, 'steps': 11753, 'loss/train': 2.0181376934051514} +07/25/2024 12:20:50 - INFO - __main__ - Step 11755: {'lr': 0.0004932675964956755, 'samples': 564240, 'steps': 11754, 'loss/train': 1.6883243322372437} +07/25/2024 12:20:50 - INFO - __main__ - Step 11756: {'lr': 0.0004932663838449936, 'samples': 564288, 'steps': 11755, 'loss/train': 1.9457684755325317} +07/25/2024 12:20:50 - INFO - __main__ - Step 11757: {'lr': 0.0004932651710866001, 'samples': 564336, 'steps': 11756, 'loss/train': 2.4115850925445557} +07/25/2024 12:20:51 - INFO - __main__ - Step 11758: {'lr': 0.0004932639582204955, 'samples': 564384, 'steps': 11757, 'loss/train': 1.9057080745697021} +07/25/2024 12:20:51 - INFO - __main__ - Step 11759: {'lr': 0.0004932627452466806, 'samples': 564432, 'steps': 11758, 'loss/train': 2.16243314743042} +07/25/2024 12:20:51 - INFO - __main__ - Step 11760: {'lr': 0.0004932615321651557, 'samples': 564480, 'steps': 11759, 'loss/train': 1.7396656274795532} +07/25/2024 12:20:51 - INFO - __main__ - Step 11761: {'lr': 0.0004932603189759214, 'samples': 564528, 'steps': 11760, 'loss/train': 1.742767095565796} +07/25/2024 12:20:52 - INFO - __main__ - Step 11762: {'lr': 0.0004932591056789784, 'samples': 564576, 'steps': 11761, 'loss/train': 1.5921614170074463} +07/25/2024 12:20:52 - INFO - __main__ - Step 11763: {'lr': 0.0004932578922743269, 'samples': 564624, 'steps': 11762, 'loss/train': 2.753122091293335} +07/25/2024 12:20:52 - INFO - __main__ - Step 11764: {'lr': 0.0004932566787619677, 'samples': 564672, 'steps': 11763, 'loss/train': 1.8009496927261353} +07/25/2024 12:20:53 - INFO - __main__ - Step 11765: {'lr': 0.0004932554651419012, 'samples': 564720, 'steps': 11764, 'loss/train': 1.7723934650421143} +07/25/2024 12:20:53 - INFO - __main__ - Step 11766: {'lr': 0.0004932542514141281, 'samples': 564768, 'steps': 11765, 'loss/train': 2.9432897567749023} +07/25/2024 12:20:53 - INFO - __main__ - Step 11767: {'lr': 0.0004932530375786488, 'samples': 564816, 'steps': 11766, 'loss/train': 1.6826883554458618} +07/25/2024 12:20:53 - INFO - __main__ - Step 11768: {'lr': 0.0004932518236354639, 'samples': 564864, 'steps': 11767, 'loss/train': 1.7999383211135864} +07/25/2024 12:20:54 - INFO - __main__ - Step 11769: {'lr': 0.0004932506095845738, 'samples': 564912, 'steps': 11768, 'loss/train': 1.9116520881652832} +07/25/2024 12:20:54 - INFO - __main__ - Step 11770: {'lr': 0.0004932493954259791, 'samples': 564960, 'steps': 11769, 'loss/train': 1.8727017641067505} +07/25/2024 12:20:54 - INFO - __main__ - Step 11771: {'lr': 0.0004932481811596806, 'samples': 565008, 'steps': 11770, 'loss/train': 1.8880534172058105} +07/25/2024 12:20:55 - INFO - __main__ - Step 11772: {'lr': 0.0004932469667856785, 'samples': 565056, 'steps': 11771, 'loss/train': 1.7724086046218872} +07/25/2024 12:20:55 - INFO - __main__ - Step 11773: {'lr': 0.0004932457523039735, 'samples': 565104, 'steps': 11772, 'loss/train': 1.6095589399337769} +07/25/2024 12:20:55 - INFO - __main__ - Step 11774: {'lr': 0.000493244537714566, 'samples': 565152, 'steps': 11773, 'loss/train': 1.396053671836853} +07/25/2024 12:20:55 - INFO - __main__ - Step 11775: {'lr': 0.0004932433230174566, 'samples': 565200, 'steps': 11774, 'loss/train': 1.716753602027893} +07/25/2024 12:20:56 - INFO - __main__ - Step 11776: {'lr': 0.0004932421082126461, 'samples': 565248, 'steps': 11775, 'loss/train': 1.8727428913116455} +07/25/2024 12:20:56 - INFO - __main__ - Step 11777: {'lr': 0.0004932408933001346, 'samples': 565296, 'steps': 11776, 'loss/train': 1.7302950620651245} +07/25/2024 12:20:56 - INFO - __main__ - Step 11778: {'lr': 0.000493239678279923, 'samples': 565344, 'steps': 11777, 'loss/train': 1.936086654663086} +07/25/2024 12:20:57 - INFO - __main__ - Step 11779: {'lr': 0.0004932384631520115, 'samples': 565392, 'steps': 11778, 'loss/train': 2.105380058288574} +07/25/2024 12:20:57 - INFO - __main__ - Step 11780: {'lr': 0.000493237247916401, 'samples': 565440, 'steps': 11779, 'loss/train': 2.185645341873169} +07/25/2024 12:20:57 - INFO - __main__ - Step 11781: {'lr': 0.0004932360325730918, 'samples': 565488, 'steps': 11780, 'loss/train': 1.5406137704849243} +07/25/2024 12:20:57 - INFO - __main__ - Step 11782: {'lr': 0.0004932348171220845, 'samples': 565536, 'steps': 11781, 'loss/train': 2.0881214141845703} +07/25/2024 12:20:58 - INFO - __main__ - Step 11783: {'lr': 0.0004932336015633797, 'samples': 565584, 'steps': 11782, 'loss/train': 2.201287269592285} +07/25/2024 12:20:58 - INFO - __main__ - Step 11784: {'lr': 0.0004932323858969778, 'samples': 565632, 'steps': 11783, 'loss/train': 1.5656812191009521} +07/25/2024 12:20:58 - INFO - __main__ - Step 11785: {'lr': 0.0004932311701228794, 'samples': 565680, 'steps': 11784, 'loss/train': 2.5236611366271973} +07/25/2024 12:20:59 - INFO - __main__ - Step 11786: {'lr': 0.0004932299542410851, 'samples': 565728, 'steps': 11785, 'loss/train': 2.1486873626708984} +07/25/2024 12:20:59 - INFO - __main__ - Step 11787: {'lr': 0.0004932287382515953, 'samples': 565776, 'steps': 11786, 'loss/train': 1.8861972093582153} +07/25/2024 12:20:59 - INFO - __main__ - Step 11788: {'lr': 0.0004932275221544108, 'samples': 565824, 'steps': 11787, 'loss/train': 1.7190731763839722} +07/25/2024 12:20:59 - INFO - __main__ - Step 11789: {'lr': 0.0004932263059495318, 'samples': 565872, 'steps': 11788, 'loss/train': 1.803928256034851} +07/25/2024 12:21:00 - INFO - __main__ - Step 11790: {'lr': 0.0004932250896369591, 'samples': 565920, 'steps': 11789, 'loss/train': 3.349125623703003} +07/25/2024 12:21:00 - INFO - __main__ - Step 11791: {'lr': 0.0004932238732166932, 'samples': 565968, 'steps': 11790, 'loss/train': 1.8658009767532349} +07/25/2024 12:21:00 - INFO - __main__ - Step 11792: {'lr': 0.0004932226566887344, 'samples': 566016, 'steps': 11791, 'loss/train': 1.6811442375183105} +07/25/2024 12:21:01 - INFO - __main__ - Step 11793: {'lr': 0.0004932214400530836, 'samples': 566064, 'steps': 11792, 'loss/train': 1.8763470649719238} +07/25/2024 12:21:01 - INFO - __main__ - Step 11794: {'lr': 0.0004932202233097411, 'samples': 566112, 'steps': 11793, 'loss/train': 2.2175168991088867} +07/25/2024 12:21:01 - INFO - __main__ - Step 11795: {'lr': 0.0004932190064587074, 'samples': 566160, 'steps': 11794, 'loss/train': 1.030419945716858} +07/25/2024 12:21:01 - INFO - __main__ - Step 11796: {'lr': 0.0004932177894999833, 'samples': 566208, 'steps': 11795, 'loss/train': 1.9671605825424194} +07/25/2024 12:21:02 - INFO - __main__ - Step 11797: {'lr': 0.0004932165724335691, 'samples': 566256, 'steps': 11796, 'loss/train': 1.9514905214309692} +07/25/2024 12:21:02 - INFO - __main__ - Step 11798: {'lr': 0.0004932153552594654, 'samples': 566304, 'steps': 11797, 'loss/train': 1.3849031925201416} +07/25/2024 12:21:02 - INFO - __main__ - Step 11799: {'lr': 0.0004932141379776728, 'samples': 566352, 'steps': 11798, 'loss/train': 1.5123008489608765} +07/25/2024 12:21:03 - INFO - __main__ - Step 11800: {'lr': 0.0004932129205881918, 'samples': 566400, 'steps': 11799, 'loss/train': 1.850860595703125} +07/25/2024 12:21:03 - INFO - __main__ - Step 11801: {'lr': 0.0004932117030910229, 'samples': 566448, 'steps': 11800, 'loss/train': 1.9639227390289307} +07/25/2024 12:21:03 - INFO - __main__ - Step 11802: {'lr': 0.0004932104854861667, 'samples': 566496, 'steps': 11801, 'loss/train': 1.8454395532608032} +07/25/2024 12:21:03 - INFO - __main__ - Step 11803: {'lr': 0.0004932092677736236, 'samples': 566544, 'steps': 11802, 'loss/train': 0.3293021619319916} +07/25/2024 12:21:04 - INFO - __main__ - Step 11804: {'lr': 0.0004932080499533944, 'samples': 566592, 'steps': 11803, 'loss/train': 2.2613186836242676} +07/25/2024 12:21:04 - INFO - __main__ - Step 11805: {'lr': 0.0004932068320254795, 'samples': 566640, 'steps': 11804, 'loss/train': 1.6437994241714478} +07/25/2024 12:21:04 - INFO - __main__ - Step 11806: {'lr': 0.0004932056139898793, 'samples': 566688, 'steps': 11805, 'loss/train': 2.507862091064453} +07/25/2024 12:21:05 - INFO - __main__ - Step 11807: {'lr': 0.0004932043958465945, 'samples': 566736, 'steps': 11806, 'loss/train': 1.7591607570648193} +07/25/2024 12:21:05 - INFO - __main__ - Step 11808: {'lr': 0.0004932031775956257, 'samples': 566784, 'steps': 11807, 'loss/train': 1.5496786832809448} +07/25/2024 12:21:05 - INFO - __main__ - Step 11809: {'lr': 0.0004932019592369732, 'samples': 566832, 'steps': 11808, 'loss/train': 1.9537537097930908} +07/25/2024 12:21:05 - INFO - __main__ - Step 11810: {'lr': 0.0004932007407706378, 'samples': 566880, 'steps': 11809, 'loss/train': 1.774775505065918} +07/25/2024 12:21:06 - INFO - __main__ - Step 11811: {'lr': 0.0004931995221966199, 'samples': 566928, 'steps': 11810, 'loss/train': 1.7109061479568481} +07/25/2024 12:21:06 - INFO - __main__ - Step 11812: {'lr': 0.0004931983035149202, 'samples': 566976, 'steps': 11811, 'loss/train': 1.5313689708709717} +07/25/2024 12:21:06 - INFO - __main__ - Step 11813: {'lr': 0.0004931970847255388, 'samples': 567024, 'steps': 11812, 'loss/train': 1.7528878450393677} +07/25/2024 12:21:07 - INFO - __main__ - Step 11814: {'lr': 0.0004931958658284767, 'samples': 567072, 'steps': 11813, 'loss/train': 1.5310136079788208} +07/25/2024 12:21:07 - INFO - __main__ - Step 11815: {'lr': 0.0004931946468237344, 'samples': 567120, 'steps': 11814, 'loss/train': 2.3170008659362793} +07/25/2024 12:21:07 - INFO - __main__ - Step 11816: {'lr': 0.0004931934277113122, 'samples': 567168, 'steps': 11815, 'loss/train': 1.860227346420288} +07/25/2024 12:21:07 - INFO - __main__ - Step 11817: {'lr': 0.0004931922084912108, 'samples': 567216, 'steps': 11816, 'loss/train': 2.069981098175049} +07/25/2024 12:21:08 - INFO - __main__ - Step 11818: {'lr': 0.0004931909891634306, 'samples': 567264, 'steps': 11817, 'loss/train': 1.6943738460540771} +07/25/2024 12:21:08 - INFO - __main__ - Step 11819: {'lr': 0.0004931897697279724, 'samples': 567312, 'steps': 11818, 'loss/train': 1.6525410413742065} +07/25/2024 12:21:08 - INFO - __main__ - Step 11820: {'lr': 0.0004931885501848366, 'samples': 567360, 'steps': 11819, 'loss/train': 1.8670285940170288} +07/25/2024 12:21:09 - INFO - __main__ - Step 11821: {'lr': 0.0004931873305340236, 'samples': 567408, 'steps': 11820, 'loss/train': 1.8153473138809204} +07/25/2024 12:21:09 - INFO - __main__ - Step 11822: {'lr': 0.0004931861107755341, 'samples': 567456, 'steps': 11821, 'loss/train': 1.7094088792800903} +07/25/2024 12:21:09 - INFO - __main__ - Step 11823: {'lr': 0.0004931848909093685, 'samples': 567504, 'steps': 11822, 'loss/train': 1.591957688331604} +07/25/2024 12:21:09 - INFO - __main__ - Step 11824: {'lr': 0.0004931836709355276, 'samples': 567552, 'steps': 11823, 'loss/train': 2.363591432571411} +07/25/2024 12:21:10 - INFO - __main__ - Step 11825: {'lr': 0.0004931824508540118, 'samples': 567600, 'steps': 11824, 'loss/train': 1.9650899171829224} +07/25/2024 12:21:10 - INFO - __main__ - Step 11826: {'lr': 0.0004931812306648215, 'samples': 567648, 'steps': 11825, 'loss/train': 2.6532764434814453} +07/25/2024 12:21:10 - INFO - __main__ - Step 11827: {'lr': 0.0004931800103679575, 'samples': 567696, 'steps': 11826, 'loss/train': 0.1907958835363388} +07/25/2024 12:21:11 - INFO - __main__ - Step 11828: {'lr': 0.0004931787899634201, 'samples': 567744, 'steps': 11827, 'loss/train': 1.771510124206543} +07/25/2024 12:21:11 - INFO - __main__ - Step 11829: {'lr': 0.0004931775694512101, 'samples': 567792, 'steps': 11828, 'loss/train': 1.9091696739196777} +07/25/2024 12:21:11 - INFO - __main__ - Step 11830: {'lr': 0.0004931763488313278, 'samples': 567840, 'steps': 11829, 'loss/train': 1.9849740266799927} +07/25/2024 12:21:11 - INFO - __main__ - Step 11831: {'lr': 0.0004931751281037738, 'samples': 567888, 'steps': 11830, 'loss/train': 1.6069334745407104} +07/25/2024 12:21:12 - INFO - __main__ - Step 11832: {'lr': 0.0004931739072685486, 'samples': 567936, 'steps': 11831, 'loss/train': 1.7859140634536743} +07/25/2024 12:21:12 - INFO - __main__ - Step 11833: {'lr': 0.0004931726863256528, 'samples': 567984, 'steps': 11832, 'loss/train': 2.986663818359375} +07/25/2024 12:21:12 - INFO - __main__ - Step 11834: {'lr': 0.000493171465275087, 'samples': 568032, 'steps': 11833, 'loss/train': 1.6549816131591797} +07/25/2024 12:21:13 - INFO - __main__ - Step 11835: {'lr': 0.0004931702441168518, 'samples': 568080, 'steps': 11834, 'loss/train': 1.7365429401397705} +07/25/2024 12:21:13 - INFO - __main__ - Step 11836: {'lr': 0.0004931690228509475, 'samples': 568128, 'steps': 11835, 'loss/train': 0.6375657916069031} +07/25/2024 12:21:13 - INFO - __main__ - Step 11837: {'lr': 0.0004931678014773748, 'samples': 568176, 'steps': 11836, 'loss/train': 1.5446057319641113} +07/25/2024 12:21:13 - INFO - __main__ - Step 11838: {'lr': 0.0004931665799961342, 'samples': 568224, 'steps': 11837, 'loss/train': 2.128584623336792} +07/25/2024 12:21:14 - INFO - __main__ - Step 11839: {'lr': 0.0004931653584072264, 'samples': 568272, 'steps': 11838, 'loss/train': 1.7039545774459839} +07/25/2024 12:21:14 - INFO - __main__ - Step 11840: {'lr': 0.0004931641367106516, 'samples': 568320, 'steps': 11839, 'loss/train': 2.265246868133545} +07/25/2024 12:21:14 - INFO - __main__ - Step 11841: {'lr': 0.0004931629149064105, 'samples': 568368, 'steps': 11840, 'loss/train': 1.4277747869491577} +07/25/2024 12:21:15 - INFO - __main__ - Step 11842: {'lr': 0.0004931616929945039, 'samples': 568416, 'steps': 11841, 'loss/train': 1.9968312978744507} +07/25/2024 12:21:15 - INFO - __main__ - Step 11843: {'lr': 0.0004931604709749319, 'samples': 568464, 'steps': 11842, 'loss/train': 1.399852991104126} +07/25/2024 12:21:15 - INFO - __main__ - Step 11844: {'lr': 0.0004931592488476953, 'samples': 568512, 'steps': 11843, 'loss/train': 2.247061014175415} +07/25/2024 12:21:15 - INFO - __main__ - Step 11845: {'lr': 0.0004931580266127947, 'samples': 568560, 'steps': 11844, 'loss/train': 1.8067612648010254} +07/25/2024 12:21:16 - INFO - __main__ - Step 11846: {'lr': 0.0004931568042702305, 'samples': 568608, 'steps': 11845, 'loss/train': 0.5852899551391602} +07/25/2024 12:21:16 - INFO - __main__ - Step 11847: {'lr': 0.0004931555818200032, 'samples': 568656, 'steps': 11846, 'loss/train': 2.141728639602661} +07/25/2024 12:21:16 - INFO - __main__ - Step 11848: {'lr': 0.0004931543592621135, 'samples': 568704, 'steps': 11847, 'loss/train': 2.0022754669189453} +07/25/2024 12:21:16 - INFO - __main__ - Step 11849: {'lr': 0.0004931531365965618, 'samples': 568752, 'steps': 11848, 'loss/train': 2.199887275695801} +07/25/2024 12:21:17 - INFO - __main__ - Step 11850: {'lr': 0.0004931519138233487, 'samples': 568800, 'steps': 11849, 'loss/train': 2.773231029510498} +07/25/2024 12:21:17 - INFO - __main__ - Step 11851: {'lr': 0.0004931506909424749, 'samples': 568848, 'steps': 11850, 'loss/train': 0.14155739545822144} +07/25/2024 12:21:17 - INFO - __main__ - Step 11852: {'lr': 0.0004931494679539407, 'samples': 568896, 'steps': 11851, 'loss/train': 2.258336305618286} +07/25/2024 12:21:18 - INFO - __main__ - Step 11853: {'lr': 0.0004931482448577467, 'samples': 568944, 'steps': 11852, 'loss/train': 1.7386455535888672} +07/25/2024 12:21:18 - INFO - __main__ - Step 11854: {'lr': 0.0004931470216538935, 'samples': 568992, 'steps': 11853, 'loss/train': 1.9199339151382446} +07/25/2024 12:21:18 - INFO - __main__ - Step 11855: {'lr': 0.0004931457983423816, 'samples': 569040, 'steps': 11854, 'loss/train': 1.9380148649215698} +07/25/2024 12:21:18 - INFO - __main__ - Step 11856: {'lr': 0.0004931445749232115, 'samples': 569088, 'steps': 11855, 'loss/train': 1.5212552547454834} +07/25/2024 12:21:19 - INFO - __main__ - Step 11857: {'lr': 0.0004931433513963839, 'samples': 569136, 'steps': 11856, 'loss/train': 2.052586555480957} +07/25/2024 12:21:19 - INFO - __main__ - Step 11858: {'lr': 0.0004931421277618992, 'samples': 569184, 'steps': 11857, 'loss/train': 1.9647183418273926} +07/25/2024 12:21:19 - INFO - __main__ - Step 11859: {'lr': 0.0004931409040197579, 'samples': 569232, 'steps': 11858, 'loss/train': 1.95028555393219} +07/25/2024 12:21:20 - INFO - __main__ - Step 11860: {'lr': 0.0004931396801699607, 'samples': 569280, 'steps': 11859, 'loss/train': 1.648290753364563} +07/25/2024 12:21:20 - INFO - __main__ - Step 11861: {'lr': 0.0004931384562125079, 'samples': 569328, 'steps': 11860, 'loss/train': 1.8426978588104248} +07/25/2024 12:21:20 - INFO - __main__ - Step 11862: {'lr': 0.0004931372321474005, 'samples': 569376, 'steps': 11861, 'loss/train': 1.8684296607971191} +07/25/2024 12:21:20 - INFO - __main__ - Step 11863: {'lr': 0.0004931360079746386, 'samples': 569424, 'steps': 11862, 'loss/train': 1.4385402202606201} +07/25/2024 12:21:21 - INFO - __main__ - Step 11864: {'lr': 0.0004931347836942228, 'samples': 569472, 'steps': 11863, 'loss/train': 1.617484211921692} +07/25/2024 12:21:21 - INFO - __main__ - Step 11865: {'lr': 0.0004931335593061538, 'samples': 569520, 'steps': 11864, 'loss/train': 1.4083985090255737} +07/25/2024 12:21:21 - INFO - __main__ - Step 11866: {'lr': 0.0004931323348104321, 'samples': 569568, 'steps': 11865, 'loss/train': 0.3590013086795807} +07/25/2024 12:21:22 - INFO - __main__ - Step 11867: {'lr': 0.0004931311102070582, 'samples': 569616, 'steps': 11866, 'loss/train': 1.5307199954986572} +07/25/2024 12:21:22 - INFO - __main__ - Step 11868: {'lr': 0.0004931298854960326, 'samples': 569664, 'steps': 11867, 'loss/train': 1.7761638164520264} +07/25/2024 12:21:22 - INFO - __main__ - Step 11869: {'lr': 0.000493128660677356, 'samples': 569712, 'steps': 11868, 'loss/train': 2.066560983657837} +07/25/2024 12:21:22 - INFO - __main__ - Step 11870: {'lr': 0.0004931274357510288, 'samples': 569760, 'steps': 11869, 'loss/train': 0.1652727872133255} +07/25/2024 12:21:23 - INFO - __main__ - Step 11871: {'lr': 0.0004931262107170515, 'samples': 569808, 'steps': 11870, 'loss/train': 1.3768813610076904} +07/25/2024 12:21:23 - INFO - __main__ - Step 11872: {'lr': 0.0004931249855754248, 'samples': 569856, 'steps': 11871, 'loss/train': 2.126457691192627} +07/25/2024 12:21:23 - INFO - __main__ - Step 11873: {'lr': 0.0004931237603261493, 'samples': 569904, 'steps': 11872, 'loss/train': 2.0071542263031006} +07/25/2024 12:21:24 - INFO - __main__ - Step 11874: {'lr': 0.0004931225349692252, 'samples': 569952, 'steps': 11873, 'loss/train': 2.2878737449645996} +07/25/2024 12:21:24 - INFO - __main__ - Step 11875: {'lr': 0.0004931213095046534, 'samples': 570000, 'steps': 11874, 'loss/train': 0.13592098653316498} +07/25/2024 12:21:24 - INFO - __main__ - Step 11876: {'lr': 0.0004931200839324341, 'samples': 570048, 'steps': 11875, 'loss/train': 2.1331188678741455} +07/25/2024 12:21:24 - INFO - __main__ - Step 11877: {'lr': 0.0004931188582525683, 'samples': 570096, 'steps': 11876, 'loss/train': 1.89518404006958} +07/25/2024 12:21:25 - INFO - __main__ - Step 11878: {'lr': 0.0004931176324650562, 'samples': 570144, 'steps': 11877, 'loss/train': 1.801713466644287} +07/25/2024 12:21:25 - INFO - __main__ - Step 11879: {'lr': 0.0004931164065698983, 'samples': 570192, 'steps': 11878, 'loss/train': 1.4995367527008057} +07/25/2024 12:21:25 - INFO - __main__ - Step 11880: {'lr': 0.0004931151805670954, 'samples': 570240, 'steps': 11879, 'loss/train': 1.5632976293563843} +07/25/2024 12:21:26 - INFO - __main__ - Step 11881: {'lr': 0.0004931139544566477, 'samples': 570288, 'steps': 11880, 'loss/train': 1.7192825078964233} +07/25/2024 12:21:26 - INFO - __main__ - Step 11882: {'lr': 0.0004931127282385561, 'samples': 570336, 'steps': 11881, 'loss/train': 2.366957664489746} +07/25/2024 12:21:26 - INFO - __main__ - Step 11883: {'lr': 0.000493111501912821, 'samples': 570384, 'steps': 11882, 'loss/train': 1.2616355419158936} +07/25/2024 12:21:26 - INFO - __main__ - Step 11884: {'lr': 0.000493110275479443, 'samples': 570432, 'steps': 11883, 'loss/train': 1.2237881422042847} +07/25/2024 12:21:27 - INFO - __main__ - Step 11885: {'lr': 0.0004931090489384226, 'samples': 570480, 'steps': 11884, 'loss/train': 0.8977165222167969} +07/25/2024 12:21:27 - INFO - __main__ - Step 11886: {'lr': 0.0004931078222897602, 'samples': 570528, 'steps': 11885, 'loss/train': 2.0481600761413574} +07/25/2024 12:21:27 - INFO - __main__ - Step 11887: {'lr': 0.0004931065955334565, 'samples': 570576, 'steps': 11886, 'loss/train': 2.5280349254608154} +07/25/2024 12:21:28 - INFO - __main__ - Step 11888: {'lr': 0.0004931053686695121, 'samples': 570624, 'steps': 11887, 'loss/train': 1.3194924592971802} +07/25/2024 12:21:28 - INFO - __main__ - Step 11889: {'lr': 0.0004931041416979273, 'samples': 570672, 'steps': 11888, 'loss/train': 1.5146983861923218} +07/25/2024 12:21:28 - INFO - __main__ - Step 11890: {'lr': 0.000493102914618703, 'samples': 570720, 'steps': 11889, 'loss/train': 1.8391467332839966} +07/25/2024 12:21:28 - INFO - __main__ - Step 11891: {'lr': 0.0004931016874318393, 'samples': 570768, 'steps': 11890, 'loss/train': 2.365671396255493} +07/25/2024 12:21:29 - INFO - __main__ - Step 11892: {'lr': 0.0004931004601373371, 'samples': 570816, 'steps': 11891, 'loss/train': 1.7641297578811646} +07/25/2024 12:21:29 - INFO - __main__ - Step 11893: {'lr': 0.0004930992327351969, 'samples': 570864, 'steps': 11892, 'loss/train': 2.0266757011413574} +07/25/2024 12:21:29 - INFO - __main__ - Step 11894: {'lr': 0.0004930980052254191, 'samples': 570912, 'steps': 11893, 'loss/train': 0.14056596159934998} +07/25/2024 12:21:30 - INFO - __main__ - Step 11895: {'lr': 0.0004930967776080043, 'samples': 570960, 'steps': 11894, 'loss/train': 1.1101981401443481} +07/25/2024 12:21:30 - INFO - __main__ - Step 11896: {'lr': 0.0004930955498829531, 'samples': 571008, 'steps': 11895, 'loss/train': 2.51519775390625} +07/25/2024 12:21:30 - INFO - __main__ - Step 11897: {'lr': 0.0004930943220502659, 'samples': 571056, 'steps': 11896, 'loss/train': 1.7284971475601196} +07/25/2024 12:21:30 - INFO - __main__ - Step 11898: {'lr': 0.0004930930941099435, 'samples': 571104, 'steps': 11897, 'loss/train': 1.709783673286438} +07/25/2024 12:21:31 - INFO - __main__ - Step 11899: {'lr': 0.0004930918660619862, 'samples': 571152, 'steps': 11898, 'loss/train': 0.11098919808864594} +07/25/2024 12:21:31 - INFO - __main__ - Step 11900: {'lr': 0.0004930906379063947, 'samples': 571200, 'steps': 11899, 'loss/train': 2.1606791019439697} +07/25/2024 12:21:31 - INFO - __main__ - Step 11901: {'lr': 0.0004930894096431694, 'samples': 571248, 'steps': 11900, 'loss/train': 1.1796406507492065} +07/25/2024 12:21:32 - INFO - __main__ - Step 11902: {'lr': 0.0004930881812723109, 'samples': 571296, 'steps': 11901, 'loss/train': 1.4704265594482422} +07/25/2024 12:21:32 - INFO - __main__ - Step 11903: {'lr': 0.0004930869527938199, 'samples': 571344, 'steps': 11902, 'loss/train': 1.625558614730835} +07/25/2024 12:21:32 - INFO - __main__ - Step 11904: {'lr': 0.0004930857242076966, 'samples': 571392, 'steps': 11903, 'loss/train': 1.5868922472000122} +07/25/2024 12:21:32 - INFO - __main__ - Step 11905: {'lr': 0.0004930844955139419, 'samples': 571440, 'steps': 11904, 'loss/train': 2.202064037322998} +07/25/2024 12:21:33 - INFO - __main__ - Step 11906: {'lr': 0.0004930832667125562, 'samples': 571488, 'steps': 11905, 'loss/train': 1.6347852945327759} +07/25/2024 12:21:33 - INFO - __main__ - Step 11907: {'lr': 0.00049308203780354, 'samples': 571536, 'steps': 11906, 'loss/train': 1.7269142866134644} +07/25/2024 12:21:33 - INFO - __main__ - Step 11908: {'lr': 0.0004930808087868939, 'samples': 571584, 'steps': 11907, 'loss/train': 1.6927214860916138} +07/25/2024 12:21:34 - INFO - __main__ - Step 11909: {'lr': 0.0004930795796626184, 'samples': 571632, 'steps': 11908, 'loss/train': 1.725147008895874} +07/25/2024 12:21:34 - INFO - __main__ - Step 11910: {'lr': 0.0004930783504307142, 'samples': 571680, 'steps': 11909, 'loss/train': 2.1616179943084717} +07/25/2024 12:21:34 - INFO - __main__ - Step 11911: {'lr': 0.0004930771210911815, 'samples': 571728, 'steps': 11910, 'loss/train': 1.436301350593567} +07/25/2024 12:21:34 - INFO - __main__ - Step 11912: {'lr': 0.0004930758916440212, 'samples': 571776, 'steps': 11911, 'loss/train': 1.8078227043151855} +07/25/2024 12:21:35 - INFO - __main__ - Step 11913: {'lr': 0.0004930746620892337, 'samples': 571824, 'steps': 11912, 'loss/train': 1.6380969285964966} +07/25/2024 12:21:35 - INFO - __main__ - Step 11914: {'lr': 0.0004930734324268194, 'samples': 571872, 'steps': 11913, 'loss/train': 1.8773393630981445} +07/25/2024 12:21:35 - INFO - __main__ - Step 11915: {'lr': 0.0004930722026567792, 'samples': 571920, 'steps': 11914, 'loss/train': 1.6529624462127686} +07/25/2024 12:21:36 - INFO - __main__ - Step 11916: {'lr': 0.0004930709727791134, 'samples': 571968, 'steps': 11915, 'loss/train': 2.246830463409424} +07/25/2024 12:21:36 - INFO - __main__ - Step 11917: {'lr': 0.0004930697427938226, 'samples': 572016, 'steps': 11916, 'loss/train': 1.9523487091064453} +07/25/2024 12:21:36 - INFO - __main__ - Step 11918: {'lr': 0.0004930685127009073, 'samples': 572064, 'steps': 11917, 'loss/train': 0.09495527297258377} +07/25/2024 12:21:36 - INFO - __main__ - Step 11919: {'lr': 0.000493067282500368, 'samples': 572112, 'steps': 11918, 'loss/train': 1.629539132118225} +07/25/2024 12:21:37 - INFO - __main__ - Step 11920: {'lr': 0.0004930660521922053, 'samples': 572160, 'steps': 11919, 'loss/train': 2.2054007053375244} +07/25/2024 12:21:37 - INFO - __main__ - Step 11921: {'lr': 0.0004930648217764199, 'samples': 572208, 'steps': 11920, 'loss/train': 1.322874903678894} +07/25/2024 12:21:37 - INFO - __main__ - Step 11922: {'lr': 0.0004930635912530121, 'samples': 572256, 'steps': 11921, 'loss/train': 2.2246696949005127} +07/25/2024 12:21:38 - INFO - __main__ - Step 11923: {'lr': 0.0004930623606219826, 'samples': 572304, 'steps': 11922, 'loss/train': 0.1452205628156662} +07/25/2024 12:21:38 - INFO - __main__ - Step 11924: {'lr': 0.0004930611298833319, 'samples': 572352, 'steps': 11923, 'loss/train': 2.131986379623413} +07/25/2024 12:21:38 - INFO - __main__ - Step 11925: {'lr': 0.0004930598990370605, 'samples': 572400, 'steps': 11924, 'loss/train': 0.9769951701164246} +07/25/2024 12:21:38 - INFO - __main__ - Step 11926: {'lr': 0.000493058668083169, 'samples': 572448, 'steps': 11925, 'loss/train': 1.565988302230835} +07/25/2024 12:21:39 - INFO - __main__ - Step 11927: {'lr': 0.000493057437021658, 'samples': 572496, 'steps': 11926, 'loss/train': 1.590244174003601} +07/25/2024 12:21:39 - INFO - __main__ - Step 11928: {'lr': 0.0004930562058525279, 'samples': 572544, 'steps': 11927, 'loss/train': 0.7961946129798889} +07/25/2024 12:21:39 - INFO - __main__ - Step 11929: {'lr': 0.0004930549745757794, 'samples': 572592, 'steps': 11928, 'loss/train': 0.8754050135612488} +07/25/2024 12:21:39 - INFO - __main__ - Step 11930: {'lr': 0.0004930537431914128, 'samples': 572640, 'steps': 11929, 'loss/train': 1.6974396705627441} +07/25/2024 12:21:40 - INFO - __main__ - Step 11931: {'lr': 0.000493052511699429, 'samples': 572688, 'steps': 11930, 'loss/train': 2.4639697074890137} +07/25/2024 12:21:40 - INFO - __main__ - Step 11932: {'lr': 0.0004930512800998283, 'samples': 572736, 'steps': 11931, 'loss/train': 2.9791653156280518} +07/25/2024 12:21:40 - INFO - __main__ - Step 11933: {'lr': 0.0004930500483926113, 'samples': 572784, 'steps': 11932, 'loss/train': 1.4071052074432373} +07/25/2024 12:21:41 - INFO - __main__ - Step 11934: {'lr': 0.0004930488165777785, 'samples': 572832, 'steps': 11933, 'loss/train': 1.8219366073608398} +07/25/2024 12:21:41 - INFO - __main__ - Step 11935: {'lr': 0.0004930475846553306, 'samples': 572880, 'steps': 11934, 'loss/train': 2.4818003177642822} +07/25/2024 12:21:41 - INFO - __main__ - Step 11936: {'lr': 0.0004930463526252679, 'samples': 572928, 'steps': 11935, 'loss/train': 1.6611989736557007} +07/25/2024 12:21:41 - INFO - __main__ - Step 11937: {'lr': 0.0004930451204875912, 'samples': 572976, 'steps': 11936, 'loss/train': 1.8676385879516602} +07/25/2024 12:21:42 - INFO - __main__ - Step 11938: {'lr': 0.0004930438882423009, 'samples': 573024, 'steps': 11937, 'loss/train': 1.569215178489685} +07/25/2024 12:21:42 - INFO - __main__ - Step 11939: {'lr': 0.0004930426558893975, 'samples': 573072, 'steps': 11938, 'loss/train': 1.6713297367095947} +07/25/2024 12:21:42 - INFO - __main__ - Step 11940: {'lr': 0.0004930414234288817, 'samples': 573120, 'steps': 11939, 'loss/train': 2.2268056869506836} +07/25/2024 12:21:43 - INFO - __main__ - Step 11941: {'lr': 0.000493040190860754, 'samples': 573168, 'steps': 11940, 'loss/train': 1.9430066347122192} +07/25/2024 12:21:43 - INFO - __main__ - Step 11942: {'lr': 0.0004930389581850148, 'samples': 573216, 'steps': 11941, 'loss/train': 0.22876881062984467} +07/25/2024 12:21:43 - INFO - __main__ - Step 11943: {'lr': 0.0004930377254016649, 'samples': 573264, 'steps': 11942, 'loss/train': 1.9475594758987427} +07/25/2024 12:21:43 - INFO - __main__ - Step 11944: {'lr': 0.0004930364925107046, 'samples': 573312, 'steps': 11943, 'loss/train': 2.0470151901245117} +07/25/2024 12:21:44 - INFO - __main__ - Step 11945: {'lr': 0.0004930352595121346, 'samples': 573360, 'steps': 11944, 'loss/train': 1.7956453561782837} +07/25/2024 12:21:44 - INFO - __main__ - Step 11946: {'lr': 0.0004930340264059554, 'samples': 573408, 'steps': 11945, 'loss/train': 1.4172043800354004} +07/25/2024 12:21:44 - INFO - __main__ - Step 11947: {'lr': 0.0004930327931921676, 'samples': 573456, 'steps': 11946, 'loss/train': 0.11795798689126968} +07/25/2024 12:21:45 - INFO - __main__ - Step 11948: {'lr': 0.0004930315598707715, 'samples': 573504, 'steps': 11947, 'loss/train': 0.8977493643760681} +07/25/2024 12:21:45 - INFO - __main__ - Step 11949: {'lr': 0.000493030326441768, 'samples': 573552, 'steps': 11948, 'loss/train': 1.4115469455718994} +07/25/2024 12:21:45 - INFO - __main__ - Step 11950: {'lr': 0.0004930290929051574, 'samples': 573600, 'steps': 11949, 'loss/train': 1.393617868423462} +07/25/2024 12:21:45 - INFO - __main__ - Step 11951: {'lr': 0.0004930278592609403, 'samples': 573648, 'steps': 11950, 'loss/train': 1.576287031173706} +07/25/2024 12:21:46 - INFO - __main__ - Step 11952: {'lr': 0.0004930266255091174, 'samples': 573696, 'steps': 11951, 'loss/train': 1.6424425840377808} +07/25/2024 12:21:46 - INFO - __main__ - Step 11953: {'lr': 0.000493025391649689, 'samples': 573744, 'steps': 11952, 'loss/train': 1.3047834634780884} +07/25/2024 12:21:46 - INFO - __main__ - Step 11954: {'lr': 0.0004930241576826559, 'samples': 573792, 'steps': 11953, 'loss/train': 1.4990590810775757} +07/25/2024 12:21:47 - INFO - __main__ - Step 11955: {'lr': 0.0004930229236080184, 'samples': 573840, 'steps': 11954, 'loss/train': 1.9888378381729126} +07/25/2024 12:21:47 - INFO - __main__ - Step 11956: {'lr': 0.0004930216894257772, 'samples': 573888, 'steps': 11955, 'loss/train': 1.491159439086914} +07/25/2024 12:21:47 - INFO - __main__ - Step 11957: {'lr': 0.0004930204551359328, 'samples': 573936, 'steps': 11956, 'loss/train': 1.8909212350845337} +07/25/2024 12:21:47 - INFO - __main__ - Step 11958: {'lr': 0.0004930192207384857, 'samples': 573984, 'steps': 11957, 'loss/train': 2.20792293548584} +07/25/2024 12:21:48 - INFO - __main__ - Step 11959: {'lr': 0.0004930179862334365, 'samples': 574032, 'steps': 11958, 'loss/train': 1.9696615934371948} +07/25/2024 12:21:48 - INFO - __main__ - Step 11960: {'lr': 0.0004930167516207859, 'samples': 574080, 'steps': 11959, 'loss/train': 1.5529690980911255} +07/25/2024 12:21:48 - INFO - __main__ - Step 11961: {'lr': 0.0004930155169005342, 'samples': 574128, 'steps': 11960, 'loss/train': 2.1296205520629883} +07/25/2024 12:21:49 - INFO - __main__ - Step 11962: {'lr': 0.000493014282072682, 'samples': 574176, 'steps': 11961, 'loss/train': 1.5612599849700928} +07/25/2024 12:21:49 - INFO - __main__ - Step 11963: {'lr': 0.0004930130471372298, 'samples': 574224, 'steps': 11962, 'loss/train': 1.4808900356292725} +07/25/2024 12:21:49 - INFO - __main__ - Step 11964: {'lr': 0.0004930118120941784, 'samples': 574272, 'steps': 11963, 'loss/train': 1.643744945526123} +07/25/2024 12:21:49 - INFO - __main__ - Step 11965: {'lr': 0.0004930105769435281, 'samples': 574320, 'steps': 11964, 'loss/train': 1.6203289031982422} +07/25/2024 12:21:50 - INFO - __main__ - Step 11966: {'lr': 0.0004930093416852796, 'samples': 574368, 'steps': 11965, 'loss/train': 0.2235625833272934} +07/25/2024 12:21:50 - INFO - __main__ - Step 11967: {'lr': 0.0004930081063194333, 'samples': 574416, 'steps': 11966, 'loss/train': 1.5565465688705444} +07/25/2024 12:21:50 - INFO - __main__ - Step 11968: {'lr': 0.0004930068708459898, 'samples': 574464, 'steps': 11967, 'loss/train': 1.8697184324264526} +07/25/2024 12:21:51 - INFO - __main__ - Step 11969: {'lr': 0.0004930056352649497, 'samples': 574512, 'steps': 11968, 'loss/train': 2.122079610824585} +07/25/2024 12:21:51 - INFO - __main__ - Step 11970: {'lr': 0.0004930043995763135, 'samples': 574560, 'steps': 11969, 'loss/train': 1.8275068998336792} +07/25/2024 12:21:51 - INFO - __main__ - Step 11971: {'lr': 0.0004930031637800817, 'samples': 574608, 'steps': 11970, 'loss/train': 0.20211344957351685} +07/25/2024 12:21:51 - INFO - __main__ - Step 11972: {'lr': 0.000493001927876255, 'samples': 574656, 'steps': 11971, 'loss/train': 1.2683563232421875} +07/25/2024 12:21:52 - INFO - __main__ - Step 11973: {'lr': 0.0004930006918648338, 'samples': 574704, 'steps': 11972, 'loss/train': 1.4918755292892456} +07/25/2024 12:21:52 - INFO - __main__ - Step 11974: {'lr': 0.0004929994557458187, 'samples': 574752, 'steps': 11973, 'loss/train': 1.998854160308838} +07/25/2024 12:21:52 - INFO - __main__ - Step 11975: {'lr': 0.0004929982195192103, 'samples': 574800, 'steps': 11974, 'loss/train': 0.589489221572876} +07/25/2024 12:21:53 - INFO - __main__ - Step 11976: {'lr': 0.0004929969831850092, 'samples': 574848, 'steps': 11975, 'loss/train': 1.660925030708313} +07/25/2024 12:21:53 - INFO - __main__ - Step 11977: {'lr': 0.0004929957467432156, 'samples': 574896, 'steps': 11976, 'loss/train': 1.7856621742248535} +07/25/2024 12:21:53 - INFO - __main__ - Step 11978: {'lr': 0.0004929945101938305, 'samples': 574944, 'steps': 11977, 'loss/train': 2.315364360809326} +07/25/2024 12:21:53 - INFO - __main__ - Step 11979: {'lr': 0.0004929932735368541, 'samples': 574992, 'steps': 11978, 'loss/train': 1.673525094985962} +07/25/2024 12:21:54 - INFO - __main__ - Step 11980: {'lr': 0.0004929920367722872, 'samples': 575040, 'steps': 11979, 'loss/train': 1.5594096183776855} +07/25/2024 12:21:54 - INFO - __main__ - Step 11981: {'lr': 0.0004929907999001302, 'samples': 575088, 'steps': 11980, 'loss/train': 2.015855312347412} +07/25/2024 12:21:54 - INFO - __main__ - Step 11982: {'lr': 0.0004929895629203837, 'samples': 575136, 'steps': 11981, 'loss/train': 1.430886149406433} +07/25/2024 12:21:55 - INFO - __main__ - Step 11983: {'lr': 0.0004929883258330481, 'samples': 575184, 'steps': 11982, 'loss/train': 2.233525276184082} +07/25/2024 12:21:55 - INFO - __main__ - Step 11984: {'lr': 0.0004929870886381243, 'samples': 575232, 'steps': 11983, 'loss/train': 1.5048013925552368} +07/25/2024 12:21:55 - INFO - __main__ - Step 11985: {'lr': 0.0004929858513356124, 'samples': 575280, 'steps': 11984, 'loss/train': 1.7959156036376953} +07/25/2024 12:21:55 - INFO - __main__ - Step 11986: {'lr': 0.0004929846139255133, 'samples': 575328, 'steps': 11985, 'loss/train': 1.5760393142700195} +07/25/2024 12:21:56 - INFO - __main__ - Step 11987: {'lr': 0.0004929833764078273, 'samples': 575376, 'steps': 11986, 'loss/train': 2.2368175983428955} +07/25/2024 12:21:56 - INFO - __main__ - Step 11988: {'lr': 0.0004929821387825553, 'samples': 575424, 'steps': 11987, 'loss/train': 1.6338831186294556} +07/25/2024 12:21:56 - INFO - __main__ - Step 11989: {'lr': 0.0004929809010496974, 'samples': 575472, 'steps': 11988, 'loss/train': 2.191838026046753} +07/25/2024 12:21:57 - INFO - __main__ - Step 11990: {'lr': 0.0004929796632092545, 'samples': 575520, 'steps': 11989, 'loss/train': 0.1235431507229805} +07/25/2024 12:21:57 - INFO - __main__ - Step 11991: {'lr': 0.000492978425261227, 'samples': 575568, 'steps': 11990, 'loss/train': 0.3722284138202667} +07/25/2024 12:21:57 - INFO - __main__ - Step 11992: {'lr': 0.0004929771872056154, 'samples': 575616, 'steps': 11991, 'loss/train': 2.144763708114624} +07/25/2024 12:21:57 - INFO - __main__ - Step 11993: {'lr': 0.0004929759490424203, 'samples': 575664, 'steps': 11992, 'loss/train': 1.901485562324524} +07/25/2024 12:21:58 - INFO - __main__ - Step 11994: {'lr': 0.0004929747107716423, 'samples': 575712, 'steps': 11993, 'loss/train': 1.9186129570007324} +07/25/2024 12:21:58 - INFO - __main__ - Step 11995: {'lr': 0.0004929734723932818, 'samples': 575760, 'steps': 11994, 'loss/train': 0.1818327009677887} +07/25/2024 12:21:58 - INFO - __main__ - Step 11996: {'lr': 0.0004929722339073396, 'samples': 575808, 'steps': 11995, 'loss/train': 1.8706516027450562} +07/25/2024 12:21:59 - INFO - __main__ - Step 11997: {'lr': 0.0004929709953138161, 'samples': 575856, 'steps': 11996, 'loss/train': 1.2489113807678223} +07/25/2024 12:21:59 - INFO - __main__ - Step 11998: {'lr': 0.0004929697566127117, 'samples': 575904, 'steps': 11997, 'loss/train': 2.0468997955322266} +07/25/2024 12:21:59 - INFO - __main__ - Step 11999: {'lr': 0.0004929685178040273, 'samples': 575952, 'steps': 11998, 'loss/train': 1.3820785284042358} +07/25/2024 12:21:59 - INFO - __main__ - Step 12000: {'lr': 0.000492967278887763, 'samples': 576000, 'steps': 11999, 'loss/train': 1.5636752843856812} +07/25/2024 12:22:00 - INFO - __main__ - Step 12001: {'lr': 0.0004929660398639198, 'samples': 576048, 'steps': 12000, 'loss/train': 1.6362295150756836} +07/25/2024 12:22:00 - INFO - __main__ - Step 12002: {'lr': 0.000492964800732498, 'samples': 576096, 'steps': 12001, 'loss/train': 1.753753662109375} +07/25/2024 12:22:00 - INFO - __main__ - Step 12003: {'lr': 0.0004929635614934982, 'samples': 576144, 'steps': 12002, 'loss/train': 1.3954979181289673} +07/25/2024 12:22:01 - INFO - __main__ - Step 12004: {'lr': 0.0004929623221469209, 'samples': 576192, 'steps': 12003, 'loss/train': 1.2736459970474243} +07/25/2024 12:22:01 - INFO - __main__ - Step 12005: {'lr': 0.0004929610826927668, 'samples': 576240, 'steps': 12004, 'loss/train': 1.9579249620437622} +07/25/2024 12:22:01 - INFO - __main__ - Step 12006: {'lr': 0.0004929598431310362, 'samples': 576288, 'steps': 12005, 'loss/train': 2.1736903190612793} +07/25/2024 12:22:01 - INFO - __main__ - Step 12007: {'lr': 0.0004929586034617299, 'samples': 576336, 'steps': 12006, 'loss/train': 1.3203102350234985} +07/25/2024 12:22:02 - INFO - __main__ - Step 12008: {'lr': 0.0004929573636848482, 'samples': 576384, 'steps': 12007, 'loss/train': 1.6980676651000977} +07/25/2024 12:22:02 - INFO - __main__ - Step 12009: {'lr': 0.0004929561238003919, 'samples': 576432, 'steps': 12008, 'loss/train': 2.052063465118408} +07/25/2024 12:22:02 - INFO - __main__ - Step 12010: {'lr': 0.0004929548838083614, 'samples': 576480, 'steps': 12009, 'loss/train': 2.0429675579071045} +07/25/2024 12:22:03 - INFO - __main__ - Step 12011: {'lr': 0.0004929536437087573, 'samples': 576528, 'steps': 12010, 'loss/train': 2.1469991207122803} +07/25/2024 12:22:03 - INFO - __main__ - Step 12012: {'lr': 0.0004929524035015801, 'samples': 576576, 'steps': 12011, 'loss/train': 1.6213676929473877} +07/25/2024 12:22:03 - INFO - __main__ - Step 12013: {'lr': 0.0004929511631868304, 'samples': 576624, 'steps': 12012, 'loss/train': 2.1852471828460693} +07/25/2024 12:22:03 - INFO - __main__ - Step 12014: {'lr': 0.0004929499227645088, 'samples': 576672, 'steps': 12013, 'loss/train': 0.09276222437620163} +07/25/2024 12:22:04 - INFO - __main__ - Step 12015: {'lr': 0.0004929486822346158, 'samples': 576720, 'steps': 12014, 'loss/train': 1.768279790878296} +07/25/2024 12:22:04 - INFO - __main__ - Step 12016: {'lr': 0.0004929474415971517, 'samples': 576768, 'steps': 12015, 'loss/train': 2.183974027633667} +07/25/2024 12:22:04 - INFO - __main__ - Step 12017: {'lr': 0.0004929462008521175, 'samples': 576816, 'steps': 12016, 'loss/train': 1.7092790603637695} +07/25/2024 12:22:05 - INFO - __main__ - Step 12018: {'lr': 0.0004929449599995135, 'samples': 576864, 'steps': 12017, 'loss/train': 1.669635534286499} +07/25/2024 12:22:05 - INFO - __main__ - Step 12019: {'lr': 0.0004929437190393402, 'samples': 576912, 'steps': 12018, 'loss/train': 0.34090670943260193} +07/25/2024 12:22:05 - INFO - __main__ - Step 12020: {'lr': 0.0004929424779715983, 'samples': 576960, 'steps': 12019, 'loss/train': 1.5148403644561768} +07/25/2024 12:22:05 - INFO - __main__ - Step 12021: {'lr': 0.0004929412367962882, 'samples': 577008, 'steps': 12020, 'loss/train': 2.136350631713867} +07/25/2024 12:22:06 - INFO - __main__ - Step 12022: {'lr': 0.0004929399955134106, 'samples': 577056, 'steps': 12021, 'loss/train': 1.7986154556274414} +07/25/2024 12:22:06 - INFO - __main__ - Step 12023: {'lr': 0.000492938754122966, 'samples': 577104, 'steps': 12022, 'loss/train': 1.5889694690704346} +07/25/2024 12:22:06 - INFO - __main__ - Step 12024: {'lr': 0.0004929375126249549, 'samples': 577152, 'steps': 12023, 'loss/train': 1.9381177425384521} +07/25/2024 12:22:06 - INFO - __main__ - Step 12025: {'lr': 0.0004929362710193779, 'samples': 577200, 'steps': 12024, 'loss/train': 1.8990727663040161} +07/25/2024 12:22:07 - INFO - __main__ - Step 12026: {'lr': 0.0004929350293062354, 'samples': 577248, 'steps': 12025, 'loss/train': 1.7097111940383911} +07/25/2024 12:22:07 - INFO - __main__ - Step 12027: {'lr': 0.0004929337874855282, 'samples': 577296, 'steps': 12026, 'loss/train': 1.6107348203659058} +07/25/2024 12:22:07 - INFO - __main__ - Step 12028: {'lr': 0.0004929325455572567, 'samples': 577344, 'steps': 12027, 'loss/train': 1.2921065092086792} +07/25/2024 12:22:08 - INFO - __main__ - Step 12029: {'lr': 0.0004929313035214214, 'samples': 577392, 'steps': 12028, 'loss/train': 1.2187689542770386} +07/25/2024 12:22:08 - INFO - __main__ - Step 12030: {'lr': 0.000492930061378023, 'samples': 577440, 'steps': 12029, 'loss/train': 1.6710485219955444} +07/25/2024 12:22:08 - INFO - __main__ - Step 12031: {'lr': 0.000492928819127062, 'samples': 577488, 'steps': 12030, 'loss/train': 1.4133168458938599} +07/25/2024 12:22:09 - INFO - __main__ - Step 12032: {'lr': 0.0004929275767685388, 'samples': 577536, 'steps': 12031, 'loss/train': 2.0462403297424316} +07/25/2024 12:22:09 - INFO - __main__ - Step 12033: {'lr': 0.0004929263343024542, 'samples': 577584, 'steps': 12032, 'loss/train': 2.0905508995056152} +07/25/2024 12:22:09 - INFO - __main__ - Step 12034: {'lr': 0.0004929250917288086, 'samples': 577632, 'steps': 12033, 'loss/train': 1.8472139835357666} +07/25/2024 12:22:09 - INFO - __main__ - Step 12035: {'lr': 0.0004929238490476026, 'samples': 577680, 'steps': 12034, 'loss/train': 1.8412795066833496} +07/25/2024 12:22:10 - INFO - __main__ - Step 12036: {'lr': 0.0004929226062588367, 'samples': 577728, 'steps': 12035, 'loss/train': 1.7184224128723145} +07/25/2024 12:22:10 - INFO - __main__ - Step 12037: {'lr': 0.0004929213633625114, 'samples': 577776, 'steps': 12036, 'loss/train': 2.1850554943084717} +07/25/2024 12:22:10 - INFO - __main__ - Step 12038: {'lr': 0.0004929201203586275, 'samples': 577824, 'steps': 12037, 'loss/train': 0.07922831922769547} +07/25/2024 12:22:10 - INFO - __main__ - Step 12039: {'lr': 0.0004929188772471852, 'samples': 577872, 'steps': 12038, 'loss/train': 1.7831590175628662} +07/25/2024 12:22:11 - INFO - __main__ - Step 12040: {'lr': 0.0004929176340281853, 'samples': 577920, 'steps': 12039, 'loss/train': 1.5438342094421387} +07/25/2024 12:22:11 - INFO - __main__ - Step 12041: {'lr': 0.0004929163907016283, 'samples': 577968, 'steps': 12040, 'loss/train': 2.156270980834961} +07/25/2024 12:22:11 - INFO - __main__ - Step 12042: {'lr': 0.0004929151472675147, 'samples': 578016, 'steps': 12041, 'loss/train': 1.0016462802886963} +07/25/2024 12:22:12 - INFO - __main__ - Step 12043: {'lr': 0.0004929139037258451, 'samples': 578064, 'steps': 12042, 'loss/train': 2.4812419414520264} +07/25/2024 12:22:12 - INFO - __main__ - Step 12044: {'lr': 0.00049291266007662, 'samples': 578112, 'steps': 12043, 'loss/train': 1.7802659273147583} +07/25/2024 12:22:12 - INFO - __main__ - Step 12045: {'lr': 0.00049291141631984, 'samples': 578160, 'steps': 12044, 'loss/train': 1.8389408588409424} +07/25/2024 12:22:12 - INFO - __main__ - Step 12046: {'lr': 0.0004929101724555057, 'samples': 578208, 'steps': 12045, 'loss/train': 1.2371246814727783} +07/25/2024 12:22:13 - INFO - __main__ - Step 12047: {'lr': 0.0004929089284836175, 'samples': 578256, 'steps': 12046, 'loss/train': 1.4069538116455078} +07/25/2024 12:22:13 - INFO - __main__ - Step 12048: {'lr': 0.000492907684404176, 'samples': 578304, 'steps': 12047, 'loss/train': 2.138291120529175} +07/25/2024 12:22:13 - INFO - __main__ - Step 12049: {'lr': 0.0004929064402171818, 'samples': 578352, 'steps': 12048, 'loss/train': 1.3843064308166504} +07/25/2024 12:22:14 - INFO - __main__ - Step 12050: {'lr': 0.0004929051959226355, 'samples': 578400, 'steps': 12049, 'loss/train': 1.7052267789840698} +07/25/2024 12:22:14 - INFO - __main__ - Step 12051: {'lr': 0.0004929039515205374, 'samples': 578448, 'steps': 12050, 'loss/train': 1.435092806816101} +07/25/2024 12:22:14 - INFO - __main__ - Step 12052: {'lr': 0.0004929027070108885, 'samples': 578496, 'steps': 12051, 'loss/train': 1.8034275770187378} +07/25/2024 12:22:14 - INFO - __main__ - Step 12053: {'lr': 0.0004929014623936889, 'samples': 578544, 'steps': 12052, 'loss/train': 1.4120885133743286} +07/25/2024 12:22:15 - INFO - __main__ - Step 12054: {'lr': 0.0004929002176689394, 'samples': 578592, 'steps': 12053, 'loss/train': 2.1006288528442383} +07/25/2024 12:22:15 - INFO - __main__ - Step 12055: {'lr': 0.0004928989728366405, 'samples': 578640, 'steps': 12054, 'loss/train': 1.7702614068984985} +07/25/2024 12:22:15 - INFO - __main__ - Step 12056: {'lr': 0.0004928977278967928, 'samples': 578688, 'steps': 12055, 'loss/train': 2.1263020038604736} +07/25/2024 12:22:16 - INFO - __main__ - Step 12057: {'lr': 0.0004928964828493967, 'samples': 578736, 'steps': 12056, 'loss/train': 1.574865698814392} +07/25/2024 12:22:16 - INFO - __main__ - Step 12058: {'lr': 0.0004928952376944529, 'samples': 578784, 'steps': 12057, 'loss/train': 2.050424814224243} +07/25/2024 12:22:16 - INFO - __main__ - Step 12059: {'lr': 0.0004928939924319619, 'samples': 578832, 'steps': 12058, 'loss/train': 1.0702751874923706} +07/25/2024 12:22:16 - INFO - __main__ - Step 12060: {'lr': 0.0004928927470619242, 'samples': 578880, 'steps': 12059, 'loss/train': 1.602980375289917} +07/25/2024 12:22:17 - INFO - __main__ - Step 12061: {'lr': 0.0004928915015843403, 'samples': 578928, 'steps': 12060, 'loss/train': 1.6248760223388672} +07/25/2024 12:22:17 - INFO - __main__ - Step 12062: {'lr': 0.000492890255999211, 'samples': 578976, 'steps': 12061, 'loss/train': 0.0825016126036644} +07/25/2024 12:22:17 - INFO - __main__ - Step 12063: {'lr': 0.0004928890103065367, 'samples': 579024, 'steps': 12062, 'loss/train': 1.5963776111602783} +07/25/2024 12:22:18 - INFO - __main__ - Step 12064: {'lr': 0.0004928877645063179, 'samples': 579072, 'steps': 12063, 'loss/train': 1.6308212280273438} +07/25/2024 12:22:18 - INFO - __main__ - Step 12065: {'lr': 0.0004928865185985553, 'samples': 579120, 'steps': 12064, 'loss/train': 1.819576621055603} +07/25/2024 12:22:18 - INFO - __main__ - Step 12066: {'lr': 0.0004928852725832492, 'samples': 579168, 'steps': 12065, 'loss/train': 1.5756199359893799} +07/25/2024 12:22:18 - INFO - __main__ - Step 12067: {'lr': 0.0004928840264604005, 'samples': 579216, 'steps': 12066, 'loss/train': 2.2686264514923096} +07/25/2024 12:22:19 - INFO - __main__ - Step 12068: {'lr': 0.0004928827802300093, 'samples': 579264, 'steps': 12067, 'loss/train': 1.9420439004898071} +07/25/2024 12:22:19 - INFO - __main__ - Step 12069: {'lr': 0.0004928815338920766, 'samples': 579312, 'steps': 12068, 'loss/train': 1.1819835901260376} +07/25/2024 12:22:19 - INFO - __main__ - Step 12070: {'lr': 0.0004928802874466027, 'samples': 579360, 'steps': 12069, 'loss/train': 2.073930025100708} +07/25/2024 12:22:20 - INFO - __main__ - Step 12071: {'lr': 0.0004928790408935882, 'samples': 579408, 'steps': 12070, 'loss/train': 1.5176670551300049} +07/25/2024 12:22:20 - INFO - __main__ - Step 12072: {'lr': 0.0004928777942330337, 'samples': 579456, 'steps': 12071, 'loss/train': 2.0164403915405273} +07/25/2024 12:22:20 - INFO - __main__ - Step 12073: {'lr': 0.0004928765474649398, 'samples': 579504, 'steps': 12072, 'loss/train': 1.731400489807129} +07/25/2024 12:22:20 - INFO - __main__ - Step 12074: {'lr': 0.0004928753005893068, 'samples': 579552, 'steps': 12073, 'loss/train': 1.968339204788208} +07/25/2024 12:22:21 - INFO - __main__ - Step 12075: {'lr': 0.0004928740536061355, 'samples': 579600, 'steps': 12074, 'loss/train': 1.6529797315597534} +07/25/2024 12:22:21 - INFO - __main__ - Step 12076: {'lr': 0.0004928728065154264, 'samples': 579648, 'steps': 12075, 'loss/train': 0.4088500440120697} +07/25/2024 12:22:21 - INFO - __main__ - Step 12077: {'lr': 0.00049287155931718, 'samples': 579696, 'steps': 12076, 'loss/train': 1.3614503145217896} +07/25/2024 12:22:22 - INFO - __main__ - Step 12078: {'lr': 0.0004928703120113968, 'samples': 579744, 'steps': 12077, 'loss/train': 1.7196789979934692} +07/25/2024 12:22:22 - INFO - __main__ - Step 12079: {'lr': 0.0004928690645980776, 'samples': 579792, 'steps': 12078, 'loss/train': 1.7479921579360962} +07/25/2024 12:22:22 - INFO - __main__ - Step 12080: {'lr': 0.0004928678170772226, 'samples': 579840, 'steps': 12079, 'loss/train': 1.9958745241165161} +07/25/2024 12:22:22 - INFO - __main__ - Step 12081: {'lr': 0.0004928665694488327, 'samples': 579888, 'steps': 12080, 'loss/train': 2.086818218231201} +07/25/2024 12:22:23 - INFO - __main__ - Step 12082: {'lr': 0.0004928653217129083, 'samples': 579936, 'steps': 12081, 'loss/train': 1.8409996032714844} +07/25/2024 12:22:23 - INFO - __main__ - Step 12083: {'lr': 0.0004928640738694498, 'samples': 579984, 'steps': 12082, 'loss/train': 1.6080279350280762} +07/25/2024 12:22:23 - INFO - __main__ - Step 12084: {'lr': 0.0004928628259184579, 'samples': 580032, 'steps': 12083, 'loss/train': 2.146764039993286} +07/25/2024 12:22:24 - INFO - __main__ - Step 12085: {'lr': 0.0004928615778599332, 'samples': 580080, 'steps': 12084, 'loss/train': 1.1538399457931519} +07/25/2024 12:22:24 - INFO - __main__ - Step 12086: {'lr': 0.0004928603296938762, 'samples': 580128, 'steps': 12085, 'loss/train': 0.07944858819246292} +07/25/2024 12:22:24 - INFO - __main__ - Step 12087: {'lr': 0.0004928590814202875, 'samples': 580176, 'steps': 12086, 'loss/train': 1.5589014291763306} +07/25/2024 12:22:24 - INFO - __main__ - Step 12088: {'lr': 0.0004928578330391676, 'samples': 580224, 'steps': 12087, 'loss/train': 2.023932933807373} +07/25/2024 12:22:25 - INFO - __main__ - Step 12089: {'lr': 0.0004928565845505169, 'samples': 580272, 'steps': 12088, 'loss/train': 1.6537946462631226} +07/25/2024 12:22:25 - INFO - __main__ - Step 12090: {'lr': 0.0004928553359543362, 'samples': 580320, 'steps': 12089, 'loss/train': 1.7494773864746094} +07/25/2024 12:22:25 - INFO - __main__ - Step 12091: {'lr': 0.0004928540872506259, 'samples': 580368, 'steps': 12090, 'loss/train': 2.3455746173858643} +07/25/2024 12:22:26 - INFO - __main__ - Step 12092: {'lr': 0.0004928528384393868, 'samples': 580416, 'steps': 12091, 'loss/train': 1.615975260734558} +07/25/2024 12:22:26 - INFO - __main__ - Step 12093: {'lr': 0.0004928515895206192, 'samples': 580464, 'steps': 12092, 'loss/train': 0.8967782855033875} +07/25/2024 12:22:26 - INFO - __main__ - Step 12094: {'lr': 0.0004928503404943237, 'samples': 580512, 'steps': 12093, 'loss/train': 1.973436951637268} +07/25/2024 12:22:26 - INFO - __main__ - Step 12095: {'lr': 0.0004928490913605008, 'samples': 580560, 'steps': 12094, 'loss/train': 1.5416702032089233} +07/25/2024 12:22:27 - INFO - __main__ - Step 12096: {'lr': 0.0004928478421191511, 'samples': 580608, 'steps': 12095, 'loss/train': 1.6996474266052246} +07/25/2024 12:22:27 - INFO - __main__ - Step 12097: {'lr': 0.0004928465927702754, 'samples': 580656, 'steps': 12096, 'loss/train': 1.6898102760314941} +07/25/2024 12:22:27 - INFO - __main__ - Step 12098: {'lr': 0.0004928453433138739, 'samples': 580704, 'steps': 12097, 'loss/train': 2.332120180130005} +07/25/2024 12:22:28 - INFO - __main__ - Step 12099: {'lr': 0.0004928440937499472, 'samples': 580752, 'steps': 12098, 'loss/train': 1.7451753616333008} +07/25/2024 12:22:28 - INFO - __main__ - Step 12100: {'lr': 0.000492842844078496, 'samples': 580800, 'steps': 12099, 'loss/train': 0.9135735630989075} +07/25/2024 12:22:28 - INFO - __main__ - Step 12101: {'lr': 0.0004928415942995208, 'samples': 580848, 'steps': 12100, 'loss/train': 1.6040239334106445} +07/25/2024 12:22:28 - INFO - __main__ - Step 12102: {'lr': 0.0004928403444130222, 'samples': 580896, 'steps': 12101, 'loss/train': 1.8484407663345337} +07/25/2024 12:22:29 - INFO - __main__ - Step 12103: {'lr': 0.0004928390944190006, 'samples': 580944, 'steps': 12102, 'loss/train': 1.4561008214950562} +07/25/2024 12:22:29 - INFO - __main__ - Step 12104: {'lr': 0.0004928378443174568, 'samples': 580992, 'steps': 12103, 'loss/train': 1.5545400381088257} +07/25/2024 12:22:29 - INFO - __main__ - Step 12105: {'lr': 0.0004928365941083911, 'samples': 581040, 'steps': 12104, 'loss/train': 1.9711248874664307} +07/25/2024 12:22:30 - INFO - __main__ - Step 12106: {'lr': 0.0004928353437918042, 'samples': 581088, 'steps': 12105, 'loss/train': 1.6225836277008057} +07/25/2024 12:22:30 - INFO - __main__ - Step 12107: {'lr': 0.0004928340933676965, 'samples': 581136, 'steps': 12106, 'loss/train': 1.6201047897338867} +07/25/2024 12:22:30 - INFO - __main__ - Step 12108: {'lr': 0.0004928328428360689, 'samples': 581184, 'steps': 12107, 'loss/train': 1.792565107345581} +07/25/2024 12:22:30 - INFO - __main__ - Step 12109: {'lr': 0.0004928315921969215, 'samples': 581232, 'steps': 12108, 'loss/train': 1.1683615446090698} +07/25/2024 12:22:31 - INFO - __main__ - Step 12110: {'lr': 0.0004928303414502552, 'samples': 581280, 'steps': 12109, 'loss/train': 0.059895116835832596} +07/25/2024 12:22:31 - INFO - __main__ - Step 12111: {'lr': 0.0004928290905960703, 'samples': 581328, 'steps': 12110, 'loss/train': 1.8078619241714478} +07/25/2024 12:22:31 - INFO - __main__ - Step 12112: {'lr': 0.0004928278396343676, 'samples': 581376, 'steps': 12111, 'loss/train': 2.1557774543762207} +07/25/2024 12:22:32 - INFO - __main__ - Step 12113: {'lr': 0.0004928265885651475, 'samples': 581424, 'steps': 12112, 'loss/train': 1.5271085500717163} +07/25/2024 12:22:32 - INFO - __main__ - Step 12114: {'lr': 0.0004928253373884106, 'samples': 581472, 'steps': 12113, 'loss/train': 2.077610731124878} +07/25/2024 12:22:32 - INFO - __main__ - Step 12115: {'lr': 0.0004928240861041574, 'samples': 581520, 'steps': 12114, 'loss/train': 1.311033844947815} +07/25/2024 12:22:32 - INFO - __main__ - Step 12116: {'lr': 0.0004928228347123886, 'samples': 581568, 'steps': 12115, 'loss/train': 1.9431926012039185} +07/25/2024 12:22:33 - INFO - __main__ - Step 12117: {'lr': 0.0004928215832131046, 'samples': 581616, 'steps': 12116, 'loss/train': 1.4564411640167236} +07/25/2024 12:22:33 - INFO - __main__ - Step 12118: {'lr': 0.000492820331606306, 'samples': 581664, 'steps': 12117, 'loss/train': 1.9687161445617676} +07/25/2024 12:22:33 - INFO - __main__ - Step 12119: {'lr': 0.0004928190798919933, 'samples': 581712, 'steps': 12118, 'loss/train': 1.5718790292739868} +07/25/2024 12:22:34 - INFO - __main__ - Step 12120: {'lr': 0.0004928178280701672, 'samples': 581760, 'steps': 12119, 'loss/train': 2.058629035949707} +07/25/2024 12:22:34 - INFO - __main__ - Step 12121: {'lr': 0.0004928165761408282, 'samples': 581808, 'steps': 12120, 'loss/train': 2.0637526512145996} +07/25/2024 12:22:34 - INFO - __main__ - Step 12122: {'lr': 0.0004928153241039768, 'samples': 581856, 'steps': 12121, 'loss/train': 2.2192494869232178} +07/25/2024 12:22:34 - INFO - __main__ - Step 12123: {'lr': 0.0004928140719596135, 'samples': 581904, 'steps': 12122, 'loss/train': 1.4781919717788696} +07/25/2024 12:22:35 - INFO - __main__ - Step 12124: {'lr': 0.000492812819707739, 'samples': 581952, 'steps': 12123, 'loss/train': 1.5990606546401978} +07/25/2024 12:22:35 - INFO - __main__ - Step 12125: {'lr': 0.0004928115673483537, 'samples': 582000, 'steps': 12124, 'loss/train': 2.3260140419006348} +07/25/2024 12:22:35 - INFO - __main__ - Step 12126: {'lr': 0.0004928103148814584, 'samples': 582048, 'steps': 12125, 'loss/train': 1.6933549642562866} +07/25/2024 12:22:36 - INFO - __main__ - Step 12127: {'lr': 0.0004928090623070534, 'samples': 582096, 'steps': 12126, 'loss/train': 2.3378922939300537} +07/25/2024 12:22:36 - INFO - __main__ - Step 12128: {'lr': 0.0004928078096251392, 'samples': 582144, 'steps': 12127, 'loss/train': 1.901605248451233} +07/25/2024 12:22:36 - INFO - __main__ - Step 12129: {'lr': 0.0004928065568357168, 'samples': 582192, 'steps': 12128, 'loss/train': 1.7494089603424072} +07/25/2024 12:22:36 - INFO - __main__ - Step 12130: {'lr': 0.0004928053039387863, 'samples': 582240, 'steps': 12129, 'loss/train': 1.594796895980835} +07/25/2024 12:22:37 - INFO - __main__ - Step 12131: {'lr': 0.0004928040509343484, 'samples': 582288, 'steps': 12130, 'loss/train': 1.8547747135162354} +07/25/2024 12:22:37 - INFO - __main__ - Step 12132: {'lr': 0.0004928027978224037, 'samples': 582336, 'steps': 12131, 'loss/train': 2.019284725189209} +07/25/2024 12:22:37 - INFO - __main__ - Step 12133: {'lr': 0.0004928015446029528, 'samples': 582384, 'steps': 12132, 'loss/train': 1.4699543714523315} +07/25/2024 12:22:37 - INFO - __main__ - Step 12134: {'lr': 0.000492800291275996, 'samples': 582432, 'steps': 12133, 'loss/train': 0.0794510543346405} +07/25/2024 12:22:38 - INFO - __main__ - Step 12135: {'lr': 0.0004927990378415342, 'samples': 582480, 'steps': 12134, 'loss/train': 1.3786427974700928} +07/25/2024 12:22:38 - INFO - __main__ - Step 12136: {'lr': 0.0004927977842995677, 'samples': 582528, 'steps': 12135, 'loss/train': 1.6454217433929443} +07/25/2024 12:22:38 - INFO - __main__ - Step 12137: {'lr': 0.0004927965306500972, 'samples': 582576, 'steps': 12136, 'loss/train': 1.393922209739685} +07/25/2024 12:22:39 - INFO - __main__ - Step 12138: {'lr': 0.0004927952768931231, 'samples': 582624, 'steps': 12137, 'loss/train': 1.8305389881134033} +07/25/2024 12:22:39 - INFO - __main__ - Step 12139: {'lr': 0.0004927940230286462, 'samples': 582672, 'steps': 12138, 'loss/train': 1.7649470567703247} +07/25/2024 12:22:39 - INFO - __main__ - Step 12140: {'lr': 0.0004927927690566668, 'samples': 582720, 'steps': 12139, 'loss/train': 1.7725210189819336} +07/25/2024 12:22:39 - INFO - __main__ - Step 12141: {'lr': 0.0004927915149771857, 'samples': 582768, 'steps': 12140, 'loss/train': 1.3278307914733887} +07/25/2024 12:22:40 - INFO - __main__ - Step 12142: {'lr': 0.0004927902607902031, 'samples': 582816, 'steps': 12141, 'loss/train': 2.170649290084839} +07/25/2024 12:22:40 - INFO - __main__ - Step 12143: {'lr': 0.00049278900649572, 'samples': 582864, 'steps': 12142, 'loss/train': 1.9557090997695923} +07/25/2024 12:22:40 - INFO - __main__ - Step 12144: {'lr': 0.0004927877520937366, 'samples': 582912, 'steps': 12143, 'loss/train': 1.9791274070739746} +07/25/2024 12:22:41 - INFO - __main__ - Step 12145: {'lr': 0.0004927864975842536, 'samples': 582960, 'steps': 12144, 'loss/train': 1.544486403465271} +07/25/2024 12:22:41 - INFO - __main__ - Step 12146: {'lr': 0.0004927852429672716, 'samples': 583008, 'steps': 12145, 'loss/train': 2.0539798736572266} +07/25/2024 12:22:41 - INFO - __main__ - Step 12147: {'lr': 0.000492783988242791, 'samples': 583056, 'steps': 12146, 'loss/train': 1.2011874914169312} +07/25/2024 12:22:41 - INFO - __main__ - Step 12148: {'lr': 0.0004927827334108124, 'samples': 583104, 'steps': 12147, 'loss/train': 1.5367066860198975} +07/25/2024 12:22:42 - INFO - __main__ - Step 12149: {'lr': 0.0004927814784713366, 'samples': 583152, 'steps': 12148, 'loss/train': 1.3989832401275635} +07/25/2024 12:22:42 - INFO - __main__ - Step 12150: {'lr': 0.0004927802234243639, 'samples': 583200, 'steps': 12149, 'loss/train': 1.5873115062713623} +07/25/2024 12:22:42 - INFO - __main__ - Step 12151: {'lr': 0.0004927789682698948, 'samples': 583248, 'steps': 12150, 'loss/train': 1.4373095035552979} +07/25/2024 12:22:43 - INFO - __main__ - Step 12152: {'lr': 0.0004927777130079302, 'samples': 583296, 'steps': 12151, 'loss/train': 1.6942459344863892} +07/25/2024 12:22:43 - INFO - __main__ - Step 12153: {'lr': 0.0004927764576384703, 'samples': 583344, 'steps': 12152, 'loss/train': 1.0752105712890625} +07/25/2024 12:22:43 - INFO - __main__ - Step 12154: {'lr': 0.0004927752021615158, 'samples': 583392, 'steps': 12153, 'loss/train': 1.777543306350708} +07/25/2024 12:22:43 - INFO - __main__ - Step 12155: {'lr': 0.0004927739465770672, 'samples': 583440, 'steps': 12154, 'loss/train': 1.8237295150756836} +07/25/2024 12:22:44 - INFO - __main__ - Step 12156: {'lr': 0.0004927726908851251, 'samples': 583488, 'steps': 12155, 'loss/train': 2.0794436931610107} +07/25/2024 12:22:44 - INFO - __main__ - Step 12157: {'lr': 0.0004927714350856902, 'samples': 583536, 'steps': 12156, 'loss/train': 2.5654520988464355} +07/25/2024 12:22:44 - INFO - __main__ - Step 12158: {'lr': 0.0004927701791787627, 'samples': 583584, 'steps': 12157, 'loss/train': 0.07472304254770279} +07/25/2024 12:22:45 - INFO - __main__ - Step 12159: {'lr': 0.0004927689231643436, 'samples': 583632, 'steps': 12158, 'loss/train': 1.0695770978927612} +07/25/2024 12:22:45 - INFO - __main__ - Step 12160: {'lr': 0.000492767667042433, 'samples': 583680, 'steps': 12159, 'loss/train': 1.8875950574874878} +07/25/2024 12:22:45 - INFO - __main__ - Step 12161: {'lr': 0.0004927664108130318, 'samples': 583728, 'steps': 12160, 'loss/train': 2.044020175933838} +07/25/2024 12:22:45 - INFO - __main__ - Step 12162: {'lr': 0.0004927651544761404, 'samples': 583776, 'steps': 12161, 'loss/train': 2.058715581893921} +07/25/2024 12:22:46 - INFO - __main__ - Step 12163: {'lr': 0.0004927638980317593, 'samples': 583824, 'steps': 12162, 'loss/train': 2.6023685932159424} +07/25/2024 12:22:46 - INFO - __main__ - Step 12164: {'lr': 0.0004927626414798893, 'samples': 583872, 'steps': 12163, 'loss/train': 2.2129428386688232} +07/25/2024 12:22:46 - INFO - __main__ - Step 12165: {'lr': 0.0004927613848205308, 'samples': 583920, 'steps': 12164, 'loss/train': 1.7075732946395874} +07/25/2024 12:22:47 - INFO - __main__ - Step 12166: {'lr': 0.0004927601280536843, 'samples': 583968, 'steps': 12165, 'loss/train': 1.6958653926849365} +07/25/2024 12:22:47 - INFO - __main__ - Step 12167: {'lr': 0.0004927588711793504, 'samples': 584016, 'steps': 12166, 'loss/train': 2.266434907913208} +07/25/2024 12:22:47 - INFO - __main__ - Step 12168: {'lr': 0.0004927576141975297, 'samples': 584064, 'steps': 12167, 'loss/train': 1.9056347608566284} +07/25/2024 12:22:47 - INFO - __main__ - Step 12169: {'lr': 0.0004927563571082228, 'samples': 584112, 'steps': 12168, 'loss/train': 1.7031521797180176} +07/25/2024 12:22:48 - INFO - __main__ - Step 12170: {'lr': 0.00049275509991143, 'samples': 584160, 'steps': 12169, 'loss/train': 2.0898385047912598} +07/25/2024 12:22:48 - INFO - __main__ - Step 12171: {'lr': 0.0004927538426071522, 'samples': 584208, 'steps': 12170, 'loss/train': 1.270167350769043} +07/25/2024 12:22:48 - INFO - __main__ - Step 12172: {'lr': 0.0004927525851953898, 'samples': 584256, 'steps': 12171, 'loss/train': 1.3701404333114624} +07/25/2024 12:22:49 - INFO - __main__ - Step 12173: {'lr': 0.0004927513276761433, 'samples': 584304, 'steps': 12172, 'loss/train': 0.8576317429542542} +07/25/2024 12:22:49 - INFO - __main__ - Step 12174: {'lr': 0.0004927500700494133, 'samples': 584352, 'steps': 12173, 'loss/train': 1.1606351137161255} +07/25/2024 12:22:49 - INFO - __main__ - Step 12175: {'lr': 0.0004927488123152004, 'samples': 584400, 'steps': 12174, 'loss/train': 2.072007179260254} +07/25/2024 12:22:49 - INFO - __main__ - Step 12176: {'lr': 0.0004927475544735052, 'samples': 584448, 'steps': 12175, 'loss/train': 1.855972170829773} +07/25/2024 12:22:50 - INFO - __main__ - Step 12177: {'lr': 0.0004927462965243281, 'samples': 584496, 'steps': 12176, 'loss/train': 1.8618971109390259} +07/25/2024 12:22:50 - INFO - __main__ - Step 12178: {'lr': 0.0004927450384676697, 'samples': 584544, 'steps': 12177, 'loss/train': 1.9821033477783203} +07/25/2024 12:22:50 - INFO - __main__ - Step 12179: {'lr': 0.0004927437803035307, 'samples': 584592, 'steps': 12178, 'loss/train': 1.7643628120422363} +07/25/2024 12:22:51 - INFO - __main__ - Step 12180: {'lr': 0.0004927425220319115, 'samples': 584640, 'steps': 12179, 'loss/train': 1.5183109045028687} +07/25/2024 12:22:51 - INFO - __main__ - Step 12181: {'lr': 0.0004927412636528127, 'samples': 584688, 'steps': 12180, 'loss/train': 2.6726553440093994} +07/25/2024 12:22:51 - INFO - __main__ - Step 12182: {'lr': 0.0004927400051662349, 'samples': 584736, 'steps': 12181, 'loss/train': 0.08414608985185623} +07/25/2024 12:22:51 - INFO - __main__ - Step 12183: {'lr': 0.0004927387465721787, 'samples': 584784, 'steps': 12182, 'loss/train': 1.4068593978881836} +07/25/2024 12:22:52 - INFO - __main__ - Step 12184: {'lr': 0.0004927374878706444, 'samples': 584832, 'steps': 12183, 'loss/train': 2.160015344619751} +07/25/2024 12:22:52 - INFO - __main__ - Step 12185: {'lr': 0.0004927362290616329, 'samples': 584880, 'steps': 12184, 'loss/train': 1.9027949571609497} +07/25/2024 12:22:52 - INFO - __main__ - Step 12186: {'lr': 0.0004927349701451445, 'samples': 584928, 'steps': 12185, 'loss/train': 1.8695118427276611} +07/25/2024 12:22:53 - INFO - __main__ - Step 12187: {'lr': 0.0004927337111211799, 'samples': 584976, 'steps': 12186, 'loss/train': 3.761157512664795} +07/25/2024 12:22:53 - INFO - __main__ - Step 12188: {'lr': 0.0004927324519897396, 'samples': 585024, 'steps': 12187, 'loss/train': 2.043102264404297} +07/25/2024 12:22:53 - INFO - __main__ - Step 12189: {'lr': 0.0004927311927508242, 'samples': 585072, 'steps': 12188, 'loss/train': 1.567436933517456} +07/25/2024 12:22:53 - INFO - __main__ - Step 12190: {'lr': 0.0004927299334044343, 'samples': 585120, 'steps': 12189, 'loss/train': 1.991171956062317} +07/25/2024 12:22:54 - INFO - __main__ - Step 12191: {'lr': 0.0004927286739505703, 'samples': 585168, 'steps': 12190, 'loss/train': 1.8400378227233887} +07/25/2024 12:22:54 - INFO - __main__ - Step 12192: {'lr': 0.0004927274143892329, 'samples': 585216, 'steps': 12191, 'loss/train': 1.8154696226119995} +07/25/2024 12:22:54 - INFO - __main__ - Step 12193: {'lr': 0.0004927261547204225, 'samples': 585264, 'steps': 12192, 'loss/train': 1.9571640491485596} +07/25/2024 12:22:55 - INFO - __main__ - Step 12194: {'lr': 0.0004927248949441398, 'samples': 585312, 'steps': 12193, 'loss/train': 2.0306875705718994} +07/25/2024 12:22:55 - INFO - __main__ - Step 12195: {'lr': 0.0004927236350603853, 'samples': 585360, 'steps': 12194, 'loss/train': 1.2244445085525513} +07/25/2024 12:22:55 - INFO - __main__ - Step 12196: {'lr': 0.0004927223750691597, 'samples': 585408, 'steps': 12195, 'loss/train': 1.3910515308380127} +07/25/2024 12:22:55 - INFO - __main__ - Step 12197: {'lr': 0.0004927211149704633, 'samples': 585456, 'steps': 12196, 'loss/train': 1.5229823589324951} +07/25/2024 12:22:56 - INFO - __main__ - Step 12198: {'lr': 0.0004927198547642969, 'samples': 585504, 'steps': 12197, 'loss/train': 1.5582343339920044} +07/25/2024 12:22:56 - INFO - __main__ - Step 12199: {'lr': 0.0004927185944506608, 'samples': 585552, 'steps': 12198, 'loss/train': 1.3360668420791626} +07/25/2024 12:22:56 - INFO - __main__ - Step 12200: {'lr': 0.0004927173340295558, 'samples': 585600, 'steps': 12199, 'loss/train': 1.7205443382263184} +07/25/2024 12:22:57 - INFO - __main__ - Step 12201: {'lr': 0.0004927160735009823, 'samples': 585648, 'steps': 12200, 'loss/train': 1.8372505903244019} +07/25/2024 12:22:57 - INFO - __main__ - Step 12202: {'lr': 0.000492714812864941, 'samples': 585696, 'steps': 12201, 'loss/train': 1.4033945798873901} +07/25/2024 12:22:57 - INFO - __main__ - Step 12203: {'lr': 0.0004927135521214323, 'samples': 585744, 'steps': 12202, 'loss/train': 2.125852346420288} +07/25/2024 12:22:57 - INFO - __main__ - Step 12204: {'lr': 0.0004927122912704569, 'samples': 585792, 'steps': 12203, 'loss/train': 2.0938210487365723} +07/25/2024 12:22:58 - INFO - __main__ - Step 12205: {'lr': 0.0004927110303120153, 'samples': 585840, 'steps': 12204, 'loss/train': 1.9687342643737793} +07/25/2024 12:22:58 - INFO - __main__ - Step 12206: {'lr': 0.0004927097692461081, 'samples': 585888, 'steps': 12205, 'loss/train': 0.10216565430164337} +07/25/2024 12:22:58 - INFO - __main__ - Step 12207: {'lr': 0.0004927085080727357, 'samples': 585936, 'steps': 12206, 'loss/train': 1.8019859790802002} +07/25/2024 12:22:59 - INFO - __main__ - Step 12208: {'lr': 0.0004927072467918989, 'samples': 585984, 'steps': 12207, 'loss/train': 1.4742352962493896} +07/25/2024 12:22:59 - INFO - __main__ - Step 12209: {'lr': 0.000492705985403598, 'samples': 586032, 'steps': 12208, 'loss/train': 1.3422770500183105} +07/25/2024 12:22:59 - INFO - __main__ - Step 12210: {'lr': 0.0004927047239078338, 'samples': 586080, 'steps': 12209, 'loss/train': 1.6294353008270264} +07/25/2024 12:22:59 - INFO - __main__ - Step 12211: {'lr': 0.0004927034623046067, 'samples': 586128, 'steps': 12210, 'loss/train': 4.395598411560059} +07/25/2024 12:23:00 - INFO - __main__ - Step 12212: {'lr': 0.0004927022005939174, 'samples': 586176, 'steps': 12211, 'loss/train': 2.2755074501037598} +07/25/2024 12:23:00 - INFO - __main__ - Step 12213: {'lr': 0.0004927009387757662, 'samples': 586224, 'steps': 12212, 'loss/train': 1.5126110315322876} +07/25/2024 12:23:00 - INFO - __main__ - Step 12214: {'lr': 0.0004926996768501539, 'samples': 586272, 'steps': 12213, 'loss/train': 1.998291254043579} +07/25/2024 12:23:01 - INFO - __main__ - Step 12215: {'lr': 0.000492698414817081, 'samples': 586320, 'steps': 12214, 'loss/train': 0.4161456525325775} +07/25/2024 12:23:01 - INFO - __main__ - Step 12216: {'lr': 0.000492697152676548, 'samples': 586368, 'steps': 12215, 'loss/train': 1.9843848943710327} +07/25/2024 12:23:01 - INFO - __main__ - Step 12217: {'lr': 0.0004926958904285556, 'samples': 586416, 'steps': 12216, 'loss/train': 2.124408721923828} +07/25/2024 12:23:01 - INFO - __main__ - Step 12218: {'lr': 0.0004926946280731042, 'samples': 586464, 'steps': 12217, 'loss/train': 2.461127996444702} +07/25/2024 12:23:02 - INFO - __main__ - Step 12219: {'lr': 0.0004926933656101943, 'samples': 586512, 'steps': 12218, 'loss/train': 1.2339391708374023} +07/25/2024 12:23:02 - INFO - __main__ - Step 12220: {'lr': 0.0004926921030398268, 'samples': 586560, 'steps': 12219, 'loss/train': 1.741431713104248} +07/25/2024 12:23:02 - INFO - __main__ - Step 12221: {'lr': 0.0004926908403620019, 'samples': 586608, 'steps': 12220, 'loss/train': 1.9658973217010498} +07/25/2024 12:23:02 - INFO - __main__ - Step 12222: {'lr': 0.0004926895775767203, 'samples': 586656, 'steps': 12221, 'loss/train': 1.7503856420516968} +07/25/2024 12:23:03 - INFO - __main__ - Step 12223: {'lr': 0.0004926883146839827, 'samples': 586704, 'steps': 12222, 'loss/train': 0.7913888096809387} +07/25/2024 12:23:03 - INFO - __main__ - Step 12224: {'lr': 0.0004926870516837893, 'samples': 586752, 'steps': 12223, 'loss/train': 1.8057940006256104} +07/25/2024 12:23:03 - INFO - __main__ - Step 12225: {'lr': 0.000492685788576141, 'samples': 586800, 'steps': 12224, 'loss/train': 1.58865225315094} +07/25/2024 12:23:04 - INFO - __main__ - Step 12226: {'lr': 0.0004926845253610382, 'samples': 586848, 'steps': 12225, 'loss/train': 2.1996021270751953} +07/25/2024 12:23:04 - INFO - __main__ - Step 12227: {'lr': 0.0004926832620384815, 'samples': 586896, 'steps': 12226, 'loss/train': 1.9388970136642456} +07/25/2024 12:23:04 - INFO - __main__ - Step 12228: {'lr': 0.0004926819986084714, 'samples': 586944, 'steps': 12227, 'loss/train': 1.8478679656982422} +07/25/2024 12:23:04 - INFO - __main__ - Step 12229: {'lr': 0.0004926807350710086, 'samples': 586992, 'steps': 12228, 'loss/train': 1.7678744792938232} +07/25/2024 12:23:05 - INFO - __main__ - Step 12230: {'lr': 0.0004926794714260935, 'samples': 587040, 'steps': 12229, 'loss/train': 0.09816408902406693} +07/25/2024 12:23:05 - INFO - __main__ - Step 12231: {'lr': 0.0004926782076737267, 'samples': 587088, 'steps': 12230, 'loss/train': 2.0625} +07/25/2024 12:23:05 - INFO - __main__ - Step 12232: {'lr': 0.0004926769438139089, 'samples': 587136, 'steps': 12231, 'loss/train': 1.224212646484375} +07/25/2024 12:23:06 - INFO - __main__ - Step 12233: {'lr': 0.0004926756798466405, 'samples': 587184, 'steps': 12232, 'loss/train': 1.5618804693222046} +07/25/2024 12:23:06 - INFO - __main__ - Step 12234: {'lr': 0.0004926744157719221, 'samples': 587232, 'steps': 12233, 'loss/train': 1.4222283363342285} +07/25/2024 12:23:06 - INFO - __main__ - Step 12235: {'lr': 0.0004926731515897542, 'samples': 587280, 'steps': 12234, 'loss/train': 2.922447443008423} +07/25/2024 12:23:06 - INFO - __main__ - Step 12236: {'lr': 0.0004926718873001375, 'samples': 587328, 'steps': 12235, 'loss/train': 2.019102096557617} +07/25/2024 12:23:07 - INFO - __main__ - Step 12237: {'lr': 0.0004926706229030725, 'samples': 587376, 'steps': 12236, 'loss/train': 1.5950037240982056} +07/25/2024 12:23:07 - INFO - __main__ - Step 12238: {'lr': 0.0004926693583985598, 'samples': 587424, 'steps': 12237, 'loss/train': 2.068247079849243} +07/25/2024 12:23:07 - INFO - __main__ - Step 12239: {'lr': 0.0004926680937865998, 'samples': 587472, 'steps': 12238, 'loss/train': 2.4229273796081543} +07/25/2024 12:23:08 - INFO - __main__ - Step 12240: {'lr': 0.0004926668290671932, 'samples': 587520, 'steps': 12239, 'loss/train': 1.56024968624115} +07/25/2024 12:23:08 - INFO - __main__ - Step 12241: {'lr': 0.0004926655642403406, 'samples': 587568, 'steps': 12240, 'loss/train': 2.403378963470459} +07/25/2024 12:23:08 - INFO - __main__ - Step 12242: {'lr': 0.0004926642993060424, 'samples': 587616, 'steps': 12241, 'loss/train': 2.2039787769317627} +07/25/2024 12:23:08 - INFO - __main__ - Step 12243: {'lr': 0.0004926630342642993, 'samples': 587664, 'steps': 12242, 'loss/train': 1.0586280822753906} +07/25/2024 12:23:09 - INFO - __main__ - Step 12244: {'lr': 0.0004926617691151117, 'samples': 587712, 'steps': 12243, 'loss/train': 1.9076972007751465} +07/25/2024 12:23:09 - INFO - __main__ - Step 12245: {'lr': 0.0004926605038584803, 'samples': 587760, 'steps': 12244, 'loss/train': 2.2183024883270264} +07/25/2024 12:23:09 - INFO - __main__ - Step 12246: {'lr': 0.0004926592384944058, 'samples': 587808, 'steps': 12245, 'loss/train': 1.734006404876709} +07/25/2024 12:23:10 - INFO - __main__ - Step 12247: {'lr': 0.0004926579730228884, 'samples': 587856, 'steps': 12246, 'loss/train': 0.7234412431716919} +07/25/2024 12:23:10 - INFO - __main__ - Step 12248: {'lr': 0.0004926567074439289, 'samples': 587904, 'steps': 12247, 'loss/train': 1.9368938207626343} +07/25/2024 12:23:10 - INFO - __main__ - Step 12249: {'lr': 0.0004926554417575278, 'samples': 587952, 'steps': 12248, 'loss/train': 1.7711645364761353} +07/25/2024 12:23:10 - INFO - __main__ - Step 12250: {'lr': 0.0004926541759636857, 'samples': 588000, 'steps': 12249, 'loss/train': 2.268993854522705} +07/25/2024 12:23:11 - INFO - __main__ - Step 12251: {'lr': 0.0004926529100624031, 'samples': 588048, 'steps': 12250, 'loss/train': 1.971667766571045} +07/25/2024 12:23:11 - INFO - __main__ - Step 12252: {'lr': 0.0004926516440536807, 'samples': 588096, 'steps': 12251, 'loss/train': 1.904115080833435} +07/25/2024 12:23:11 - INFO - __main__ - Step 12253: {'lr': 0.0004926503779375188, 'samples': 588144, 'steps': 12252, 'loss/train': 1.203878402709961} +07/25/2024 12:23:12 - INFO - __main__ - Step 12254: {'lr': 0.0004926491117139182, 'samples': 588192, 'steps': 12253, 'loss/train': 0.12185255438089371} +07/25/2024 12:23:12 - INFO - __main__ - Step 12255: {'lr': 0.0004926478453828793, 'samples': 588240, 'steps': 12254, 'loss/train': 2.2553138732910156} +07/25/2024 12:23:12 - INFO - __main__ - Step 12256: {'lr': 0.0004926465789444027, 'samples': 588288, 'steps': 12255, 'loss/train': 1.0227606296539307} +07/25/2024 12:23:12 - INFO - __main__ - Step 12257: {'lr': 0.0004926453123984892, 'samples': 588336, 'steps': 12256, 'loss/train': 1.416646122932434} +07/25/2024 12:23:13 - INFO - __main__ - Step 12258: {'lr': 0.000492644045745139, 'samples': 588384, 'steps': 12257, 'loss/train': 0.8162453770637512} +07/25/2024 12:23:13 - INFO - __main__ - Step 12259: {'lr': 0.0004926427789843529, 'samples': 588432, 'steps': 12258, 'loss/train': 3.2648215293884277} +07/25/2024 12:23:13 - INFO - __main__ - Step 12260: {'lr': 0.0004926415121161313, 'samples': 588480, 'steps': 12259, 'loss/train': 2.554549217224121} +07/25/2024 12:23:14 - INFO - __main__ - Step 12261: {'lr': 0.0004926402451404749, 'samples': 588528, 'steps': 12260, 'loss/train': 1.9173864126205444} +07/25/2024 12:23:14 - INFO - __main__ - Step 12262: {'lr': 0.0004926389780573841, 'samples': 588576, 'steps': 12261, 'loss/train': 2.2168638706207275} +07/25/2024 12:23:14 - INFO - __main__ - Step 12263: {'lr': 0.0004926377108668596, 'samples': 588624, 'steps': 12262, 'loss/train': 1.7362306118011475} +07/25/2024 12:23:14 - INFO - __main__ - Step 12264: {'lr': 0.000492636443568902, 'samples': 588672, 'steps': 12263, 'loss/train': 1.8360792398452759} +07/25/2024 12:23:15 - INFO - __main__ - Step 12265: {'lr': 0.0004926351761635117, 'samples': 588720, 'steps': 12264, 'loss/train': 2.1091465950012207} +07/25/2024 12:23:15 - INFO - __main__ - Step 12266: {'lr': 0.0004926339086506893, 'samples': 588768, 'steps': 12265, 'loss/train': 1.951075792312622} +07/25/2024 12:23:15 - INFO - __main__ - Step 12267: {'lr': 0.0004926326410304354, 'samples': 588816, 'steps': 12266, 'loss/train': 1.461837887763977} +07/25/2024 12:23:16 - INFO - __main__ - Step 12268: {'lr': 0.0004926313733027507, 'samples': 588864, 'steps': 12267, 'loss/train': 2.0051090717315674} +07/25/2024 12:23:16 - INFO - __main__ - Step 12269: {'lr': 0.0004926301054676356, 'samples': 588912, 'steps': 12268, 'loss/train': 1.5919257402420044} +07/25/2024 12:23:16 - INFO - __main__ - Step 12270: {'lr': 0.0004926288375250906, 'samples': 588960, 'steps': 12269, 'loss/train': 1.786339521408081} +07/25/2024 12:23:16 - INFO - __main__ - Step 12271: {'lr': 0.0004926275694751164, 'samples': 589008, 'steps': 12270, 'loss/train': 1.5175689458847046} +07/25/2024 12:23:17 - INFO - __main__ - Step 12272: {'lr': 0.0004926263013177134, 'samples': 589056, 'steps': 12271, 'loss/train': 1.8655201196670532} +07/25/2024 12:23:17 - INFO - __main__ - Step 12273: {'lr': 0.0004926250330528825, 'samples': 589104, 'steps': 12272, 'loss/train': 2.252983570098877} +07/25/2024 12:23:17 - INFO - __main__ - Step 12274: {'lr': 0.0004926237646806239, 'samples': 589152, 'steps': 12273, 'loss/train': 2.074112892150879} +07/25/2024 12:23:18 - INFO - __main__ - Step 12275: {'lr': 0.0004926224962009383, 'samples': 589200, 'steps': 12274, 'loss/train': 2.288104295730591} +07/25/2024 12:23:18 - INFO - __main__ - Step 12276: {'lr': 0.0004926212276138262, 'samples': 589248, 'steps': 12275, 'loss/train': 1.8766794204711914} +07/25/2024 12:23:18 - INFO - __main__ - Step 12277: {'lr': 0.0004926199589192884, 'samples': 589296, 'steps': 12276, 'loss/train': 1.5226577520370483} +07/25/2024 12:23:18 - INFO - __main__ - Step 12278: {'lr': 0.000492618690117325, 'samples': 589344, 'steps': 12277, 'loss/train': 0.10912559181451797} +07/25/2024 12:23:19 - INFO - __main__ - Step 12279: {'lr': 0.0004926174212079371, 'samples': 589392, 'steps': 12278, 'loss/train': 2.03157114982605} +07/25/2024 12:23:19 - INFO - __main__ - Step 12280: {'lr': 0.0004926161521911249, 'samples': 589440, 'steps': 12279, 'loss/train': 1.3594192266464233} +07/25/2024 12:23:19 - INFO - __main__ - Step 12281: {'lr': 0.0004926148830668891, 'samples': 589488, 'steps': 12280, 'loss/train': 1.9476687908172607} +07/25/2024 12:23:20 - INFO - __main__ - Step 12282: {'lr': 0.0004926136138352301, 'samples': 589536, 'steps': 12281, 'loss/train': 2.2375540733337402} +07/25/2024 12:23:20 - INFO - __main__ - Step 12283: {'lr': 0.0004926123444961487, 'samples': 589584, 'steps': 12282, 'loss/train': 2.782846212387085} +07/25/2024 12:23:20 - INFO - __main__ - Step 12284: {'lr': 0.0004926110750496453, 'samples': 589632, 'steps': 12283, 'loss/train': 1.6556676626205444} +07/25/2024 12:23:20 - INFO - __main__ - Step 12285: {'lr': 0.0004926098054957206, 'samples': 589680, 'steps': 12284, 'loss/train': 1.906531572341919} +07/25/2024 12:23:21 - INFO - __main__ - Step 12286: {'lr': 0.0004926085358343751, 'samples': 589728, 'steps': 12285, 'loss/train': 1.9540660381317139} +07/25/2024 12:23:21 - INFO - __main__ - Step 12287: {'lr': 0.0004926072660656092, 'samples': 589776, 'steps': 12286, 'loss/train': 1.9247839450836182} +07/25/2024 12:23:21 - INFO - __main__ - Step 12288: {'lr': 0.0004926059961894237, 'samples': 589824, 'steps': 12287, 'loss/train': 1.5006113052368164} +07/25/2024 12:23:22 - INFO - __main__ - Step 12289: {'lr': 0.000492604726205819, 'samples': 589872, 'steps': 12288, 'loss/train': 1.771702527999878} +07/25/2024 12:23:22 - INFO - __main__ - Step 12290: {'lr': 0.0004926034561147958, 'samples': 589920, 'steps': 12289, 'loss/train': 1.8145411014556885} +07/25/2024 12:23:22 - INFO - __main__ - Step 12291: {'lr': 0.0004926021859163545, 'samples': 589968, 'steps': 12290, 'loss/train': 2.0733802318573} +07/25/2024 12:23:22 - INFO - __main__ - Step 12292: {'lr': 0.0004926009156104958, 'samples': 590016, 'steps': 12291, 'loss/train': 1.2983492612838745} +07/25/2024 12:23:23 - INFO - __main__ - Step 12293: {'lr': 0.0004925996451972201, 'samples': 590064, 'steps': 12292, 'loss/train': 1.3639607429504395} +07/25/2024 12:23:23 - INFO - __main__ - Step 12294: {'lr': 0.0004925983746765282, 'samples': 590112, 'steps': 12293, 'loss/train': 1.68570077419281} +07/25/2024 12:23:23 - INFO - __main__ - Step 12295: {'lr': 0.0004925971040484206, 'samples': 590160, 'steps': 12294, 'loss/train': 1.6411327123641968} +07/25/2024 12:23:24 - INFO - __main__ - Step 12296: {'lr': 0.0004925958333128977, 'samples': 590208, 'steps': 12295, 'loss/train': 1.9788416624069214} +07/25/2024 12:23:24 - INFO - __main__ - Step 12297: {'lr': 0.0004925945624699601, 'samples': 590256, 'steps': 12296, 'loss/train': 2.07920241355896} +07/25/2024 12:23:24 - INFO - __main__ - Step 12298: {'lr': 0.0004925932915196086, 'samples': 590304, 'steps': 12297, 'loss/train': 2.2324905395507812} +07/25/2024 12:23:24 - INFO - __main__ - Step 12299: {'lr': 0.0004925920204618435, 'samples': 590352, 'steps': 12298, 'loss/train': 1.9552549123764038} +07/25/2024 12:23:25 - INFO - __main__ - Step 12300: {'lr': 0.0004925907492966654, 'samples': 590400, 'steps': 12299, 'loss/train': 1.9287608861923218} +07/25/2024 12:23:25 - INFO - __main__ - Step 12301: {'lr': 0.000492589478024075, 'samples': 590448, 'steps': 12300, 'loss/train': 1.8458000421524048} +07/25/2024 12:23:25 - INFO - __main__ - Step 12302: {'lr': 0.0004925882066440727, 'samples': 590496, 'steps': 12301, 'loss/train': 0.10556494444608688} +07/25/2024 12:23:26 - INFO - __main__ - Step 12303: {'lr': 0.0004925869351566592, 'samples': 590544, 'steps': 12302, 'loss/train': 1.9153541326522827} +07/25/2024 12:23:26 - INFO - __main__ - Step 12304: {'lr': 0.000492585663561835, 'samples': 590592, 'steps': 12303, 'loss/train': 1.6707684993743896} +07/25/2024 12:23:26 - INFO - __main__ - Step 12305: {'lr': 0.0004925843918596006, 'samples': 590640, 'steps': 12304, 'loss/train': 1.9022376537322998} +07/25/2024 12:23:26 - INFO - __main__ - Step 12306: {'lr': 0.0004925831200499566, 'samples': 590688, 'steps': 12305, 'loss/train': 2.1250345706939697} +07/25/2024 12:23:27 - INFO - __main__ - Step 12307: {'lr': 0.0004925818481329037, 'samples': 590736, 'steps': 12306, 'loss/train': 2.621692657470703} +07/25/2024 12:23:27 - INFO - __main__ - Step 12308: {'lr': 0.0004925805761084423, 'samples': 590784, 'steps': 12307, 'loss/train': 1.4931535720825195} +07/25/2024 12:23:27 - INFO - __main__ - Step 12309: {'lr': 0.000492579303976573, 'samples': 590832, 'steps': 12308, 'loss/train': 1.810801386833191} +07/25/2024 12:23:28 - INFO - __main__ - Step 12310: {'lr': 0.0004925780317372964, 'samples': 590880, 'steps': 12309, 'loss/train': 1.7595843076705933} +07/25/2024 12:23:28 - INFO - __main__ - Step 12311: {'lr': 0.0004925767593906131, 'samples': 590928, 'steps': 12310, 'loss/train': 1.9802451133728027} +07/25/2024 12:23:28 - INFO - __main__ - Step 12312: {'lr': 0.0004925754869365235, 'samples': 590976, 'steps': 12311, 'loss/train': 1.7772575616836548} +07/25/2024 12:23:28 - INFO - __main__ - Step 12313: {'lr': 0.0004925742143750283, 'samples': 591024, 'steps': 12312, 'loss/train': 2.091569423675537} +07/25/2024 12:23:29 - INFO - __main__ - Step 12314: {'lr': 0.0004925729417061282, 'samples': 591072, 'steps': 12313, 'loss/train': 2.401546001434326} +07/25/2024 12:23:29 - INFO - __main__ - Step 12315: {'lr': 0.0004925716689298234, 'samples': 591120, 'steps': 12314, 'loss/train': 0.9846076965332031} +07/25/2024 12:23:29 - INFO - __main__ - Step 12316: {'lr': 0.0004925703960461147, 'samples': 591168, 'steps': 12315, 'loss/train': 1.8076204061508179} +07/25/2024 12:23:30 - INFO - __main__ - Step 12317: {'lr': 0.0004925691230550027, 'samples': 591216, 'steps': 12316, 'loss/train': 1.4979219436645508} +07/25/2024 12:23:30 - INFO - __main__ - Step 12318: {'lr': 0.0004925678499564879, 'samples': 591264, 'steps': 12317, 'loss/train': 1.9748034477233887} +07/25/2024 12:23:30 - INFO - __main__ - Step 12319: {'lr': 0.0004925665767505708, 'samples': 591312, 'steps': 12318, 'loss/train': 1.7166693210601807} +07/25/2024 12:23:30 - INFO - __main__ - Step 12320: {'lr': 0.0004925653034372519, 'samples': 591360, 'steps': 12319, 'loss/train': 1.7103121280670166} +07/25/2024 12:23:31 - INFO - __main__ - Step 12321: {'lr': 0.000492564030016532, 'samples': 591408, 'steps': 12320, 'loss/train': 1.9272494316101074} +07/25/2024 12:23:31 - INFO - __main__ - Step 12322: {'lr': 0.0004925627564884116, 'samples': 591456, 'steps': 12321, 'loss/train': 2.091585159301758} +07/25/2024 12:23:31 - INFO - __main__ - Step 12323: {'lr': 0.0004925614828528911, 'samples': 591504, 'steps': 12322, 'loss/train': 1.4679383039474487} +07/25/2024 12:23:32 - INFO - __main__ - Step 12324: {'lr': 0.0004925602091099713, 'samples': 591552, 'steps': 12323, 'loss/train': 2.3439579010009766} +07/25/2024 12:23:32 - INFO - __main__ - Step 12325: {'lr': 0.0004925589352596525, 'samples': 591600, 'steps': 12324, 'loss/train': 2.2674756050109863} +07/25/2024 12:23:32 - INFO - __main__ - Step 12326: {'lr': 0.0004925576613019354, 'samples': 591648, 'steps': 12325, 'loss/train': 0.26501214504241943} +07/25/2024 12:23:32 - INFO - __main__ - Step 12327: {'lr': 0.0004925563872368206, 'samples': 591696, 'steps': 12326, 'loss/train': 1.8182158470153809} +07/25/2024 12:23:33 - INFO - __main__ - Step 12328: {'lr': 0.0004925551130643088, 'samples': 591744, 'steps': 12327, 'loss/train': 1.8723175525665283} +07/25/2024 12:23:33 - INFO - __main__ - Step 12329: {'lr': 0.0004925538387844002, 'samples': 591792, 'steps': 12328, 'loss/train': 1.4660422801971436} +07/25/2024 12:23:33 - INFO - __main__ - Step 12330: {'lr': 0.0004925525643970955, 'samples': 591840, 'steps': 12329, 'loss/train': 1.8651971817016602} +07/25/2024 12:23:34 - INFO - __main__ - Step 12331: {'lr': 0.0004925512899023955, 'samples': 591888, 'steps': 12330, 'loss/train': 2.5826170444488525} +07/25/2024 12:23:34 - INFO - __main__ - Step 12332: {'lr': 0.0004925500153003004, 'samples': 591936, 'steps': 12331, 'loss/train': 1.9151089191436768} +07/25/2024 12:23:34 - INFO - __main__ - Step 12333: {'lr': 0.0004925487405908111, 'samples': 591984, 'steps': 12332, 'loss/train': 1.6027415990829468} +07/25/2024 12:23:34 - INFO - __main__ - Step 12334: {'lr': 0.000492547465773928, 'samples': 592032, 'steps': 12333, 'loss/train': 1.9483217000961304} +07/25/2024 12:23:35 - INFO - __main__ - Step 12335: {'lr': 0.0004925461908496517, 'samples': 592080, 'steps': 12334, 'loss/train': 1.8775383234024048} +07/25/2024 12:23:35 - INFO - __main__ - Step 12336: {'lr': 0.0004925449158179826, 'samples': 592128, 'steps': 12335, 'loss/train': 1.7461447715759277} +07/25/2024 12:23:35 - INFO - __main__ - Step 12337: {'lr': 0.0004925436406789215, 'samples': 592176, 'steps': 12336, 'loss/train': 1.5737377405166626} +07/25/2024 12:23:36 - INFO - __main__ - Step 12338: {'lr': 0.0004925423654324689, 'samples': 592224, 'steps': 12337, 'loss/train': 2.2420828342437744} +07/25/2024 12:23:36 - INFO - __main__ - Step 12339: {'lr': 0.0004925410900786252, 'samples': 592272, 'steps': 12338, 'loss/train': 1.038503646850586} +07/25/2024 12:23:36 - INFO - __main__ - Step 12340: {'lr': 0.0004925398146173913, 'samples': 592320, 'steps': 12339, 'loss/train': 1.8917150497436523} +07/25/2024 12:23:36 - INFO - __main__ - Step 12341: {'lr': 0.0004925385390487675, 'samples': 592368, 'steps': 12340, 'loss/train': 1.5194493532180786} +07/25/2024 12:23:37 - INFO - __main__ - Step 12342: {'lr': 0.0004925372633727543, 'samples': 592416, 'steps': 12341, 'loss/train': 1.8433915376663208} +07/25/2024 12:23:37 - INFO - __main__ - Step 12343: {'lr': 0.0004925359875893525, 'samples': 592464, 'steps': 12342, 'loss/train': 1.9300779104232788} +07/25/2024 12:23:37 - INFO - __main__ - Step 12344: {'lr': 0.0004925347116985626, 'samples': 592512, 'steps': 12343, 'loss/train': 0.6808714270591736} +07/25/2024 12:23:37 - INFO - __main__ - Step 12345: {'lr': 0.000492533435700385, 'samples': 592560, 'steps': 12344, 'loss/train': 1.996457576751709} +07/25/2024 12:23:38 - INFO - __main__ - Step 12346: {'lr': 0.0004925321595948205, 'samples': 592608, 'steps': 12345, 'loss/train': 1.2848395109176636} +07/25/2024 12:23:38 - INFO - __main__ - Step 12347: {'lr': 0.0004925308833818695, 'samples': 592656, 'steps': 12346, 'loss/train': 1.8880447149276733} +07/25/2024 12:23:38 - INFO - __main__ - Step 12348: {'lr': 0.0004925296070615327, 'samples': 592704, 'steps': 12347, 'loss/train': 1.8073703050613403} +07/25/2024 12:23:39 - INFO - __main__ - Step 12349: {'lr': 0.0004925283306338105, 'samples': 592752, 'steps': 12348, 'loss/train': 1.5358117818832397} +07/25/2024 12:23:39 - INFO - __main__ - Step 12350: {'lr': 0.0004925270540987036, 'samples': 592800, 'steps': 12349, 'loss/train': 0.5864265561103821} +07/25/2024 12:23:39 - INFO - __main__ - Step 12351: {'lr': 0.0004925257774562125, 'samples': 592848, 'steps': 12350, 'loss/train': 1.7863445281982422} +07/25/2024 12:23:39 - INFO - __main__ - Step 12352: {'lr': 0.0004925245007063378, 'samples': 592896, 'steps': 12351, 'loss/train': 1.7255176305770874} +07/25/2024 12:23:40 - INFO - __main__ - Step 12353: {'lr': 0.00049252322384908, 'samples': 592944, 'steps': 12352, 'loss/train': 1.5286222696304321} +07/25/2024 12:23:40 - INFO - __main__ - Step 12354: {'lr': 0.0004925219468844398, 'samples': 592992, 'steps': 12353, 'loss/train': 1.5694913864135742} +07/25/2024 12:23:40 - INFO - __main__ - Step 12355: {'lr': 0.0004925206698124175, 'samples': 593040, 'steps': 12354, 'loss/train': 2.439483880996704} +07/25/2024 12:23:41 - INFO - __main__ - Step 12356: {'lr': 0.0004925193926330141, 'samples': 593088, 'steps': 12355, 'loss/train': 1.7179473638534546} +07/25/2024 12:23:41 - INFO - __main__ - Step 12357: {'lr': 0.0004925181153462297, 'samples': 593136, 'steps': 12356, 'loss/train': 1.661950945854187} +07/25/2024 12:23:41 - INFO - __main__ - Step 12358: {'lr': 0.0004925168379520652, 'samples': 593184, 'steps': 12357, 'loss/train': 2.47491455078125} +07/25/2024 12:23:41 - INFO - __main__ - Step 12359: {'lr': 0.000492515560450521, 'samples': 593232, 'steps': 12358, 'loss/train': 1.9131290912628174} +07/25/2024 12:23:42 - INFO - __main__ - Step 12360: {'lr': 0.0004925142828415976, 'samples': 593280, 'steps': 12359, 'loss/train': 1.6155120134353638} +07/25/2024 12:23:42 - INFO - __main__ - Step 12361: {'lr': 0.0004925130051252958, 'samples': 593328, 'steps': 12360, 'loss/train': 1.039768934249878} +07/25/2024 12:23:42 - INFO - __main__ - Step 12362: {'lr': 0.0004925117273016159, 'samples': 593376, 'steps': 12361, 'loss/train': 2.304748058319092} +07/25/2024 12:23:43 - INFO - __main__ - Step 12363: {'lr': 0.0004925104493705587, 'samples': 593424, 'steps': 12362, 'loss/train': 1.4066435098648071} +07/25/2024 12:23:43 - INFO - __main__ - Step 12364: {'lr': 0.0004925091713321247, 'samples': 593472, 'steps': 12363, 'loss/train': 1.9688211679458618} +07/25/2024 12:23:43 - INFO - __main__ - Step 12365: {'lr': 0.0004925078931863144, 'samples': 593520, 'steps': 12364, 'loss/train': 2.2839484214782715} +07/25/2024 12:23:43 - INFO - __main__ - Step 12366: {'lr': 0.0004925066149331284, 'samples': 593568, 'steps': 12365, 'loss/train': 1.6090492010116577} +07/25/2024 12:23:44 - INFO - __main__ - Step 12367: {'lr': 0.0004925053365725672, 'samples': 593616, 'steps': 12366, 'loss/train': 2.1828761100769043} +07/25/2024 12:23:44 - INFO - __main__ - Step 12368: {'lr': 0.0004925040581046315, 'samples': 593664, 'steps': 12367, 'loss/train': 1.505921721458435} +07/25/2024 12:23:44 - INFO - __main__ - Step 12369: {'lr': 0.0004925027795293218, 'samples': 593712, 'steps': 12368, 'loss/train': 2.017953634262085} +07/25/2024 12:23:45 - INFO - __main__ - Step 12370: {'lr': 0.0004925015008466387, 'samples': 593760, 'steps': 12369, 'loss/train': 1.8425313234329224} +07/25/2024 12:23:45 - INFO - __main__ - Step 12371: {'lr': 0.0004925002220565826, 'samples': 593808, 'steps': 12370, 'loss/train': 1.3717095851898193} +07/25/2024 12:23:45 - INFO - __main__ - Step 12372: {'lr': 0.0004924989431591544, 'samples': 593856, 'steps': 12371, 'loss/train': 1.6899374723434448} +07/25/2024 12:23:45 - INFO - __main__ - Step 12373: {'lr': 0.0004924976641543543, 'samples': 593904, 'steps': 12372, 'loss/train': 1.81346595287323} +07/25/2024 12:23:46 - INFO - __main__ - Step 12374: {'lr': 0.0004924963850421831, 'samples': 593952, 'steps': 12373, 'loss/train': 0.3640769124031067} +07/25/2024 12:23:46 - INFO - __main__ - Step 12375: {'lr': 0.0004924951058226413, 'samples': 594000, 'steps': 12374, 'loss/train': 1.8881185054779053} +07/25/2024 12:23:46 - INFO - __main__ - Step 12376: {'lr': 0.0004924938264957294, 'samples': 594048, 'steps': 12375, 'loss/train': 2.4787824153900146} +07/25/2024 12:23:47 - INFO - __main__ - Step 12377: {'lr': 0.000492492547061448, 'samples': 594096, 'steps': 12376, 'loss/train': 1.4706062078475952} +07/25/2024 12:23:47 - INFO - __main__ - Step 12378: {'lr': 0.0004924912675197978, 'samples': 594144, 'steps': 12377, 'loss/train': 2.245929479598999} +07/25/2024 12:23:47 - INFO - __main__ - Step 12379: {'lr': 0.0004924899878707791, 'samples': 594192, 'steps': 12378, 'loss/train': 2.6640336513519287} +07/25/2024 12:23:47 - INFO - __main__ - Step 12380: {'lr': 0.0004924887081143929, 'samples': 594240, 'steps': 12379, 'loss/train': 2.2688958644866943} +07/25/2024 12:23:48 - INFO - __main__ - Step 12381: {'lr': 0.0004924874282506393, 'samples': 594288, 'steps': 12380, 'loss/train': 1.6211349964141846} +07/25/2024 12:23:48 - INFO - __main__ - Step 12382: {'lr': 0.0004924861482795191, 'samples': 594336, 'steps': 12381, 'loss/train': 2.1496479511260986} +07/25/2024 12:23:48 - INFO - __main__ - Step 12383: {'lr': 0.0004924848682010327, 'samples': 594384, 'steps': 12382, 'loss/train': 1.4441808462142944} +07/25/2024 12:23:49 - INFO - __main__ - Step 12384: {'lr': 0.000492483588015181, 'samples': 594432, 'steps': 12383, 'loss/train': 1.6677606105804443} +07/25/2024 12:23:49 - INFO - __main__ - Step 12385: {'lr': 0.0004924823077219643, 'samples': 594480, 'steps': 12384, 'loss/train': 2.407195806503296} +07/25/2024 12:23:49 - INFO - __main__ - Step 12386: {'lr': 0.0004924810273213831, 'samples': 594528, 'steps': 12385, 'loss/train': 1.8440738916397095} +07/25/2024 12:23:49 - INFO - __main__ - Step 12387: {'lr': 0.0004924797468134383, 'samples': 594576, 'steps': 12386, 'loss/train': 1.3514912128448486} +07/25/2024 12:23:50 - INFO - __main__ - Step 12388: {'lr': 0.0004924784661981302, 'samples': 594624, 'steps': 12387, 'loss/train': 1.8247326612472534} +07/25/2024 12:23:50 - INFO - __main__ - Step 12389: {'lr': 0.0004924771854754594, 'samples': 594672, 'steps': 12388, 'loss/train': 2.425624132156372} +07/25/2024 12:23:50 - INFO - __main__ - Step 12390: {'lr': 0.0004924759046454264, 'samples': 594720, 'steps': 12389, 'loss/train': 1.8635953664779663} +07/25/2024 12:23:51 - INFO - __main__ - Step 12391: {'lr': 0.000492474623708032, 'samples': 594768, 'steps': 12390, 'loss/train': 1.8273708820343018} +07/25/2024 12:23:51 - INFO - __main__ - Step 12392: {'lr': 0.0004924733426632766, 'samples': 594816, 'steps': 12391, 'loss/train': 1.53538978099823} +07/25/2024 12:23:51 - INFO - __main__ - Step 12393: {'lr': 0.0004924720615111607, 'samples': 594864, 'steps': 12392, 'loss/train': 1.7391157150268555} +07/25/2024 12:23:51 - INFO - __main__ - Step 12394: {'lr': 0.0004924707802516852, 'samples': 594912, 'steps': 12393, 'loss/train': 1.8061192035675049} +07/25/2024 12:23:52 - INFO - __main__ - Step 12395: {'lr': 0.0004924694988848503, 'samples': 594960, 'steps': 12394, 'loss/train': 2.0260977745056152} +07/25/2024 12:23:52 - INFO - __main__ - Step 12396: {'lr': 0.0004924682174106566, 'samples': 595008, 'steps': 12395, 'loss/train': 1.465173602104187} +07/25/2024 12:23:52 - INFO - __main__ - Step 12397: {'lr': 0.0004924669358291049, 'samples': 595056, 'steps': 12396, 'loss/train': 1.7593414783477783} +07/25/2024 12:23:53 - INFO - __main__ - Step 12398: {'lr': 0.0004924656541401956, 'samples': 595104, 'steps': 12397, 'loss/train': 1.4658825397491455} +07/25/2024 12:23:53 - INFO - __main__ - Step 12399: {'lr': 0.0004924643723439292, 'samples': 595152, 'steps': 12398, 'loss/train': 1.856847882270813} +07/25/2024 12:23:53 - INFO - __main__ - Step 12400: {'lr': 0.0004924630904403065, 'samples': 595200, 'steps': 12399, 'loss/train': 3.1464481353759766} +07/25/2024 12:23:53 - INFO - __main__ - Step 12401: {'lr': 0.0004924618084293278, 'samples': 595248, 'steps': 12400, 'loss/train': 1.7398045063018799} +07/25/2024 12:23:54 - INFO - __main__ - Step 12402: {'lr': 0.0004924605263109939, 'samples': 595296, 'steps': 12401, 'loss/train': 2.602649450302124} +07/25/2024 12:23:54 - INFO - __main__ - Step 12403: {'lr': 0.0004924592440853052, 'samples': 595344, 'steps': 12402, 'loss/train': 2.7438571453094482} +07/25/2024 12:23:54 - INFO - __main__ - Step 12404: {'lr': 0.0004924579617522625, 'samples': 595392, 'steps': 12403, 'loss/train': 1.787399411201477} +07/25/2024 12:23:55 - INFO - __main__ - Step 12405: {'lr': 0.0004924566793118661, 'samples': 595440, 'steps': 12404, 'loss/train': 1.8059113025665283} +07/25/2024 12:23:55 - INFO - __main__ - Step 12406: {'lr': 0.0004924553967641167, 'samples': 595488, 'steps': 12405, 'loss/train': 2.259047746658325} +07/25/2024 12:23:55 - INFO - __main__ - Step 12407: {'lr': 0.0004924541141090149, 'samples': 595536, 'steps': 12406, 'loss/train': 1.4478288888931274} +07/25/2024 12:23:55 - INFO - __main__ - Step 12408: {'lr': 0.0004924528313465611, 'samples': 595584, 'steps': 12407, 'loss/train': 2.0966200828552246} +07/25/2024 12:23:56 - INFO - __main__ - Step 12409: {'lr': 0.0004924515484767561, 'samples': 595632, 'steps': 12408, 'loss/train': 2.5267434120178223} +07/25/2024 12:23:56 - INFO - __main__ - Step 12410: {'lr': 0.0004924502654996002, 'samples': 595680, 'steps': 12409, 'loss/train': 1.1131789684295654} +07/25/2024 12:23:56 - INFO - __main__ - Step 12411: {'lr': 0.0004924489824150943, 'samples': 595728, 'steps': 12410, 'loss/train': 1.0558396577835083} +07/25/2024 12:23:57 - INFO - __main__ - Step 12412: {'lr': 0.0004924476992232386, 'samples': 595776, 'steps': 12411, 'loss/train': 1.546656847000122} +07/25/2024 12:23:57 - INFO - __main__ - Step 12413: {'lr': 0.0004924464159240339, 'samples': 595824, 'steps': 12412, 'loss/train': 1.914962649345398} +07/25/2024 12:23:57 - INFO - __main__ - Step 12414: {'lr': 0.0004924451325174808, 'samples': 595872, 'steps': 12413, 'loss/train': 1.2720768451690674} +07/25/2024 12:23:57 - INFO - __main__ - Step 12415: {'lr': 0.0004924438490035797, 'samples': 595920, 'steps': 12414, 'loss/train': 2.293118715286255} +07/25/2024 12:23:58 - INFO - __main__ - Step 12416: {'lr': 0.0004924425653823314, 'samples': 595968, 'steps': 12415, 'loss/train': 1.5271658897399902} +07/25/2024 12:23:58 - INFO - __main__ - Step 12417: {'lr': 0.0004924412816537362, 'samples': 596016, 'steps': 12416, 'loss/train': 1.8424545526504517} +07/25/2024 12:23:58 - INFO - __main__ - Step 12418: {'lr': 0.0004924399978177948, 'samples': 596064, 'steps': 12417, 'loss/train': 1.6003596782684326} +07/25/2024 12:23:59 - INFO - __main__ - Step 12419: {'lr': 0.0004924387138745078, 'samples': 596112, 'steps': 12418, 'loss/train': 1.6579687595367432} +07/25/2024 12:23:59 - INFO - __main__ - Step 12420: {'lr': 0.0004924374298238757, 'samples': 596160, 'steps': 12419, 'loss/train': 1.702722430229187} +07/25/2024 12:23:59 - INFO - __main__ - Step 12421: {'lr': 0.0004924361456658991, 'samples': 596208, 'steps': 12420, 'loss/train': 0.9680401086807251} +07/25/2024 12:23:59 - INFO - __main__ - Step 12422: {'lr': 0.0004924348614005786, 'samples': 596256, 'steps': 12421, 'loss/train': 2.0883147716522217} +07/25/2024 12:24:00 - INFO - __main__ - Step 12423: {'lr': 0.0004924335770279147, 'samples': 596304, 'steps': 12422, 'loss/train': 1.678320050239563} +07/25/2024 12:24:00 - INFO - __main__ - Step 12424: {'lr': 0.000492432292547908, 'samples': 596352, 'steps': 12423, 'loss/train': 3.2050013542175293} +07/25/2024 12:24:00 - INFO - __main__ - Step 12425: {'lr': 0.0004924310079605591, 'samples': 596400, 'steps': 12424, 'loss/train': 1.7476013898849487} +07/25/2024 12:24:01 - INFO - __main__ - Step 12426: {'lr': 0.0004924297232658685, 'samples': 596448, 'steps': 12425, 'loss/train': 1.9591593742370605} +07/25/2024 12:24:01 - INFO - __main__ - Step 12427: {'lr': 0.0004924284384638368, 'samples': 596496, 'steps': 12426, 'loss/train': 2.476654291152954} +07/25/2024 12:24:01 - INFO - __main__ - Step 12428: {'lr': 0.0004924271535544646, 'samples': 596544, 'steps': 12427, 'loss/train': 1.76093327999115} +07/25/2024 12:24:01 - INFO - __main__ - Step 12429: {'lr': 0.0004924258685377525, 'samples': 596592, 'steps': 12428, 'loss/train': 1.7813085317611694} +07/25/2024 12:24:02 - INFO - __main__ - Step 12430: {'lr': 0.000492424583413701, 'samples': 596640, 'steps': 12429, 'loss/train': 2.264683485031128} +07/25/2024 12:24:02 - INFO - __main__ - Step 12431: {'lr': 0.0004924232981823107, 'samples': 596688, 'steps': 12430, 'loss/train': 0.35734856128692627} +07/25/2024 12:24:02 - INFO - __main__ - Step 12432: {'lr': 0.0004924220128435822, 'samples': 596736, 'steps': 12431, 'loss/train': 2.3552498817443848} +07/25/2024 12:24:03 - INFO - __main__ - Step 12433: {'lr': 0.0004924207273975158, 'samples': 596784, 'steps': 12432, 'loss/train': 1.6449757814407349} +07/25/2024 12:24:03 - INFO - __main__ - Step 12434: {'lr': 0.0004924194418441125, 'samples': 596832, 'steps': 12433, 'loss/train': 0.27230387926101685} +07/25/2024 12:24:03 - INFO - __main__ - Step 12435: {'lr': 0.0004924181561833727, 'samples': 596880, 'steps': 12434, 'loss/train': 0.9496411681175232} +07/25/2024 12:24:03 - INFO - __main__ - Step 12436: {'lr': 0.0004924168704152968, 'samples': 596928, 'steps': 12435, 'loss/train': 1.7889156341552734} +07/25/2024 12:24:04 - INFO - __main__ - Step 12437: {'lr': 0.0004924155845398855, 'samples': 596976, 'steps': 12436, 'loss/train': 1.4627840518951416} +07/25/2024 12:24:04 - INFO - __main__ - Step 12438: {'lr': 0.0004924142985571395, 'samples': 597024, 'steps': 12437, 'loss/train': 1.5375174283981323} +07/25/2024 12:24:04 - INFO - __main__ - Step 12439: {'lr': 0.0004924130124670592, 'samples': 597072, 'steps': 12438, 'loss/train': 2.0727603435516357} +07/25/2024 12:24:04 - INFO - __main__ - Step 12440: {'lr': 0.0004924117262696451, 'samples': 597120, 'steps': 12439, 'loss/train': 2.0193123817443848} +07/25/2024 12:24:05 - INFO - __main__ - Step 12441: {'lr': 0.0004924104399648979, 'samples': 597168, 'steps': 12440, 'loss/train': 2.0910916328430176} +07/25/2024 12:24:05 - INFO - __main__ - Step 12442: {'lr': 0.0004924091535528183, 'samples': 597216, 'steps': 12441, 'loss/train': 2.9614100456237793} +07/25/2024 12:24:05 - INFO - __main__ - Step 12443: {'lr': 0.0004924078670334066, 'samples': 597264, 'steps': 12442, 'loss/train': 2.2590317726135254} +07/25/2024 12:24:06 - INFO - __main__ - Step 12444: {'lr': 0.0004924065804066635, 'samples': 597312, 'steps': 12443, 'loss/train': 1.9033740758895874} +07/25/2024 12:24:06 - INFO - __main__ - Step 12445: {'lr': 0.0004924052936725896, 'samples': 597360, 'steps': 12444, 'loss/train': 0.1933443397283554} +07/25/2024 12:24:06 - INFO - __main__ - Step 12446: {'lr': 0.0004924040068311855, 'samples': 597408, 'steps': 12445, 'loss/train': 1.8216873407363892} +07/25/2024 12:24:06 - INFO - __main__ - Step 12447: {'lr': 0.0004924027198824515, 'samples': 597456, 'steps': 12446, 'loss/train': 1.757176399230957} +07/25/2024 12:24:07 - INFO - __main__ - Step 12448: {'lr': 0.0004924014328263885, 'samples': 597504, 'steps': 12447, 'loss/train': 3.2363507747650146} +07/25/2024 12:24:07 - INFO - __main__ - Step 12449: {'lr': 0.0004924001456629969, 'samples': 597552, 'steps': 12448, 'loss/train': 2.1913001537323} +07/25/2024 12:24:07 - INFO - __main__ - Step 12450: {'lr': 0.0004923988583922773, 'samples': 597600, 'steps': 12449, 'loss/train': 2.332041025161743} +07/25/2024 12:24:08 - INFO - __main__ - Step 12451: {'lr': 0.0004923975710142303, 'samples': 597648, 'steps': 12450, 'loss/train': 1.8912477493286133} +07/25/2024 12:24:08 - INFO - __main__ - Step 12452: {'lr': 0.0004923962835288564, 'samples': 597696, 'steps': 12451, 'loss/train': 1.5403672456741333} +07/25/2024 12:24:08 - INFO - __main__ - Step 12453: {'lr': 0.0004923949959361562, 'samples': 597744, 'steps': 12452, 'loss/train': 2.284024477005005} +07/25/2024 12:24:08 - INFO - __main__ - Step 12454: {'lr': 0.0004923937082361304, 'samples': 597792, 'steps': 12453, 'loss/train': 1.8900258541107178} +07/25/2024 12:24:09 - INFO - __main__ - Step 12455: {'lr': 0.0004923924204287794, 'samples': 597840, 'steps': 12454, 'loss/train': 1.3897004127502441} +07/25/2024 12:24:09 - INFO - __main__ - Step 12456: {'lr': 0.0004923911325141039, 'samples': 597888, 'steps': 12455, 'loss/train': 3.3662867546081543} +07/25/2024 12:24:09 - INFO - __main__ - Step 12457: {'lr': 0.0004923898444921043, 'samples': 597936, 'steps': 12456, 'loss/train': 1.9021341800689697} +07/25/2024 12:24:10 - INFO - __main__ - Step 12458: {'lr': 0.0004923885563627812, 'samples': 597984, 'steps': 12457, 'loss/train': 1.2895945310592651} +07/25/2024 12:24:10 - INFO - __main__ - Step 12459: {'lr': 0.0004923872681261353, 'samples': 598032, 'steps': 12458, 'loss/train': 0.9785230159759521} +07/25/2024 12:24:10 - INFO - __main__ - Step 12460: {'lr': 0.0004923859797821672, 'samples': 598080, 'steps': 12459, 'loss/train': 1.7403947114944458} +07/25/2024 12:24:10 - INFO - __main__ - Step 12461: {'lr': 0.0004923846913308773, 'samples': 598128, 'steps': 12460, 'loss/train': 0.675112783908844} +07/25/2024 12:24:11 - INFO - __main__ - Step 12462: {'lr': 0.0004923834027722663, 'samples': 598176, 'steps': 12461, 'loss/train': 1.950676441192627} +07/25/2024 12:24:11 - INFO - __main__ - Step 12463: {'lr': 0.0004923821141063347, 'samples': 598224, 'steps': 12462, 'loss/train': 2.0788779258728027} +07/25/2024 12:24:11 - INFO - __main__ - Step 12464: {'lr': 0.0004923808253330831, 'samples': 598272, 'steps': 12463, 'loss/train': 1.4533107280731201} +07/25/2024 12:24:12 - INFO - __main__ - Step 12465: {'lr': 0.000492379536452512, 'samples': 598320, 'steps': 12464, 'loss/train': 1.954600214958191} +07/25/2024 12:24:12 - INFO - __main__ - Step 12466: {'lr': 0.0004923782474646222, 'samples': 598368, 'steps': 12465, 'loss/train': 2.612818479537964} +07/25/2024 12:24:12 - INFO - __main__ - Step 12467: {'lr': 0.0004923769583694139, 'samples': 598416, 'steps': 12466, 'loss/train': 2.16473650932312} +07/25/2024 12:24:12 - INFO - __main__ - Step 12468: {'lr': 0.0004923756691668879, 'samples': 598464, 'steps': 12467, 'loss/train': 2.0726158618927} +07/25/2024 12:24:13 - INFO - __main__ - Step 12469: {'lr': 0.0004923743798570448, 'samples': 598512, 'steps': 12468, 'loss/train': 0.11973314732313156} +07/25/2024 12:24:13 - INFO - __main__ - Step 12470: {'lr': 0.0004923730904398851, 'samples': 598560, 'steps': 12469, 'loss/train': 1.7729531526565552} +07/25/2024 12:24:13 - INFO - __main__ - Step 12471: {'lr': 0.0004923718009154094, 'samples': 598608, 'steps': 12470, 'loss/train': 1.4802347421646118} +07/25/2024 12:24:14 - INFO - __main__ - Step 12472: {'lr': 0.0004923705112836183, 'samples': 598656, 'steps': 12471, 'loss/train': 3.4918320178985596} +07/25/2024 12:24:14 - INFO - __main__ - Step 12473: {'lr': 0.0004923692215445122, 'samples': 598704, 'steps': 12472, 'loss/train': 2.372755289077759} +07/25/2024 12:24:14 - INFO - __main__ - Step 12474: {'lr': 0.000492367931698092, 'samples': 598752, 'steps': 12473, 'loss/train': 1.59389066696167} +07/25/2024 12:24:14 - INFO - __main__ - Step 12475: {'lr': 0.000492366641744358, 'samples': 598800, 'steps': 12474, 'loss/train': 1.9756577014923096} +07/25/2024 12:24:15 - INFO - __main__ - Step 12476: {'lr': 0.0004923653516833108, 'samples': 598848, 'steps': 12475, 'loss/train': 2.086049795150757} +07/25/2024 12:24:15 - INFO - __main__ - Step 12477: {'lr': 0.0004923640615149509, 'samples': 598896, 'steps': 12476, 'loss/train': 2.058983325958252} +07/25/2024 12:24:15 - INFO - __main__ - Step 12478: {'lr': 0.0004923627712392791, 'samples': 598944, 'steps': 12477, 'loss/train': 2.0464534759521484} +07/25/2024 12:24:16 - INFO - __main__ - Step 12479: {'lr': 0.0004923614808562959, 'samples': 598992, 'steps': 12478, 'loss/train': 1.4649631977081299} +07/25/2024 12:24:16 - INFO - __main__ - Step 12480: {'lr': 0.0004923601903660018, 'samples': 599040, 'steps': 12479, 'loss/train': 3.1312243938446045} +07/25/2024 12:24:16 - INFO - __main__ - Step 12481: {'lr': 0.0004923588997683973, 'samples': 599088, 'steps': 12480, 'loss/train': 1.5938012599945068} +07/25/2024 12:24:16 - INFO - __main__ - Step 12482: {'lr': 0.0004923576090634831, 'samples': 599136, 'steps': 12481, 'loss/train': 1.1511800289154053} +07/25/2024 12:24:17 - INFO - __main__ - Step 12483: {'lr': 0.00049235631825126, 'samples': 599184, 'steps': 12482, 'loss/train': 0.913587212562561} +07/25/2024 12:24:17 - INFO - __main__ - Step 12484: {'lr': 0.000492355027331728, 'samples': 599232, 'steps': 12483, 'loss/train': 1.5740586519241333} +07/25/2024 12:24:17 - INFO - __main__ - Step 12485: {'lr': 0.0004923537363048881, 'samples': 599280, 'steps': 12484, 'loss/train': 1.7247412204742432} +07/25/2024 12:24:18 - INFO - __main__ - Step 12486: {'lr': 0.0004923524451707407, 'samples': 599328, 'steps': 12485, 'loss/train': 1.6805764436721802} +07/25/2024 12:24:18 - INFO - __main__ - Step 12487: {'lr': 0.0004923511539292866, 'samples': 599376, 'steps': 12486, 'loss/train': 2.2902886867523193} +07/25/2024 12:24:18 - INFO - __main__ - Step 12488: {'lr': 0.000492349862580526, 'samples': 599424, 'steps': 12487, 'loss/train': 1.6943730115890503} +07/25/2024 12:24:18 - INFO - __main__ - Step 12489: {'lr': 0.0004923485711244598, 'samples': 599472, 'steps': 12488, 'loss/train': 2.071601629257202} +07/25/2024 12:24:19 - INFO - __main__ - Step 12490: {'lr': 0.0004923472795610883, 'samples': 599520, 'steps': 12489, 'loss/train': 2.105828285217285} +07/25/2024 12:24:19 - INFO - __main__ - Step 12491: {'lr': 0.0004923459878904123, 'samples': 599568, 'steps': 12490, 'loss/train': 1.550352931022644} +07/25/2024 12:24:19 - INFO - __main__ - Step 12492: {'lr': 0.0004923446961124324, 'samples': 599616, 'steps': 12491, 'loss/train': 1.7469409704208374} +07/25/2024 12:24:20 - INFO - __main__ - Step 12493: {'lr': 0.0004923434042271488, 'samples': 599664, 'steps': 12492, 'loss/train': 0.11704956740140915} +07/25/2024 12:24:20 - INFO - __main__ - Step 12494: {'lr': 0.0004923421122345626, 'samples': 599712, 'steps': 12493, 'loss/train': 1.379903793334961} +07/25/2024 12:24:20 - INFO - __main__ - Step 12495: {'lr': 0.000492340820134674, 'samples': 599760, 'steps': 12494, 'loss/train': 1.1049953699111938} +07/25/2024 12:24:20 - INFO - __main__ - Step 12496: {'lr': 0.0004923395279274836, 'samples': 599808, 'steps': 12495, 'loss/train': 2.9752233028411865} +07/25/2024 12:24:21 - INFO - __main__ - Step 12497: {'lr': 0.000492338235612992, 'samples': 599856, 'steps': 12496, 'loss/train': 1.6026917695999146} +07/25/2024 12:24:21 - INFO - __main__ - Step 12498: {'lr': 0.0004923369431912001, 'samples': 599904, 'steps': 12497, 'loss/train': 1.912791132926941} +07/25/2024 12:24:21 - INFO - __main__ - Step 12499: {'lr': 0.000492335650662108, 'samples': 599952, 'steps': 12498, 'loss/train': 1.6824636459350586} +07/25/2024 12:24:22 - INFO - __main__ - Step 12500: {'lr': 0.0004923343580257164, 'samples': 600000, 'steps': 12499, 'loss/train': 1.3657900094985962} +07/25/2024 12:24:22 - INFO - __main__ - Step 12501: {'lr': 0.0004923330652820261, 'samples': 600048, 'steps': 12500, 'loss/train': 2.259812593460083} +07/25/2024 12:24:22 - INFO - __main__ - Step 12502: {'lr': 0.0004923317724310374, 'samples': 600096, 'steps': 12501, 'loss/train': 1.7835806608200073} +07/25/2024 12:24:22 - INFO - __main__ - Step 12503: {'lr': 0.0004923304794727509, 'samples': 600144, 'steps': 12502, 'loss/train': 1.4805322885513306} +07/25/2024 12:24:23 - INFO - __main__ - Step 12504: {'lr': 0.0004923291864071674, 'samples': 600192, 'steps': 12503, 'loss/train': 3.67732572555542} +07/25/2024 12:24:23 - INFO - __main__ - Step 12505: {'lr': 0.0004923278932342873, 'samples': 600240, 'steps': 12504, 'loss/train': 1.4406884908676147} +07/25/2024 12:24:23 - INFO - __main__ - Step 12506: {'lr': 0.0004923265999541112, 'samples': 600288, 'steps': 12505, 'loss/train': 2.501952886581421} +07/25/2024 12:24:24 - INFO - __main__ - Step 12507: {'lr': 0.0004923253065666396, 'samples': 600336, 'steps': 12506, 'loss/train': 0.9626271724700928} +07/25/2024 12:24:24 - INFO - __main__ - Step 12508: {'lr': 0.0004923240130718733, 'samples': 600384, 'steps': 12507, 'loss/train': 1.8813104629516602} +07/25/2024 12:24:24 - INFO - __main__ - Step 12509: {'lr': 0.0004923227194698125, 'samples': 600432, 'steps': 12508, 'loss/train': 2.1929469108581543} +07/25/2024 12:24:24 - INFO - __main__ - Step 12510: {'lr': 0.0004923214257604582, 'samples': 600480, 'steps': 12509, 'loss/train': 1.9391251802444458} +07/25/2024 12:24:25 - INFO - __main__ - Step 12511: {'lr': 0.0004923201319438107, 'samples': 600528, 'steps': 12510, 'loss/train': 2.174124002456665} +07/25/2024 12:24:25 - INFO - __main__ - Step 12512: {'lr': 0.0004923188380198706, 'samples': 600576, 'steps': 12511, 'loss/train': 1.6473159790039062} +07/25/2024 12:24:25 - INFO - __main__ - Step 12513: {'lr': 0.0004923175439886385, 'samples': 600624, 'steps': 12512, 'loss/train': 2.601806879043579} +07/25/2024 12:24:26 - INFO - __main__ - Step 12514: {'lr': 0.000492316249850115, 'samples': 600672, 'steps': 12513, 'loss/train': 2.1308488845825195} +07/25/2024 12:24:26 - INFO - __main__ - Step 12515: {'lr': 0.0004923149556043006, 'samples': 600720, 'steps': 12514, 'loss/train': 1.623557448387146} +07/25/2024 12:24:26 - INFO - __main__ - Step 12516: {'lr': 0.000492313661251196, 'samples': 600768, 'steps': 12515, 'loss/train': 1.2778651714324951} +07/25/2024 12:24:26 - INFO - __main__ - Step 12517: {'lr': 0.0004923123667908017, 'samples': 600816, 'steps': 12516, 'loss/train': 0.1274484097957611} +07/25/2024 12:24:27 - INFO - __main__ - Step 12518: {'lr': 0.0004923110722231183, 'samples': 600864, 'steps': 12517, 'loss/train': 1.40715754032135} +07/25/2024 12:24:27 - INFO - __main__ - Step 12519: {'lr': 0.0004923097775481462, 'samples': 600912, 'steps': 12518, 'loss/train': 1.2097034454345703} +07/25/2024 12:24:27 - INFO - __main__ - Step 12520: {'lr': 0.0004923084827658863, 'samples': 600960, 'steps': 12519, 'loss/train': 3.1308650970458984} +07/25/2024 12:24:28 - INFO - __main__ - Step 12521: {'lr': 0.0004923071878763388, 'samples': 601008, 'steps': 12520, 'loss/train': 1.7201581001281738} +07/25/2024 12:24:28 - INFO - __main__ - Step 12522: {'lr': 0.0004923058928795047, 'samples': 601056, 'steps': 12521, 'loss/train': 2.351778030395508} +07/25/2024 12:24:28 - INFO - __main__ - Step 12523: {'lr': 0.0004923045977753841, 'samples': 601104, 'steps': 12522, 'loss/train': 0.5779992938041687} +07/25/2024 12:24:28 - INFO - __main__ - Step 12524: {'lr': 0.0004923033025639779, 'samples': 601152, 'steps': 12523, 'loss/train': 2.250622034072876} +07/25/2024 12:24:29 - INFO - __main__ - Step 12525: {'lr': 0.0004923020072452866, 'samples': 601200, 'steps': 12524, 'loss/train': 1.7657861709594727} +07/25/2024 12:24:29 - INFO - __main__ - Step 12526: {'lr': 0.0004923007118193108, 'samples': 601248, 'steps': 12525, 'loss/train': 1.6999146938323975} +07/25/2024 12:24:29 - INFO - __main__ - Step 12527: {'lr': 0.0004922994162860511, 'samples': 601296, 'steps': 12526, 'loss/train': 1.7818610668182373} +07/25/2024 12:24:29 - INFO - __main__ - Step 12528: {'lr': 0.0004922981206455079, 'samples': 601344, 'steps': 12527, 'loss/train': 2.9488015174865723} +07/25/2024 12:24:30 - INFO - __main__ - Step 12529: {'lr': 0.0004922968248976819, 'samples': 601392, 'steps': 12528, 'loss/train': 1.5941356420516968} +07/25/2024 12:24:30 - INFO - __main__ - Step 12530: {'lr': 0.0004922955290425737, 'samples': 601440, 'steps': 12529, 'loss/train': 0.8575345277786255} +07/25/2024 12:24:30 - INFO - __main__ - Step 12531: {'lr': 0.0004922942330801837, 'samples': 601488, 'steps': 12530, 'loss/train': 0.9497289061546326} +07/25/2024 12:24:31 - INFO - __main__ - Step 12532: {'lr': 0.0004922929370105127, 'samples': 601536, 'steps': 12531, 'loss/train': 2.033578395843506} +07/25/2024 12:24:31 - INFO - __main__ - Step 12533: {'lr': 0.0004922916408335612, 'samples': 601584, 'steps': 12532, 'loss/train': 1.238232135772705} +07/25/2024 12:24:31 - INFO - __main__ - Step 12534: {'lr': 0.0004922903445493297, 'samples': 601632, 'steps': 12533, 'loss/train': 1.913521409034729} +07/25/2024 12:24:31 - INFO - __main__ - Step 12535: {'lr': 0.0004922890481578188, 'samples': 601680, 'steps': 12534, 'loss/train': 2.1382572650909424} +07/25/2024 12:24:32 - INFO - __main__ - Step 12536: {'lr': 0.0004922877516590292, 'samples': 601728, 'steps': 12535, 'loss/train': 1.595931887626648} +07/25/2024 12:24:32 - INFO - __main__ - Step 12537: {'lr': 0.0004922864550529612, 'samples': 601776, 'steps': 12536, 'loss/train': 1.508347511291504} +07/25/2024 12:24:32 - INFO - __main__ - Step 12538: {'lr': 0.0004922851583396157, 'samples': 601824, 'steps': 12537, 'loss/train': 1.7535374164581299} +07/25/2024 12:24:33 - INFO - __main__ - Step 12539: {'lr': 0.0004922838615189931, 'samples': 601872, 'steps': 12538, 'loss/train': 1.794806957244873} +07/25/2024 12:24:33 - INFO - __main__ - Step 12540: {'lr': 0.000492282564591094, 'samples': 601920, 'steps': 12539, 'loss/train': 2.4763007164001465} +07/25/2024 12:24:33 - INFO - __main__ - Step 12541: {'lr': 0.000492281267555919, 'samples': 601968, 'steps': 12540, 'loss/train': 0.1428692638874054} +07/25/2024 12:24:33 - INFO - __main__ - Step 12542: {'lr': 0.0004922799704134685, 'samples': 602016, 'steps': 12541, 'loss/train': 1.626413345336914} +07/25/2024 12:24:34 - INFO - __main__ - Step 12543: {'lr': 0.0004922786731637434, 'samples': 602064, 'steps': 12542, 'loss/train': 0.9801007509231567} +07/25/2024 12:24:34 - INFO - __main__ - Step 12544: {'lr': 0.0004922773758067439, 'samples': 602112, 'steps': 12543, 'loss/train': 3.1579647064208984} +07/25/2024 12:24:34 - INFO - __main__ - Step 12545: {'lr': 0.0004922760783424709, 'samples': 602160, 'steps': 12544, 'loss/train': 1.8559348583221436} +07/25/2024 12:24:35 - INFO - __main__ - Step 12546: {'lr': 0.0004922747807709247, 'samples': 602208, 'steps': 12545, 'loss/train': 2.2383408546447754} +07/25/2024 12:24:35 - INFO - __main__ - Step 12547: {'lr': 0.0004922734830921061, 'samples': 602256, 'steps': 12546, 'loss/train': 1.8039623498916626} +07/25/2024 12:24:35 - INFO - __main__ - Step 12548: {'lr': 0.0004922721853060157, 'samples': 602304, 'steps': 12547, 'loss/train': 1.631387710571289} +07/25/2024 12:24:35 - INFO - __main__ - Step 12549: {'lr': 0.0004922708874126538, 'samples': 602352, 'steps': 12548, 'loss/train': 1.564738392829895} +07/25/2024 12:24:36 - INFO - __main__ - Step 12550: {'lr': 0.0004922695894120213, 'samples': 602400, 'steps': 12549, 'loss/train': 1.957193374633789} +07/25/2024 12:24:36 - INFO - __main__ - Step 12551: {'lr': 0.0004922682913041184, 'samples': 602448, 'steps': 12550, 'loss/train': 1.5362396240234375} +07/25/2024 12:24:36 - INFO - __main__ - Step 12552: {'lr': 0.000492266993088946, 'samples': 602496, 'steps': 12551, 'loss/train': 2.5094943046569824} +07/25/2024 12:24:37 - INFO - __main__ - Step 12553: {'lr': 0.0004922656947665046, 'samples': 602544, 'steps': 12552, 'loss/train': 1.4665634632110596} +07/25/2024 12:24:37 - INFO - __main__ - Step 12554: {'lr': 0.0004922643963367947, 'samples': 602592, 'steps': 12553, 'loss/train': 1.7210112810134888} +07/25/2024 12:24:37 - INFO - __main__ - Step 12555: {'lr': 0.000492263097799817, 'samples': 602640, 'steps': 12554, 'loss/train': 0.9151065349578857} +07/25/2024 12:24:37 - INFO - __main__ - Step 12556: {'lr': 0.0004922617991555719, 'samples': 602688, 'steps': 12555, 'loss/train': 1.9620832204818726} +07/25/2024 12:24:38 - INFO - __main__ - Step 12557: {'lr': 0.0004922605004040601, 'samples': 602736, 'steps': 12556, 'loss/train': 1.653228998184204} +07/25/2024 12:24:38 - INFO - __main__ - Step 12558: {'lr': 0.0004922592015452822, 'samples': 602784, 'steps': 12557, 'loss/train': 1.5852420330047607} +07/25/2024 12:24:38 - INFO - __main__ - Step 12559: {'lr': 0.0004922579025792385, 'samples': 602832, 'steps': 12558, 'loss/train': 1.9668983221054077} +07/25/2024 12:24:39 - INFO - __main__ - Step 12560: {'lr': 0.00049225660350593, 'samples': 602880, 'steps': 12559, 'loss/train': 1.9150503873825073} +07/25/2024 12:24:39 - INFO - __main__ - Step 12561: {'lr': 0.0004922553043253569, 'samples': 602928, 'steps': 12560, 'loss/train': 1.1032662391662598} +07/25/2024 12:24:39 - INFO - __main__ - Step 12562: {'lr': 0.00049225400503752, 'samples': 602976, 'steps': 12561, 'loss/train': 1.7309887409210205} +07/25/2024 12:24:39 - INFO - __main__ - Step 12563: {'lr': 0.0004922527056424198, 'samples': 603024, 'steps': 12562, 'loss/train': 1.785878300666809} +07/25/2024 12:24:40 - INFO - __main__ - Step 12564: {'lr': 0.000492251406140057, 'samples': 603072, 'steps': 12563, 'loss/train': 2.1177241802215576} +07/25/2024 12:24:40 - INFO - __main__ - Step 12565: {'lr': 0.000492250106530432, 'samples': 603120, 'steps': 12564, 'loss/train': 0.09604696929454803} +07/25/2024 12:24:40 - INFO - __main__ - Step 12566: {'lr': 0.0004922488068135455, 'samples': 603168, 'steps': 12565, 'loss/train': 1.6233104467391968} +07/25/2024 12:24:41 - INFO - __main__ - Step 12567: {'lr': 0.0004922475069893979, 'samples': 603216, 'steps': 12566, 'loss/train': 1.477238655090332} +07/25/2024 12:24:41 - INFO - __main__ - Step 12568: {'lr': 0.0004922462070579899, 'samples': 603264, 'steps': 12567, 'loss/train': 1.986837387084961} +07/25/2024 12:24:41 - INFO - __main__ - Step 12569: {'lr': 0.0004922449070193221, 'samples': 603312, 'steps': 12568, 'loss/train': 1.4460179805755615} +07/25/2024 12:24:41 - INFO - __main__ - Step 12570: {'lr': 0.0004922436068733951, 'samples': 603360, 'steps': 12569, 'loss/train': 1.3552426099777222} +07/25/2024 12:24:42 - INFO - __main__ - Step 12571: {'lr': 0.0004922423066202094, 'samples': 603408, 'steps': 12570, 'loss/train': 1.657555103302002} +07/25/2024 12:24:42 - INFO - __main__ - Step 12572: {'lr': 0.0004922410062597655, 'samples': 603456, 'steps': 12571, 'loss/train': 1.7945506572723389} +07/25/2024 12:24:42 - INFO - __main__ - Step 12573: {'lr': 0.0004922397057920642, 'samples': 603504, 'steps': 12572, 'loss/train': 1.4545540809631348} +07/25/2024 12:24:43 - INFO - __main__ - Step 12574: {'lr': 0.0004922384052171058, 'samples': 603552, 'steps': 12573, 'loss/train': 1.7685545682907104} +07/25/2024 12:24:43 - INFO - __main__ - Step 12575: {'lr': 0.0004922371045348911, 'samples': 603600, 'steps': 12574, 'loss/train': 1.7951040267944336} +07/25/2024 12:24:43 - INFO - __main__ - Step 12576: {'lr': 0.0004922358037454206, 'samples': 603648, 'steps': 12575, 'loss/train': 2.572021961212158} +07/25/2024 12:24:43 - INFO - __main__ - Step 12577: {'lr': 0.0004922345028486949, 'samples': 603696, 'steps': 12576, 'loss/train': 2.107858657836914} +07/25/2024 12:24:44 - INFO - __main__ - Step 12578: {'lr': 0.0004922332018447146, 'samples': 603744, 'steps': 12577, 'loss/train': 1.7196837663650513} +07/25/2024 12:24:44 - INFO - __main__ - Step 12579: {'lr': 0.0004922319007334801, 'samples': 603792, 'steps': 12578, 'loss/train': 1.285691738128662} +07/25/2024 12:24:44 - INFO - __main__ - Step 12580: {'lr': 0.0004922305995149922, 'samples': 603840, 'steps': 12579, 'loss/train': 1.4970890283584595} +07/25/2024 12:24:45 - INFO - __main__ - Step 12581: {'lr': 0.0004922292981892512, 'samples': 603888, 'steps': 12580, 'loss/train': 2.1818370819091797} +07/25/2024 12:24:45 - INFO - __main__ - Step 12582: {'lr': 0.0004922279967562581, 'samples': 603936, 'steps': 12581, 'loss/train': 1.3357925415039062} +07/25/2024 12:24:45 - INFO - __main__ - Step 12583: {'lr': 0.000492226695216013, 'samples': 603984, 'steps': 12582, 'loss/train': 2.1957497596740723} +07/25/2024 12:24:45 - INFO - __main__ - Step 12584: {'lr': 0.000492225393568517, 'samples': 604032, 'steps': 12583, 'loss/train': 1.8639205694198608} +07/25/2024 12:24:46 - INFO - __main__ - Step 12585: {'lr': 0.0004922240918137701, 'samples': 604080, 'steps': 12584, 'loss/train': 1.3900878429412842} +07/25/2024 12:24:46 - INFO - __main__ - Step 12586: {'lr': 0.0004922227899517733, 'samples': 604128, 'steps': 12585, 'loss/train': 1.7859416007995605} +07/25/2024 12:24:46 - INFO - __main__ - Step 12587: {'lr': 0.0004922214879825269, 'samples': 604176, 'steps': 12586, 'loss/train': 1.6441305875778198} +07/25/2024 12:24:47 - INFO - __main__ - Step 12588: {'lr': 0.0004922201859060317, 'samples': 604224, 'steps': 12587, 'loss/train': 1.230931043624878} +07/25/2024 12:24:47 - INFO - __main__ - Step 12589: {'lr': 0.0004922188837222883, 'samples': 604272, 'steps': 12588, 'loss/train': 0.1342141330242157} +07/25/2024 12:24:47 - INFO - __main__ - Step 12590: {'lr': 0.0004922175814312971, 'samples': 604320, 'steps': 12589, 'loss/train': 1.3264027833938599} +07/25/2024 12:24:47 - INFO - __main__ - Step 12591: {'lr': 0.0004922162790330586, 'samples': 604368, 'steps': 12590, 'loss/train': 2.0014936923980713} +07/25/2024 12:24:48 - INFO - __main__ - Step 12592: {'lr': 0.0004922149765275736, 'samples': 604416, 'steps': 12591, 'loss/train': 2.322552442550659} +07/25/2024 12:24:48 - INFO - __main__ - Step 12593: {'lr': 0.0004922136739148426, 'samples': 604464, 'steps': 12592, 'loss/train': 0.8765213489532471} +07/25/2024 12:24:48 - INFO - __main__ - Step 12594: {'lr': 0.0004922123711948662, 'samples': 604512, 'steps': 12593, 'loss/train': 2.051469564437866} +07/25/2024 12:24:49 - INFO - __main__ - Step 12595: {'lr': 0.000492211068367645, 'samples': 604560, 'steps': 12594, 'loss/train': 1.454796314239502} +07/25/2024 12:24:49 - INFO - __main__ - Step 12596: {'lr': 0.0004922097654331794, 'samples': 604608, 'steps': 12595, 'loss/train': 1.6307361125946045} +07/25/2024 12:24:49 - INFO - __main__ - Step 12597: {'lr': 0.0004922084623914702, 'samples': 604656, 'steps': 12596, 'loss/train': 1.8822752237319946} +07/25/2024 12:24:49 - INFO - __main__ - Step 12598: {'lr': 0.0004922071592425178, 'samples': 604704, 'steps': 12597, 'loss/train': 1.6318544149398804} +07/25/2024 12:24:50 - INFO - __main__ - Step 12599: {'lr': 0.000492205855986323, 'samples': 604752, 'steps': 12598, 'loss/train': 2.0752711296081543} +07/25/2024 12:24:50 - INFO - __main__ - Step 12600: {'lr': 0.0004922045526228861, 'samples': 604800, 'steps': 12599, 'loss/train': 2.7731924057006836} +07/25/2024 12:24:50 - INFO - __main__ - Step 12601: {'lr': 0.0004922032491522078, 'samples': 604848, 'steps': 12600, 'loss/train': 2.28123140335083} +07/25/2024 12:24:51 - INFO - __main__ - Step 12602: {'lr': 0.0004922019455742887, 'samples': 604896, 'steps': 12601, 'loss/train': 2.1947548389434814} +07/25/2024 12:24:51 - INFO - __main__ - Step 12603: {'lr': 0.0004922006418891295, 'samples': 604944, 'steps': 12602, 'loss/train': 1.4012353420257568} +07/25/2024 12:24:51 - INFO - __main__ - Step 12604: {'lr': 0.0004921993380967304, 'samples': 604992, 'steps': 12603, 'loss/train': 1.7387866973876953} +07/25/2024 12:24:51 - INFO - __main__ - Step 12605: {'lr': 0.0004921980341970924, 'samples': 605040, 'steps': 12604, 'loss/train': 1.829308271408081} +07/25/2024 12:24:52 - INFO - __main__ - Step 12606: {'lr': 0.0004921967301902159, 'samples': 605088, 'steps': 12605, 'loss/train': 1.24748957157135} +07/25/2024 12:24:52 - INFO - __main__ - Step 12607: {'lr': 0.0004921954260761014, 'samples': 605136, 'steps': 12606, 'loss/train': 2.091170310974121} +07/25/2024 12:24:52 - INFO - __main__ - Step 12608: {'lr': 0.0004921941218547496, 'samples': 605184, 'steps': 12607, 'loss/train': 1.8333957195281982} +07/25/2024 12:24:53 - INFO - __main__ - Step 12609: {'lr': 0.000492192817526161, 'samples': 605232, 'steps': 12608, 'loss/train': 1.5792288780212402} +07/25/2024 12:24:53 - INFO - __main__ - Step 12610: {'lr': 0.0004921915130903363, 'samples': 605280, 'steps': 12609, 'loss/train': 1.937665343284607} +07/25/2024 12:24:53 - INFO - __main__ - Step 12611: {'lr': 0.0004921902085472758, 'samples': 605328, 'steps': 12610, 'loss/train': 1.645860195159912} +07/25/2024 12:24:53 - INFO - __main__ - Step 12612: {'lr': 0.0004921889038969804, 'samples': 605376, 'steps': 12611, 'loss/train': 1.6810578107833862} +07/25/2024 12:24:54 - INFO - __main__ - Step 12613: {'lr': 0.0004921875991394505, 'samples': 605424, 'steps': 12612, 'loss/train': 0.163948193192482} +07/25/2024 12:24:54 - INFO - __main__ - Step 12614: {'lr': 0.0004921862942746868, 'samples': 605472, 'steps': 12613, 'loss/train': 1.731736421585083} +07/25/2024 12:24:54 - INFO - __main__ - Step 12615: {'lr': 0.0004921849893026896, 'samples': 605520, 'steps': 12614, 'loss/train': 2.016275405883789} +07/25/2024 12:24:55 - INFO - __main__ - Step 12616: {'lr': 0.0004921836842234598, 'samples': 605568, 'steps': 12615, 'loss/train': 1.873241901397705} +07/25/2024 12:24:55 - INFO - __main__ - Step 12617: {'lr': 0.0004921823790369978, 'samples': 605616, 'steps': 12616, 'loss/train': 2.09214186668396} +07/25/2024 12:24:55 - INFO - __main__ - Step 12618: {'lr': 0.0004921810737433044, 'samples': 605664, 'steps': 12617, 'loss/train': 1.329769492149353} +07/25/2024 12:24:55 - INFO - __main__ - Step 12619: {'lr': 0.0004921797683423798, 'samples': 605712, 'steps': 12618, 'loss/train': 1.542771816253662} +07/25/2024 12:24:56 - INFO - __main__ - Step 12620: {'lr': 0.0004921784628342248, 'samples': 605760, 'steps': 12619, 'loss/train': 1.7943364381790161} +07/25/2024 12:24:56 - INFO - __main__ - Step 12621: {'lr': 0.00049217715721884, 'samples': 605808, 'steps': 12620, 'loss/train': 1.3243354558944702} +07/25/2024 12:24:56 - INFO - __main__ - Step 12622: {'lr': 0.000492175851496226, 'samples': 605856, 'steps': 12621, 'loss/train': 1.5337380170822144} +07/25/2024 12:24:56 - INFO - __main__ - Step 12623: {'lr': 0.0004921745456663833, 'samples': 605904, 'steps': 12622, 'loss/train': 1.6952016353607178} +07/25/2024 12:24:57 - INFO - __main__ - Step 12624: {'lr': 0.0004921732397293125, 'samples': 605952, 'steps': 12623, 'loss/train': 2.544391393661499} +07/25/2024 12:24:57 - INFO - __main__ - Step 12625: {'lr': 0.0004921719336850141, 'samples': 606000, 'steps': 12624, 'loss/train': 2.4473772048950195} +07/25/2024 12:24:57 - INFO - __main__ - Step 12626: {'lr': 0.0004921706275334888, 'samples': 606048, 'steps': 12625, 'loss/train': 2.209381341934204} +07/25/2024 12:24:58 - INFO - __main__ - Step 12627: {'lr': 0.0004921693212747372, 'samples': 606096, 'steps': 12626, 'loss/train': 1.2934614419937134} +07/25/2024 12:24:58 - INFO - __main__ - Step 12628: {'lr': 0.0004921680149087597, 'samples': 606144, 'steps': 12627, 'loss/train': 1.8547852039337158} +07/25/2024 12:24:58 - INFO - __main__ - Step 12629: {'lr': 0.000492166708435557, 'samples': 606192, 'steps': 12628, 'loss/train': 1.8852250576019287} +07/25/2024 12:24:58 - INFO - __main__ - Step 12630: {'lr': 0.0004921654018551297, 'samples': 606240, 'steps': 12629, 'loss/train': 0.47251418232917786} +07/25/2024 12:24:59 - INFO - __main__ - Step 12631: {'lr': 0.0004921640951674784, 'samples': 606288, 'steps': 12630, 'loss/train': 1.4171278476715088} +07/25/2024 12:24:59 - INFO - __main__ - Step 12632: {'lr': 0.0004921627883726035, 'samples': 606336, 'steps': 12631, 'loss/train': 1.5467480421066284} +07/25/2024 12:24:59 - INFO - __main__ - Step 12633: {'lr': 0.0004921614814705059, 'samples': 606384, 'steps': 12632, 'loss/train': 2.014042377471924} +07/25/2024 12:25:00 - INFO - __main__ - Step 12634: {'lr': 0.0004921601744611858, 'samples': 606432, 'steps': 12633, 'loss/train': 1.8662413358688354} +07/25/2024 12:25:00 - INFO - __main__ - Step 12635: {'lr': 0.0004921588673446439, 'samples': 606480, 'steps': 12634, 'loss/train': 2.208491325378418} +07/25/2024 12:25:00 - INFO - __main__ - Step 12636: {'lr': 0.0004921575601208811, 'samples': 606528, 'steps': 12635, 'loss/train': 0.7013089656829834} +07/25/2024 12:25:00 - INFO - __main__ - Step 12637: {'lr': 0.0004921562527898975, 'samples': 606576, 'steps': 12636, 'loss/train': 0.12981444597244263} +07/25/2024 12:25:01 - INFO - __main__ - Step 12638: {'lr': 0.000492154945351694, 'samples': 606624, 'steps': 12637, 'loss/train': 1.5020411014556885} +07/25/2024 12:25:01 - INFO - __main__ - Step 12639: {'lr': 0.0004921536378062711, 'samples': 606672, 'steps': 12638, 'loss/train': 1.9461153745651245} +07/25/2024 12:25:01 - INFO - __main__ - Step 12640: {'lr': 0.0004921523301536293, 'samples': 606720, 'steps': 12639, 'loss/train': 1.6533130407333374} +07/25/2024 12:25:02 - INFO - __main__ - Step 12641: {'lr': 0.0004921510223937692, 'samples': 606768, 'steps': 12640, 'loss/train': 2.0163633823394775} +07/25/2024 12:25:02 - INFO - __main__ - Step 12642: {'lr': 0.0004921497145266914, 'samples': 606816, 'steps': 12641, 'loss/train': 1.8197005987167358} +07/25/2024 12:25:02 - INFO - __main__ - Step 12643: {'lr': 0.0004921484065523965, 'samples': 606864, 'steps': 12642, 'loss/train': 1.596014380455017} +07/25/2024 12:25:02 - INFO - __main__ - Step 12644: {'lr': 0.0004921470984708852, 'samples': 606912, 'steps': 12643, 'loss/train': 2.2953739166259766} +07/25/2024 12:25:03 - INFO - __main__ - Step 12645: {'lr': 0.0004921457902821578, 'samples': 606960, 'steps': 12644, 'loss/train': 2.200868606567383} +07/25/2024 12:25:03 - INFO - __main__ - Step 12646: {'lr': 0.0004921444819862151, 'samples': 607008, 'steps': 12645, 'loss/train': 1.3131182193756104} +07/25/2024 12:25:03 - INFO - __main__ - Step 12647: {'lr': 0.0004921431735830575, 'samples': 607056, 'steps': 12646, 'loss/train': 1.7820643186569214} +07/25/2024 12:25:04 - INFO - __main__ - Step 12648: {'lr': 0.0004921418650726859, 'samples': 607104, 'steps': 12647, 'loss/train': 2.3457272052764893} +07/25/2024 12:25:04 - INFO - __main__ - Step 12649: {'lr': 0.0004921405564551005, 'samples': 607152, 'steps': 12648, 'loss/train': 2.0343730449676514} +07/25/2024 12:25:04 - INFO - __main__ - Step 12650: {'lr': 0.000492139247730302, 'samples': 607200, 'steps': 12649, 'loss/train': 1.9496591091156006} +07/25/2024 12:25:04 - INFO - __main__ - Step 12651: {'lr': 0.0004921379388982912, 'samples': 607248, 'steps': 12650, 'loss/train': 1.2282142639160156} +07/25/2024 12:25:05 - INFO - __main__ - Step 12652: {'lr': 0.0004921366299590684, 'samples': 607296, 'steps': 12651, 'loss/train': 1.807303786277771} +07/25/2024 12:25:05 - INFO - __main__ - Step 12653: {'lr': 0.0004921353209126343, 'samples': 607344, 'steps': 12652, 'loss/train': 1.7805968523025513} +07/25/2024 12:25:05 - INFO - __main__ - Step 12654: {'lr': 0.0004921340117589895, 'samples': 607392, 'steps': 12653, 'loss/train': 1.4993500709533691} +07/25/2024 12:25:06 - INFO - __main__ - Step 12655: {'lr': 0.0004921327024981345, 'samples': 607440, 'steps': 12654, 'loss/train': 1.7011404037475586} +07/25/2024 12:25:06 - INFO - __main__ - Step 12656: {'lr': 0.00049213139313007, 'samples': 607488, 'steps': 12655, 'loss/train': 1.490517258644104} +07/25/2024 12:25:06 - INFO - __main__ - Step 12657: {'lr': 0.0004921300836547964, 'samples': 607536, 'steps': 12656, 'loss/train': 2.4122445583343506} +07/25/2024 12:25:06 - INFO - __main__ - Step 12658: {'lr': 0.0004921287740723144, 'samples': 607584, 'steps': 12657, 'loss/train': 1.7523521184921265} +07/25/2024 12:25:07 - INFO - __main__ - Step 12659: {'lr': 0.0004921274643826246, 'samples': 607632, 'steps': 12658, 'loss/train': 1.681196928024292} +07/25/2024 12:25:07 - INFO - __main__ - Step 12660: {'lr': 0.0004921261545857275, 'samples': 607680, 'steps': 12659, 'loss/train': 1.760143518447876} +07/25/2024 12:25:07 - INFO - __main__ - Step 12661: {'lr': 0.0004921248446816237, 'samples': 607728, 'steps': 12660, 'loss/train': 0.2493276596069336} +07/25/2024 12:25:08 - INFO - __main__ - Step 12662: {'lr': 0.000492123534670314, 'samples': 607776, 'steps': 12661, 'loss/train': 2.0813307762145996} +07/25/2024 12:25:08 - INFO - __main__ - Step 12663: {'lr': 0.0004921222245517986, 'samples': 607824, 'steps': 12662, 'loss/train': 1.871923565864563} +07/25/2024 12:25:08 - INFO - __main__ - Step 12664: {'lr': 0.0004921209143260783, 'samples': 607872, 'steps': 12663, 'loss/train': 1.3536357879638672} +07/25/2024 12:25:08 - INFO - __main__ - Step 12665: {'lr': 0.0004921196039931536, 'samples': 607920, 'steps': 12664, 'loss/train': 1.9571415185928345} +07/25/2024 12:25:09 - INFO - __main__ - Step 12666: {'lr': 0.0004921182935530251, 'samples': 607968, 'steps': 12665, 'loss/train': 1.4195537567138672} +07/25/2024 12:25:09 - INFO - __main__ - Step 12667: {'lr': 0.0004921169830056936, 'samples': 608016, 'steps': 12666, 'loss/train': 1.840780258178711} +07/25/2024 12:25:09 - INFO - __main__ - Step 12668: {'lr': 0.0004921156723511593, 'samples': 608064, 'steps': 12667, 'loss/train': 1.8065954446792603} +07/25/2024 12:25:10 - INFO - __main__ - Step 12669: {'lr': 0.0004921143615894231, 'samples': 608112, 'steps': 12668, 'loss/train': 1.7781105041503906} +07/25/2024 12:25:10 - INFO - __main__ - Step 12670: {'lr': 0.0004921130507204854, 'samples': 608160, 'steps': 12669, 'loss/train': 1.4433492422103882} +07/25/2024 12:25:10 - INFO - __main__ - Step 12671: {'lr': 0.0004921117397443468, 'samples': 608208, 'steps': 12670, 'loss/train': 1.5636157989501953} +07/25/2024 12:25:10 - INFO - __main__ - Step 12672: {'lr': 0.0004921104286610079, 'samples': 608256, 'steps': 12671, 'loss/train': 2.0815346240997314} +07/25/2024 12:25:11 - INFO - __main__ - Step 12673: {'lr': 0.0004921091174704692, 'samples': 608304, 'steps': 12672, 'loss/train': 1.719506025314331} +07/25/2024 12:25:11 - INFO - __main__ - Step 12674: {'lr': 0.0004921078061727314, 'samples': 608352, 'steps': 12673, 'loss/train': 1.9105473756790161} +07/25/2024 12:25:11 - INFO - __main__ - Step 12675: {'lr': 0.0004921064947677952, 'samples': 608400, 'steps': 12674, 'loss/train': 1.1971842050552368} +07/25/2024 12:25:12 - INFO - __main__ - Step 12676: {'lr': 0.0004921051832556609, 'samples': 608448, 'steps': 12675, 'loss/train': 1.5346057415008545} +07/25/2024 12:25:12 - INFO - __main__ - Step 12677: {'lr': 0.0004921038716363292, 'samples': 608496, 'steps': 12676, 'loss/train': 1.9756591320037842} +07/25/2024 12:25:12 - INFO - __main__ - Step 12678: {'lr': 0.0004921025599098007, 'samples': 608544, 'steps': 12677, 'loss/train': 2.178175449371338} +07/25/2024 12:25:12 - INFO - __main__ - Step 12679: {'lr': 0.0004921012480760761, 'samples': 608592, 'steps': 12678, 'loss/train': 1.377833604812622} +07/25/2024 12:25:13 - INFO - __main__ - Step 12680: {'lr': 0.0004920999361351557, 'samples': 608640, 'steps': 12679, 'loss/train': 1.764828085899353} +07/25/2024 12:25:13 - INFO - __main__ - Step 12681: {'lr': 0.0004920986240870402, 'samples': 608688, 'steps': 12680, 'loss/train': 1.925755262374878} +07/25/2024 12:25:13 - INFO - __main__ - Step 12682: {'lr': 0.0004920973119317304, 'samples': 608736, 'steps': 12681, 'loss/train': 2.194809913635254} +07/25/2024 12:25:14 - INFO - __main__ - Step 12683: {'lr': 0.0004920959996692265, 'samples': 608784, 'steps': 12682, 'loss/train': 2.2387850284576416} +07/25/2024 12:25:14 - INFO - __main__ - Step 12684: {'lr': 0.0004920946872995294, 'samples': 608832, 'steps': 12683, 'loss/train': 1.786548376083374} +07/25/2024 12:25:14 - INFO - __main__ - Step 12685: {'lr': 0.0004920933748226395, 'samples': 608880, 'steps': 12684, 'loss/train': 1.836901307106018} +07/25/2024 12:25:14 - INFO - __main__ - Step 12686: {'lr': 0.0004920920622385575, 'samples': 608928, 'steps': 12685, 'loss/train': 2.440607786178589} +07/25/2024 12:25:15 - INFO - __main__ - Step 12687: {'lr': 0.0004920907495472839, 'samples': 608976, 'steps': 12686, 'loss/train': 1.8510637283325195} +07/25/2024 12:25:15 - INFO - __main__ - Step 12688: {'lr': 0.0004920894367488192, 'samples': 609024, 'steps': 12687, 'loss/train': 1.8648344278335571} +07/25/2024 12:25:15 - INFO - __main__ - Step 12689: {'lr': 0.0004920881238431641, 'samples': 609072, 'steps': 12688, 'loss/train': 1.7787162065505981} +07/25/2024 12:25:16 - INFO - __main__ - Step 12690: {'lr': 0.0004920868108303192, 'samples': 609120, 'steps': 12689, 'loss/train': 2.138119697570801} +07/25/2024 12:25:16 - INFO - __main__ - Step 12691: {'lr': 0.0004920854977102851, 'samples': 609168, 'steps': 12690, 'loss/train': 2.337491273880005} +07/25/2024 12:25:16 - INFO - __main__ - Step 12692: {'lr': 0.0004920841844830622, 'samples': 609216, 'steps': 12691, 'loss/train': 2.138061285018921} +07/25/2024 12:25:16 - INFO - __main__ - Step 12693: {'lr': 0.0004920828711486514, 'samples': 609264, 'steps': 12692, 'loss/train': 1.6912508010864258} +07/25/2024 12:25:17 - INFO - __main__ - Step 12694: {'lr': 0.000492081557707053, 'samples': 609312, 'steps': 12693, 'loss/train': 0.8329355716705322} +07/25/2024 12:25:17 - INFO - __main__ - Step 12695: {'lr': 0.0004920802441582676, 'samples': 609360, 'steps': 12694, 'loss/train': 1.957169532775879} +07/25/2024 12:25:17 - INFO - __main__ - Step 12696: {'lr': 0.0004920789305022959, 'samples': 609408, 'steps': 12695, 'loss/train': 1.6676949262619019} +07/25/2024 12:25:18 - INFO - __main__ - Step 12697: {'lr': 0.0004920776167391384, 'samples': 609456, 'steps': 12696, 'loss/train': 1.6537060737609863} +07/25/2024 12:25:18 - INFO - __main__ - Step 12698: {'lr': 0.0004920763028687957, 'samples': 609504, 'steps': 12697, 'loss/train': 1.8627564907073975} +07/25/2024 12:25:18 - INFO - __main__ - Step 12699: {'lr': 0.0004920749888912685, 'samples': 609552, 'steps': 12698, 'loss/train': 1.9069150686264038} +07/25/2024 12:25:18 - INFO - __main__ - Step 12700: {'lr': 0.0004920736748065572, 'samples': 609600, 'steps': 12699, 'loss/train': 1.491896390914917} +07/25/2024 12:25:19 - INFO - __main__ - Step 12701: {'lr': 0.0004920723606146625, 'samples': 609648, 'steps': 12700, 'loss/train': 1.797882318496704} +07/25/2024 12:25:19 - INFO - __main__ - Step 12702: {'lr': 0.0004920710463155849, 'samples': 609696, 'steps': 12701, 'loss/train': 1.9125702381134033} +07/25/2024 12:25:19 - INFO - __main__ - Step 12703: {'lr': 0.0004920697319093251, 'samples': 609744, 'steps': 12702, 'loss/train': 1.7715880870819092} +07/25/2024 12:25:19 - INFO - __main__ - Step 12704: {'lr': 0.0004920684173958835, 'samples': 609792, 'steps': 12703, 'loss/train': 1.6992838382720947} +07/25/2024 12:25:20 - INFO - __main__ - Step 12705: {'lr': 0.0004920671027752607, 'samples': 609840, 'steps': 12704, 'loss/train': 1.8733817338943481} +07/25/2024 12:25:20 - INFO - __main__ - Step 12706: {'lr': 0.0004920657880474576, 'samples': 609888, 'steps': 12705, 'loss/train': 1.8065555095672607} +07/25/2024 12:25:20 - INFO - __main__ - Step 12707: {'lr': 0.0004920644732124745, 'samples': 609936, 'steps': 12706, 'loss/train': 1.5578985214233398} +07/25/2024 12:25:21 - INFO - __main__ - Step 12708: {'lr': 0.0004920631582703119, 'samples': 609984, 'steps': 12707, 'loss/train': 1.7209837436676025} +07/25/2024 12:25:21 - INFO - __main__ - Step 12709: {'lr': 0.0004920618432209706, 'samples': 610032, 'steps': 12708, 'loss/train': 1.5014373064041138} +07/25/2024 12:25:21 - INFO - __main__ - Step 12710: {'lr': 0.0004920605280644511, 'samples': 610080, 'steps': 12709, 'loss/train': 1.0410500764846802} +07/25/2024 12:25:21 - INFO - __main__ - Step 12711: {'lr': 0.000492059212800754, 'samples': 610128, 'steps': 12710, 'loss/train': 0.8860459923744202} +07/25/2024 12:25:22 - INFO - __main__ - Step 12712: {'lr': 0.0004920578974298798, 'samples': 610176, 'steps': 12711, 'loss/train': 1.6132988929748535} +07/25/2024 12:25:22 - INFO - __main__ - Step 12713: {'lr': 0.0004920565819518292, 'samples': 610224, 'steps': 12712, 'loss/train': 1.9174220561981201} +07/25/2024 12:25:22 - INFO - __main__ - Step 12714: {'lr': 0.0004920552663666029, 'samples': 610272, 'steps': 12713, 'loss/train': 1.7459696531295776} +07/25/2024 12:25:23 - INFO - __main__ - Step 12715: {'lr': 0.000492053950674201, 'samples': 610320, 'steps': 12714, 'loss/train': 1.934064269065857} +07/25/2024 12:25:23 - INFO - __main__ - Step 12716: {'lr': 0.0004920526348746245, 'samples': 610368, 'steps': 12715, 'loss/train': 1.7193056344985962} +07/25/2024 12:25:23 - INFO - __main__ - Step 12717: {'lr': 0.0004920513189678739, 'samples': 610416, 'steps': 12716, 'loss/train': 2.620957851409912} +07/25/2024 12:25:23 - INFO - __main__ - Step 12718: {'lr': 0.0004920500029539498, 'samples': 610464, 'steps': 12717, 'loss/train': 1.6507177352905273} +07/25/2024 12:25:24 - INFO - __main__ - Step 12719: {'lr': 0.0004920486868328527, 'samples': 610512, 'steps': 12718, 'loss/train': 0.17035050690174103} +07/25/2024 12:25:24 - INFO - __main__ - Step 12720: {'lr': 0.0004920473706045832, 'samples': 610560, 'steps': 12719, 'loss/train': 1.761291265487671} +07/25/2024 12:25:24 - INFO - __main__ - Step 12721: {'lr': 0.0004920460542691419, 'samples': 610608, 'steps': 12720, 'loss/train': 2.061939239501953} +07/25/2024 12:25:25 - INFO - __main__ - Step 12722: {'lr': 0.0004920447378265294, 'samples': 610656, 'steps': 12721, 'loss/train': 2.5163755416870117} +07/25/2024 12:25:25 - INFO - __main__ - Step 12723: {'lr': 0.0004920434212767463, 'samples': 610704, 'steps': 12722, 'loss/train': 1.6630915403366089} +07/25/2024 12:25:25 - INFO - __main__ - Step 12724: {'lr': 0.0004920421046197931, 'samples': 610752, 'steps': 12723, 'loss/train': 2.105886936187744} +07/25/2024 12:25:25 - INFO - __main__ - Step 12725: {'lr': 0.0004920407878556705, 'samples': 610800, 'steps': 12724, 'loss/train': 1.0600337982177734} +07/25/2024 12:25:26 - INFO - __main__ - Step 12726: {'lr': 0.000492039470984379, 'samples': 610848, 'steps': 12725, 'loss/train': 1.732081413269043} +07/25/2024 12:25:26 - INFO - __main__ - Step 12727: {'lr': 0.0004920381540059192, 'samples': 610896, 'steps': 12726, 'loss/train': 1.7266793251037598} +07/25/2024 12:25:26 - INFO - __main__ - Step 12728: {'lr': 0.0004920368369202917, 'samples': 610944, 'steps': 12727, 'loss/train': 1.9182449579238892} +07/25/2024 12:25:27 - INFO - __main__ - Step 12729: {'lr': 0.0004920355197274971, 'samples': 610992, 'steps': 12728, 'loss/train': 0.8616781234741211} +07/25/2024 12:25:27 - INFO - __main__ - Step 12730: {'lr': 0.0004920342024275359, 'samples': 611040, 'steps': 12729, 'loss/train': 1.634788990020752} +07/25/2024 12:25:27 - INFO - __main__ - Step 12731: {'lr': 0.0004920328850204087, 'samples': 611088, 'steps': 12730, 'loss/train': 1.63584566116333} +07/25/2024 12:25:27 - INFO - __main__ - Step 12732: {'lr': 0.0004920315675061162, 'samples': 611136, 'steps': 12731, 'loss/train': 1.5285236835479736} +07/25/2024 12:25:28 - INFO - __main__ - Step 12733: {'lr': 0.0004920302498846589, 'samples': 611184, 'steps': 12732, 'loss/train': 2.6484365463256836} +07/25/2024 12:25:28 - INFO - __main__ - Step 12734: {'lr': 0.0004920289321560374, 'samples': 611232, 'steps': 12733, 'loss/train': 1.583997130393982} +07/25/2024 12:25:28 - INFO - __main__ - Step 12735: {'lr': 0.0004920276143202522, 'samples': 611280, 'steps': 12734, 'loss/train': 1.861490249633789} +07/25/2024 12:25:29 - INFO - __main__ - Step 12736: {'lr': 0.000492026296377304, 'samples': 611328, 'steps': 12735, 'loss/train': 1.496652364730835} +07/25/2024 12:25:29 - INFO - __main__ - Step 12737: {'lr': 0.0004920249783271933, 'samples': 611376, 'steps': 12736, 'loss/train': 2.056955575942993} +07/25/2024 12:25:29 - INFO - __main__ - Step 12738: {'lr': 0.0004920236601699208, 'samples': 611424, 'steps': 12737, 'loss/train': 1.5568660497665405} +07/25/2024 12:25:29 - INFO - __main__ - Step 12739: {'lr': 0.0004920223419054869, 'samples': 611472, 'steps': 12738, 'loss/train': 1.5772587060928345} +07/25/2024 12:25:30 - INFO - __main__ - Step 12740: {'lr': 0.0004920210235338923, 'samples': 611520, 'steps': 12739, 'loss/train': 1.6327131986618042} +07/25/2024 12:25:30 - INFO - __main__ - Step 12741: {'lr': 0.0004920197050551377, 'samples': 611568, 'steps': 12740, 'loss/train': 2.2470529079437256} +07/25/2024 12:25:30 - INFO - __main__ - Step 12742: {'lr': 0.0004920183864692235, 'samples': 611616, 'steps': 12741, 'loss/train': 2.1460907459259033} +07/25/2024 12:25:31 - INFO - __main__ - Step 12743: {'lr': 0.0004920170677761502, 'samples': 611664, 'steps': 12742, 'loss/train': 0.13338445127010345} +07/25/2024 12:25:31 - INFO - __main__ - Step 12744: {'lr': 0.0004920157489759187, 'samples': 611712, 'steps': 12743, 'loss/train': 2.169654369354248} +07/25/2024 12:25:31 - INFO - __main__ - Step 12745: {'lr': 0.0004920144300685293, 'samples': 611760, 'steps': 12744, 'loss/train': 1.8313816785812378} +07/25/2024 12:25:31 - INFO - __main__ - Step 12746: {'lr': 0.0004920131110539828, 'samples': 611808, 'steps': 12745, 'loss/train': 2.229813814163208} +07/25/2024 12:25:32 - INFO - __main__ - Step 12747: {'lr': 0.0004920117919322795, 'samples': 611856, 'steps': 12746, 'loss/train': 1.8315294981002808} +07/25/2024 12:25:32 - INFO - __main__ - Step 12748: {'lr': 0.0004920104727034202, 'samples': 611904, 'steps': 12747, 'loss/train': 2.24149489402771} +07/25/2024 12:25:32 - INFO - __main__ - Step 12749: {'lr': 0.0004920091533674056, 'samples': 611952, 'steps': 12748, 'loss/train': 2.081798553466797} +07/25/2024 12:25:33 - INFO - __main__ - Step 12750: {'lr': 0.000492007833924236, 'samples': 612000, 'steps': 12749, 'loss/train': 2.0698866844177246} +07/25/2024 12:25:33 - INFO - __main__ - Step 12751: {'lr': 0.0004920065143739122, 'samples': 612048, 'steps': 12750, 'loss/train': 2.206756114959717} +07/25/2024 12:25:33 - INFO - __main__ - Step 12752: {'lr': 0.0004920051947164346, 'samples': 612096, 'steps': 12751, 'loss/train': 2.1599061489105225} +07/25/2024 12:25:33 - INFO - __main__ - Step 12753: {'lr': 0.000492003874951804, 'samples': 612144, 'steps': 12752, 'loss/train': 1.3684107065200806} +07/25/2024 12:25:34 - INFO - __main__ - Step 12754: {'lr': 0.0004920025550800207, 'samples': 612192, 'steps': 12753, 'loss/train': 1.8083887100219727} +07/25/2024 12:25:34 - INFO - __main__ - Step 12755: {'lr': 0.0004920012351010856, 'samples': 612240, 'steps': 12754, 'loss/train': 2.349973678588867} +07/25/2024 12:25:34 - INFO - __main__ - Step 12756: {'lr': 0.0004919999150149991, 'samples': 612288, 'steps': 12755, 'loss/train': 1.7213369607925415} +07/25/2024 12:25:35 - INFO - __main__ - Step 12757: {'lr': 0.0004919985948217618, 'samples': 612336, 'steps': 12756, 'loss/train': 1.4676936864852905} +07/25/2024 12:25:35 - INFO - __main__ - Step 12758: {'lr': 0.0004919972745213743, 'samples': 612384, 'steps': 12757, 'loss/train': 2.051365375518799} +07/25/2024 12:25:35 - INFO - __main__ - Step 12759: {'lr': 0.0004919959541138372, 'samples': 612432, 'steps': 12758, 'loss/train': 1.0501524209976196} +07/25/2024 12:25:35 - INFO - __main__ - Step 12760: {'lr': 0.000491994633599151, 'samples': 612480, 'steps': 12759, 'loss/train': 2.039951801300049} +07/25/2024 12:25:36 - INFO - __main__ - Step 12761: {'lr': 0.0004919933129773164, 'samples': 612528, 'steps': 12760, 'loss/train': 1.961150884628296} +07/25/2024 12:25:36 - INFO - __main__ - Step 12762: {'lr': 0.000491991992248334, 'samples': 612576, 'steps': 12761, 'loss/train': 2.087782382965088} +07/25/2024 12:25:36 - INFO - __main__ - Step 12763: {'lr': 0.0004919906714122042, 'samples': 612624, 'steps': 12762, 'loss/train': 1.6789472103118896} +07/25/2024 12:25:37 - INFO - __main__ - Step 12764: {'lr': 0.0004919893504689278, 'samples': 612672, 'steps': 12763, 'loss/train': 0.7364797592163086} +07/25/2024 12:25:37 - INFO - __main__ - Step 12765: {'lr': 0.0004919880294185052, 'samples': 612720, 'steps': 12764, 'loss/train': 1.4424107074737549} +07/25/2024 12:25:37 - INFO - __main__ - Step 12766: {'lr': 0.0004919867082609372, 'samples': 612768, 'steps': 12765, 'loss/train': 2.5269057750701904} +07/25/2024 12:25:37 - INFO - __main__ - Step 12767: {'lr': 0.0004919853869962243, 'samples': 612816, 'steps': 12766, 'loss/train': 0.10770402103662491} +07/25/2024 12:25:38 - INFO - __main__ - Step 12768: {'lr': 0.0004919840656243669, 'samples': 612864, 'steps': 12767, 'loss/train': 2.2379820346832275} +07/25/2024 12:25:38 - INFO - __main__ - Step 12769: {'lr': 0.000491982744145366, 'samples': 612912, 'steps': 12768, 'loss/train': 1.6339107751846313} +07/25/2024 12:25:38 - INFO - __main__ - Step 12770: {'lr': 0.0004919814225592216, 'samples': 612960, 'steps': 12769, 'loss/train': 1.9268200397491455} +07/25/2024 12:25:39 - INFO - __main__ - Step 12771: {'lr': 0.0004919801008659348, 'samples': 613008, 'steps': 12770, 'loss/train': 1.7615147829055786} +07/25/2024 12:25:39 - INFO - __main__ - Step 12772: {'lr': 0.0004919787790655059, 'samples': 613056, 'steps': 12771, 'loss/train': 1.9024008512496948} +07/25/2024 12:25:39 - INFO - __main__ - Step 12773: {'lr': 0.0004919774571579356, 'samples': 613104, 'steps': 12772, 'loss/train': 1.8390878438949585} +07/25/2024 12:25:39 - INFO - __main__ - Step 12774: {'lr': 0.0004919761351432245, 'samples': 613152, 'steps': 12773, 'loss/train': 2.0213396549224854} +07/25/2024 12:25:40 - INFO - __main__ - Step 12775: {'lr': 0.0004919748130213732, 'samples': 613200, 'steps': 12774, 'loss/train': 1.9965407848358154} +07/25/2024 12:25:40 - INFO - __main__ - Step 12776: {'lr': 0.0004919734907923821, 'samples': 613248, 'steps': 12775, 'loss/train': 2.3233039379119873} +07/25/2024 12:25:40 - INFO - __main__ - Step 12777: {'lr': 0.000491972168456252, 'samples': 613296, 'steps': 12776, 'loss/train': 1.5634818077087402} +07/25/2024 12:25:40 - INFO - __main__ - Step 12778: {'lr': 0.0004919708460129834, 'samples': 613344, 'steps': 12777, 'loss/train': 1.679350733757019} +07/25/2024 12:25:41 - INFO - __main__ - Step 12779: {'lr': 0.0004919695234625769, 'samples': 613392, 'steps': 12778, 'loss/train': 1.7186568975448608} +07/25/2024 12:25:41 - INFO - __main__ - Step 12780: {'lr': 0.000491968200805033, 'samples': 613440, 'steps': 12779, 'loss/train': 1.1602541208267212} +07/25/2024 12:25:41 - INFO - __main__ - Step 12781: {'lr': 0.0004919668780403524, 'samples': 613488, 'steps': 12780, 'loss/train': 1.5404465198516846} +07/25/2024 12:25:42 - INFO - __main__ - Step 12782: {'lr': 0.0004919655551685356, 'samples': 613536, 'steps': 12781, 'loss/train': 2.0626380443573} +07/25/2024 12:25:42 - INFO - __main__ - Step 12783: {'lr': 0.0004919642321895833, 'samples': 613584, 'steps': 12782, 'loss/train': 1.3253604173660278} +07/25/2024 12:25:42 - INFO - __main__ - Step 12784: {'lr': 0.000491962909103496, 'samples': 613632, 'steps': 12783, 'loss/train': 1.9058406352996826} +07/25/2024 12:25:42 - INFO - __main__ - Step 12785: {'lr': 0.0004919615859102744, 'samples': 613680, 'steps': 12784, 'loss/train': 1.9356143474578857} +07/25/2024 12:25:43 - INFO - __main__ - Step 12786: {'lr': 0.0004919602626099188, 'samples': 613728, 'steps': 12785, 'loss/train': 1.8374723196029663} +07/25/2024 12:25:43 - INFO - __main__ - Step 12787: {'lr': 0.00049195893920243, 'samples': 613776, 'steps': 12786, 'loss/train': 1.972548007965088} +07/25/2024 12:25:43 - INFO - __main__ - Step 12788: {'lr': 0.0004919576156878087, 'samples': 613824, 'steps': 12787, 'loss/train': 1.7222709655761719} +07/25/2024 12:25:44 - INFO - __main__ - Step 12789: {'lr': 0.0004919562920660553, 'samples': 613872, 'steps': 12788, 'loss/train': 1.8662610054016113} +07/25/2024 12:25:44 - INFO - __main__ - Step 12790: {'lr': 0.0004919549683371705, 'samples': 613920, 'steps': 12789, 'loss/train': 1.9824328422546387} +07/25/2024 12:25:44 - INFO - __main__ - Step 12791: {'lr': 0.0004919536445011546, 'samples': 613968, 'steps': 12790, 'loss/train': 0.18840400874614716} +07/25/2024 12:25:44 - INFO - __main__ - Step 12792: {'lr': 0.0004919523205580086, 'samples': 614016, 'steps': 12791, 'loss/train': 1.6523443460464478} +07/25/2024 12:25:45 - INFO - __main__ - Step 12793: {'lr': 0.0004919509965077329, 'samples': 614064, 'steps': 12792, 'loss/train': 0.6680886149406433} +07/25/2024 12:25:45 - INFO - __main__ - Step 12794: {'lr': 0.000491949672350328, 'samples': 614112, 'steps': 12793, 'loss/train': 1.967133641242981} +07/25/2024 12:25:45 - INFO - __main__ - Step 12795: {'lr': 0.0004919483480857944, 'samples': 614160, 'steps': 12794, 'loss/train': 2.1463444232940674} +07/25/2024 12:25:46 - INFO - __main__ - Step 12796: {'lr': 0.0004919470237141332, 'samples': 614208, 'steps': 12795, 'loss/train': 2.0962839126586914} +07/25/2024 12:25:46 - INFO - __main__ - Step 12797: {'lr': 0.0004919456992353444, 'samples': 614256, 'steps': 12796, 'loss/train': 2.0536885261535645} +07/25/2024 12:25:46 - INFO - __main__ - Step 12798: {'lr': 0.0004919443746494289, 'samples': 614304, 'steps': 12797, 'loss/train': 1.5139979124069214} +07/25/2024 12:25:46 - INFO - __main__ - Step 12799: {'lr': 0.0004919430499563871, 'samples': 614352, 'steps': 12798, 'loss/train': 1.6469273567199707} +07/25/2024 12:25:47 - INFO - __main__ - Step 12800: {'lr': 0.0004919417251562199, 'samples': 614400, 'steps': 12799, 'loss/train': 1.7628484964370728} +07/25/2024 12:25:47 - INFO - __main__ - Step 12801: {'lr': 0.0004919404002489275, 'samples': 614448, 'steps': 12800, 'loss/train': 1.037835955619812} +07/25/2024 12:25:47 - INFO - __main__ - Step 12802: {'lr': 0.0004919390752345107, 'samples': 614496, 'steps': 12801, 'loss/train': 2.109598159790039} +07/25/2024 12:25:48 - INFO - __main__ - Step 12803: {'lr': 0.0004919377501129701, 'samples': 614544, 'steps': 12802, 'loss/train': 1.9691002368927002} +07/25/2024 12:25:48 - INFO - __main__ - Step 12804: {'lr': 0.0004919364248843063, 'samples': 614592, 'steps': 12803, 'loss/train': 2.0034568309783936} +07/25/2024 12:25:48 - INFO - __main__ - Step 12805: {'lr': 0.0004919350995485198, 'samples': 614640, 'steps': 12804, 'loss/train': 1.0942401885986328} +07/25/2024 12:25:48 - INFO - __main__ - Step 12806: {'lr': 0.0004919337741056112, 'samples': 614688, 'steps': 12805, 'loss/train': 1.6668789386749268} +07/25/2024 12:25:49 - INFO - __main__ - Step 12807: {'lr': 0.0004919324485555811, 'samples': 614736, 'steps': 12806, 'loss/train': 2.269404172897339} +07/25/2024 12:25:49 - INFO - __main__ - Step 12808: {'lr': 0.0004919311228984301, 'samples': 614784, 'steps': 12807, 'loss/train': 2.0403754711151123} +07/25/2024 12:25:49 - INFO - __main__ - Step 12809: {'lr': 0.0004919297971341587, 'samples': 614832, 'steps': 12808, 'loss/train': 2.0878756046295166} +07/25/2024 12:25:50 - INFO - __main__ - Step 12810: {'lr': 0.0004919284712627676, 'samples': 614880, 'steps': 12809, 'loss/train': 2.0351312160491943} +07/25/2024 12:25:50 - INFO - __main__ - Step 12811: {'lr': 0.0004919271452842575, 'samples': 614928, 'steps': 12810, 'loss/train': 1.9288378953933716} +07/25/2024 12:25:50 - INFO - __main__ - Step 12812: {'lr': 0.0004919258191986287, 'samples': 614976, 'steps': 12811, 'loss/train': 1.5797457695007324} +07/25/2024 12:25:50 - INFO - __main__ - Step 12813: {'lr': 0.0004919244930058819, 'samples': 615024, 'steps': 12812, 'loss/train': 1.6307529211044312} +07/25/2024 12:25:51 - INFO - __main__ - Step 12814: {'lr': 0.0004919231667060177, 'samples': 615072, 'steps': 12813, 'loss/train': 1.928517460823059} +07/25/2024 12:25:51 - INFO - __main__ - Step 12815: {'lr': 0.0004919218402990367, 'samples': 615120, 'steps': 12814, 'loss/train': 0.13970538973808289} +07/25/2024 12:25:51 - INFO - __main__ - Step 12816: {'lr': 0.0004919205137849396, 'samples': 615168, 'steps': 12815, 'loss/train': 1.999046802520752} +07/25/2024 12:25:52 - INFO - __main__ - Step 12817: {'lr': 0.0004919191871637268, 'samples': 615216, 'steps': 12816, 'loss/train': 1.8294025659561157} +07/25/2024 12:25:52 - INFO - __main__ - Step 12818: {'lr': 0.0004919178604353991, 'samples': 615264, 'steps': 12817, 'loss/train': 2.1902315616607666} +07/25/2024 12:25:52 - INFO - __main__ - Step 12819: {'lr': 0.0004919165335999568, 'samples': 615312, 'steps': 12818, 'loss/train': 1.7463406324386597} +07/25/2024 12:25:52 - INFO - __main__ - Step 12820: {'lr': 0.0004919152066574006, 'samples': 615360, 'steps': 12819, 'loss/train': 1.1737529039382935} +07/25/2024 12:25:53 - INFO - __main__ - Step 12821: {'lr': 0.0004919138796077312, 'samples': 615408, 'steps': 12820, 'loss/train': 2.0077991485595703} +07/25/2024 12:25:53 - INFO - __main__ - Step 12822: {'lr': 0.0004919125524509491, 'samples': 615456, 'steps': 12821, 'loss/train': 1.5916433334350586} +07/25/2024 12:25:53 - INFO - __main__ - Step 12823: {'lr': 0.0004919112251870549, 'samples': 615504, 'steps': 12822, 'loss/train': 1.9784855842590332} +07/25/2024 12:25:54 - INFO - __main__ - Step 12824: {'lr': 0.0004919098978160493, 'samples': 615552, 'steps': 12823, 'loss/train': 1.5397063493728638} +07/25/2024 12:25:54 - INFO - __main__ - Step 12825: {'lr': 0.0004919085703379326, 'samples': 615600, 'steps': 12824, 'loss/train': 2.5203890800476074} +07/25/2024 12:25:54 - INFO - __main__ - Step 12826: {'lr': 0.0004919072427527056, 'samples': 615648, 'steps': 12825, 'loss/train': 1.471305012702942} +07/25/2024 12:25:54 - INFO - __main__ - Step 12827: {'lr': 0.0004919059150603689, 'samples': 615696, 'steps': 12826, 'loss/train': 1.2633591890335083} +07/25/2024 12:25:55 - INFO - __main__ - Step 12828: {'lr': 0.000491904587260923, 'samples': 615744, 'steps': 12827, 'loss/train': 1.9079657793045044} +07/25/2024 12:25:55 - INFO - __main__ - Step 12829: {'lr': 0.0004919032593543686, 'samples': 615792, 'steps': 12828, 'loss/train': 1.7406846284866333} +07/25/2024 12:25:55 - INFO - __main__ - Step 12830: {'lr': 0.0004919019313407062, 'samples': 615840, 'steps': 12829, 'loss/train': 1.72600519657135} +07/25/2024 12:25:56 - INFO - __main__ - Step 12831: {'lr': 0.0004919006032199363, 'samples': 615888, 'steps': 12830, 'loss/train': 1.9411826133728027} +07/25/2024 12:25:56 - INFO - __main__ - Step 12832: {'lr': 0.0004918992749920597, 'samples': 615936, 'steps': 12831, 'loss/train': 1.9683326482772827} +07/25/2024 12:25:56 - INFO - __main__ - Step 12833: {'lr': 0.0004918979466570769, 'samples': 615984, 'steps': 12832, 'loss/train': 1.5131162405014038} +07/25/2024 12:25:56 - INFO - __main__ - Step 12834: {'lr': 0.0004918966182149883, 'samples': 616032, 'steps': 12833, 'loss/train': 1.9160027503967285} +07/25/2024 12:25:57 - INFO - __main__ - Step 12835: {'lr': 0.0004918952896657948, 'samples': 616080, 'steps': 12834, 'loss/train': 1.7446959018707275} +07/25/2024 12:25:57 - INFO - __main__ - Step 12836: {'lr': 0.0004918939610094968, 'samples': 616128, 'steps': 12835, 'loss/train': 1.4932368993759155} +07/25/2024 12:25:57 - INFO - __main__ - Step 12837: {'lr': 0.000491892632246095, 'samples': 616176, 'steps': 12836, 'loss/train': 0.7690361142158508} +07/25/2024 12:25:58 - INFO - __main__ - Step 12838: {'lr': 0.0004918913033755898, 'samples': 616224, 'steps': 12837, 'loss/train': 2.554959297180176} +07/25/2024 12:25:58 - INFO - __main__ - Step 12839: {'lr': 0.0004918899743979819, 'samples': 616272, 'steps': 12838, 'loss/train': 0.09184059500694275} +07/25/2024 12:25:58 - INFO - __main__ - Step 12840: {'lr': 0.000491888645313272, 'samples': 616320, 'steps': 12839, 'loss/train': 2.157499074935913} +07/25/2024 12:25:58 - INFO - __main__ - Step 12841: {'lr': 0.0004918873161214605, 'samples': 616368, 'steps': 12840, 'loss/train': 1.9800975322723389} +07/25/2024 12:25:59 - INFO - __main__ - Step 12842: {'lr': 0.0004918859868225481, 'samples': 616416, 'steps': 12841, 'loss/train': 1.5109379291534424} +07/25/2024 12:25:59 - INFO - __main__ - Step 12843: {'lr': 0.0004918846574165354, 'samples': 616464, 'steps': 12842, 'loss/train': 1.6805773973464966} +07/25/2024 12:25:59 - INFO - __main__ - Step 12844: {'lr': 0.000491883327903423, 'samples': 616512, 'steps': 12843, 'loss/train': 1.2672159671783447} +07/25/2024 12:26:00 - INFO - __main__ - Step 12845: {'lr': 0.0004918819982832113, 'samples': 616560, 'steps': 12844, 'loss/train': 2.0211682319641113} +07/25/2024 12:26:00 - INFO - __main__ - Step 12846: {'lr': 0.0004918806685559012, 'samples': 616608, 'steps': 12845, 'loss/train': 1.8544753789901733} +07/25/2024 12:26:00 - INFO - __main__ - Step 12847: {'lr': 0.0004918793387214929, 'samples': 616656, 'steps': 12846, 'loss/train': 1.7830910682678223} +07/25/2024 12:26:00 - INFO - __main__ - Step 12848: {'lr': 0.0004918780087799874, 'samples': 616704, 'steps': 12847, 'loss/train': 1.8954493999481201} +07/25/2024 12:26:01 - INFO - __main__ - Step 12849: {'lr': 0.0004918766787313849, 'samples': 616752, 'steps': 12848, 'loss/train': 2.263763904571533} +07/25/2024 12:26:01 - INFO - __main__ - Step 12850: {'lr': 0.0004918753485756863, 'samples': 616800, 'steps': 12849, 'loss/train': 1.490869164466858} +07/25/2024 12:26:01 - INFO - __main__ - Step 12851: {'lr': 0.0004918740183128922, 'samples': 616848, 'steps': 12850, 'loss/train': 1.8835508823394775} +07/25/2024 12:26:01 - INFO - __main__ - Step 12852: {'lr': 0.000491872687943003, 'samples': 616896, 'steps': 12851, 'loss/train': 1.804721713066101} +07/25/2024 12:26:02 - INFO - __main__ - Step 12853: {'lr': 0.0004918713574660192, 'samples': 616944, 'steps': 12852, 'loss/train': 1.606231927871704} +07/25/2024 12:26:02 - INFO - __main__ - Step 12854: {'lr': 0.0004918700268819417, 'samples': 616992, 'steps': 12853, 'loss/train': 2.4867429733276367} +07/25/2024 12:26:02 - INFO - __main__ - Step 12855: {'lr': 0.0004918686961907708, 'samples': 617040, 'steps': 12854, 'loss/train': 2.0586605072021484} +07/25/2024 12:26:03 - INFO - __main__ - Step 12856: {'lr': 0.0004918673653925073, 'samples': 617088, 'steps': 12855, 'loss/train': 1.9543555974960327} +07/25/2024 12:26:03 - INFO - __main__ - Step 12857: {'lr': 0.0004918660344871517, 'samples': 617136, 'steps': 12856, 'loss/train': 1.8227686882019043} +07/25/2024 12:26:03 - INFO - __main__ - Step 12858: {'lr': 0.0004918647034747045, 'samples': 617184, 'steps': 12857, 'loss/train': 1.4856613874435425} +07/25/2024 12:26:03 - INFO - __main__ - Step 12859: {'lr': 0.0004918633723551665, 'samples': 617232, 'steps': 12858, 'loss/train': 1.9064275026321411} +07/25/2024 12:26:04 - INFO - __main__ - Step 12860: {'lr': 0.0004918620411285382, 'samples': 617280, 'steps': 12859, 'loss/train': 1.512131929397583} +07/25/2024 12:26:04 - INFO - __main__ - Step 12861: {'lr': 0.0004918607097948201, 'samples': 617328, 'steps': 12860, 'loss/train': 2.0308239459991455} +07/25/2024 12:26:04 - INFO - __main__ - Step 12862: {'lr': 0.0004918593783540128, 'samples': 617376, 'steps': 12861, 'loss/train': 2.082597255706787} +07/25/2024 12:26:05 - INFO - __main__ - Step 12863: {'lr': 0.000491858046806117, 'samples': 617424, 'steps': 12862, 'loss/train': 0.1287066638469696} +07/25/2024 12:26:05 - INFO - __main__ - Step 12864: {'lr': 0.0004918567151511332, 'samples': 617472, 'steps': 12863, 'loss/train': 1.8004484176635742} +07/25/2024 12:26:05 - INFO - __main__ - Step 12865: {'lr': 0.0004918553833890622, 'samples': 617520, 'steps': 12864, 'loss/train': 1.6947530508041382} +07/25/2024 12:26:05 - INFO - __main__ - Step 12866: {'lr': 0.0004918540515199042, 'samples': 617568, 'steps': 12865, 'loss/train': 1.777125358581543} +07/25/2024 12:26:06 - INFO - __main__ - Step 12867: {'lr': 0.0004918527195436601, 'samples': 617616, 'steps': 12866, 'loss/train': 2.2637758255004883} +07/25/2024 12:26:06 - INFO - __main__ - Step 12868: {'lr': 0.0004918513874603304, 'samples': 617664, 'steps': 12867, 'loss/train': 2.0282981395721436} +07/25/2024 12:26:06 - INFO - __main__ - Step 12869: {'lr': 0.0004918500552699157, 'samples': 617712, 'steps': 12868, 'loss/train': 1.9294459819793701} +07/25/2024 12:26:07 - INFO - __main__ - Step 12870: {'lr': 0.0004918487229724164, 'samples': 617760, 'steps': 12869, 'loss/train': 1.8519295454025269} +07/25/2024 12:26:07 - INFO - __main__ - Step 12871: {'lr': 0.0004918473905678334, 'samples': 617808, 'steps': 12870, 'loss/train': 1.4960699081420898} +07/25/2024 12:26:07 - INFO - __main__ - Step 12872: {'lr': 0.0004918460580561672, 'samples': 617856, 'steps': 12871, 'loss/train': 2.2397782802581787} +07/25/2024 12:26:07 - INFO - __main__ - Step 12873: {'lr': 0.0004918447254374183, 'samples': 617904, 'steps': 12872, 'loss/train': 2.2096807956695557} +07/25/2024 12:26:08 - INFO - __main__ - Step 12874: {'lr': 0.0004918433927115873, 'samples': 617952, 'steps': 12873, 'loss/train': 1.8218525648117065} +07/25/2024 12:26:08 - INFO - __main__ - Step 12875: {'lr': 0.0004918420598786748, 'samples': 618000, 'steps': 12874, 'loss/train': 2.165004014968872} +07/25/2024 12:26:08 - INFO - __main__ - Step 12876: {'lr': 0.0004918407269386815, 'samples': 618048, 'steps': 12875, 'loss/train': 2.3732364177703857} +07/25/2024 12:26:09 - INFO - __main__ - Step 12877: {'lr': 0.0004918393938916078, 'samples': 618096, 'steps': 12876, 'loss/train': 1.8750884532928467} +07/25/2024 12:26:09 - INFO - __main__ - Step 12878: {'lr': 0.0004918380607374545, 'samples': 618144, 'steps': 12877, 'loss/train': 1.7285032272338867} +07/25/2024 12:26:09 - INFO - __main__ - Step 12879: {'lr': 0.000491836727476222, 'samples': 618192, 'steps': 12878, 'loss/train': 1.6548823118209839} +07/25/2024 12:26:09 - INFO - __main__ - Step 12880: {'lr': 0.0004918353941079111, 'samples': 618240, 'steps': 12879, 'loss/train': 1.6876155138015747} +07/25/2024 12:26:10 - INFO - __main__ - Step 12881: {'lr': 0.000491834060632522, 'samples': 618288, 'steps': 12880, 'loss/train': 1.9274694919586182} +07/25/2024 12:26:10 - INFO - __main__ - Step 12882: {'lr': 0.0004918327270500558, 'samples': 618336, 'steps': 12881, 'loss/train': 1.8029104471206665} +07/25/2024 12:26:10 - INFO - __main__ - Step 12883: {'lr': 0.0004918313933605128, 'samples': 618384, 'steps': 12882, 'loss/train': 1.8092185258865356} +07/25/2024 12:26:11 - INFO - __main__ - Step 12884: {'lr': 0.0004918300595638935, 'samples': 618432, 'steps': 12883, 'loss/train': 1.495406150817871} +07/25/2024 12:26:11 - INFO - __main__ - Step 12885: {'lr': 0.0004918287256601987, 'samples': 618480, 'steps': 12884, 'loss/train': 2.5534958839416504} +07/25/2024 12:26:11 - INFO - __main__ - Step 12886: {'lr': 0.000491827391649429, 'samples': 618528, 'steps': 12885, 'loss/train': 2.0990898609161377} +07/25/2024 12:26:11 - INFO - __main__ - Step 12887: {'lr': 0.0004918260575315849, 'samples': 618576, 'steps': 12886, 'loss/train': 0.7817641496658325} +07/25/2024 12:26:12 - INFO - __main__ - Step 12888: {'lr': 0.0004918247233066669, 'samples': 618624, 'steps': 12887, 'loss/train': 1.9574766159057617} +07/25/2024 12:26:12 - INFO - __main__ - Step 12889: {'lr': 0.0004918233889746757, 'samples': 618672, 'steps': 12888, 'loss/train': 1.6551783084869385} +07/25/2024 12:26:12 - INFO - __main__ - Step 12890: {'lr': 0.000491822054535612, 'samples': 618720, 'steps': 12889, 'loss/train': 1.6435301303863525} +07/25/2024 12:26:13 - INFO - __main__ - Step 12891: {'lr': 0.0004918207199894762, 'samples': 618768, 'steps': 12890, 'loss/train': 1.6051498651504517} +07/25/2024 12:26:13 - INFO - __main__ - Step 12892: {'lr': 0.0004918193853362689, 'samples': 618816, 'steps': 12891, 'loss/train': 1.3987493515014648} +07/25/2024 12:26:13 - INFO - __main__ - Step 12893: {'lr': 0.0004918180505759908, 'samples': 618864, 'steps': 12892, 'loss/train': 2.014813184738159} +07/25/2024 12:26:13 - INFO - __main__ - Step 12894: {'lr': 0.0004918167157086424, 'samples': 618912, 'steps': 12893, 'loss/train': 1.6038357019424438} +07/25/2024 12:26:14 - INFO - __main__ - Step 12895: {'lr': 0.0004918153807342244, 'samples': 618960, 'steps': 12894, 'loss/train': 0.6801826357841492} +07/25/2024 12:26:14 - INFO - __main__ - Step 12896: {'lr': 0.0004918140456527373, 'samples': 619008, 'steps': 12895, 'loss/train': 1.7607702016830444} +07/25/2024 12:26:14 - INFO - __main__ - Step 12897: {'lr': 0.0004918127104641817, 'samples': 619056, 'steps': 12896, 'loss/train': 1.6978881359100342} +07/25/2024 12:26:15 - INFO - __main__ - Step 12898: {'lr': 0.0004918113751685582, 'samples': 619104, 'steps': 12897, 'loss/train': 2.5444462299346924} +07/25/2024 12:26:15 - INFO - __main__ - Step 12899: {'lr': 0.0004918100397658674, 'samples': 619152, 'steps': 12898, 'loss/train': 1.777369499206543} +07/25/2024 12:26:15 - INFO - __main__ - Step 12900: {'lr': 0.0004918087042561099, 'samples': 619200, 'steps': 12899, 'loss/train': 1.6192418336868286} +07/25/2024 12:26:15 - INFO - __main__ - Step 12901: {'lr': 0.0004918073686392863, 'samples': 619248, 'steps': 12900, 'loss/train': 1.5211604833602905} +07/25/2024 12:26:16 - INFO - __main__ - Step 12902: {'lr': 0.0004918060329153972, 'samples': 619296, 'steps': 12901, 'loss/train': 2.2010531425476074} +07/25/2024 12:26:16 - INFO - __main__ - Step 12903: {'lr': 0.0004918046970844431, 'samples': 619344, 'steps': 12902, 'loss/train': 1.9546869993209839} +07/25/2024 12:26:16 - INFO - __main__ - Step 12904: {'lr': 0.0004918033611464247, 'samples': 619392, 'steps': 12903, 'loss/train': 1.6081892251968384} +07/25/2024 12:26:17 - INFO - __main__ - Step 12905: {'lr': 0.0004918020251013425, 'samples': 619440, 'steps': 12904, 'loss/train': 0.7611247301101685} +07/25/2024 12:26:17 - INFO - __main__ - Step 12906: {'lr': 0.0004918006889491972, 'samples': 619488, 'steps': 12905, 'loss/train': 2.122887372970581} +07/25/2024 12:26:17 - INFO - __main__ - Step 12907: {'lr': 0.0004917993526899893, 'samples': 619536, 'steps': 12906, 'loss/train': 2.179144859313965} +07/25/2024 12:26:17 - INFO - __main__ - Step 12908: {'lr': 0.0004917980163237194, 'samples': 619584, 'steps': 12907, 'loss/train': 1.6572496891021729} +07/25/2024 12:26:18 - INFO - __main__ - Step 12909: {'lr': 0.0004917966798503881, 'samples': 619632, 'steps': 12908, 'loss/train': 2.1305649280548096} +07/25/2024 12:26:18 - INFO - __main__ - Step 12910: {'lr': 0.000491795343269996, 'samples': 619680, 'steps': 12909, 'loss/train': 1.5427192449569702} +07/25/2024 12:26:18 - INFO - __main__ - Step 12911: {'lr': 0.0004917940065825438, 'samples': 619728, 'steps': 12910, 'loss/train': 2.0186519622802734} +07/25/2024 12:26:19 - INFO - __main__ - Step 12912: {'lr': 0.0004917926697880319, 'samples': 619776, 'steps': 12911, 'loss/train': 1.7714811563491821} +07/25/2024 12:26:19 - INFO - __main__ - Step 12913: {'lr': 0.000491791332886461, 'samples': 619824, 'steps': 12912, 'loss/train': 2.342128038406372} +07/25/2024 12:26:19 - INFO - __main__ - Step 12914: {'lr': 0.0004917899958778317, 'samples': 619872, 'steps': 12913, 'loss/train': 1.4177167415618896} +07/25/2024 12:26:19 - INFO - __main__ - Step 12915: {'lr': 0.0004917886587621445, 'samples': 619920, 'steps': 12914, 'loss/train': 1.7484021186828613} +07/25/2024 12:26:20 - INFO - __main__ - Step 12916: {'lr': 0.0004917873215394002, 'samples': 619968, 'steps': 12915, 'loss/train': 2.174130439758301} +07/25/2024 12:26:20 - INFO - __main__ - Step 12917: {'lr': 0.000491785984209599, 'samples': 620016, 'steps': 12916, 'loss/train': 1.901808500289917} +07/25/2024 12:26:20 - INFO - __main__ - Step 12918: {'lr': 0.000491784646772742, 'samples': 620064, 'steps': 12917, 'loss/train': 2.121103525161743} +07/25/2024 12:26:21 - INFO - __main__ - Step 12919: {'lr': 0.0004917833092288294, 'samples': 620112, 'steps': 12918, 'loss/train': 2.0157084465026855} +07/25/2024 12:26:21 - INFO - __main__ - Step 12920: {'lr': 0.000491781971577862, 'samples': 620160, 'steps': 12919, 'loss/train': 1.985945463180542} +07/25/2024 12:26:21 - INFO - __main__ - Step 12921: {'lr': 0.0004917806338198402, 'samples': 620208, 'steps': 12920, 'loss/train': 1.6351102590560913} +07/25/2024 12:26:21 - INFO - __main__ - Step 12922: {'lr': 0.0004917792959547647, 'samples': 620256, 'steps': 12921, 'loss/train': 1.6806893348693848} +07/25/2024 12:26:22 - INFO - __main__ - Step 12923: {'lr': 0.0004917779579826362, 'samples': 620304, 'steps': 12922, 'loss/train': 1.9375072717666626} +07/25/2024 12:26:22 - INFO - __main__ - Step 12924: {'lr': 0.0004917766199034553, 'samples': 620352, 'steps': 12923, 'loss/train': 2.224043369293213} +07/25/2024 12:26:22 - INFO - __main__ - Step 12925: {'lr': 0.0004917752817172223, 'samples': 620400, 'steps': 12924, 'loss/train': 1.5876282453536987} +07/25/2024 12:26:23 - INFO - __main__ - Step 12926: {'lr': 0.000491773943423938, 'samples': 620448, 'steps': 12925, 'loss/train': 1.812713384628296} +07/25/2024 12:26:23 - INFO - __main__ - Step 12927: {'lr': 0.000491772605023603, 'samples': 620496, 'steps': 12926, 'loss/train': 2.3593976497650146} +07/25/2024 12:26:23 - INFO - __main__ - Step 12928: {'lr': 0.0004917712665162178, 'samples': 620544, 'steps': 12927, 'loss/train': 2.293602228164673} +07/25/2024 12:26:23 - INFO - __main__ - Step 12929: {'lr': 0.0004917699279017832, 'samples': 620592, 'steps': 12928, 'loss/train': 2.2111079692840576} +07/25/2024 12:26:24 - INFO - __main__ - Step 12930: {'lr': 0.0004917685891802997, 'samples': 620640, 'steps': 12929, 'loss/train': 1.8131502866744995} +07/25/2024 12:26:24 - INFO - __main__ - Step 12931: {'lr': 0.0004917672503517676, 'samples': 620688, 'steps': 12930, 'loss/train': 2.072619676589966} +07/25/2024 12:26:24 - INFO - __main__ - Step 12932: {'lr': 0.0004917659114161879, 'samples': 620736, 'steps': 12931, 'loss/train': 1.7977715730667114} +07/25/2024 12:26:25 - INFO - __main__ - Step 12933: {'lr': 0.000491764572373561, 'samples': 620784, 'steps': 12932, 'loss/train': 1.652439832687378} +07/25/2024 12:26:25 - INFO - __main__ - Step 12934: {'lr': 0.0004917632332238875, 'samples': 620832, 'steps': 12933, 'loss/train': 1.715948224067688} +07/25/2024 12:26:25 - INFO - __main__ - Step 12935: {'lr': 0.000491761893967168, 'samples': 620880, 'steps': 12934, 'loss/train': 1.9444018602371216} +07/25/2024 12:26:25 - INFO - __main__ - Step 12936: {'lr': 0.0004917605546034032, 'samples': 620928, 'steps': 12935, 'loss/train': 2.4732112884521484} +07/25/2024 12:26:26 - INFO - __main__ - Step 12937: {'lr': 0.0004917592151325935, 'samples': 620976, 'steps': 12936, 'loss/train': 2.506218910217285} +07/25/2024 12:26:26 - INFO - __main__ - Step 12938: {'lr': 0.0004917578755547396, 'samples': 621024, 'steps': 12937, 'loss/train': 1.956485390663147} +07/25/2024 12:26:26 - INFO - __main__ - Step 12939: {'lr': 0.0004917565358698422, 'samples': 621072, 'steps': 12938, 'loss/train': 2.015380382537842} +07/25/2024 12:26:26 - INFO - __main__ - Step 12940: {'lr': 0.0004917551960779016, 'samples': 621120, 'steps': 12939, 'loss/train': 1.497867226600647} +07/25/2024 12:26:27 - INFO - __main__ - Step 12941: {'lr': 0.0004917538561789187, 'samples': 621168, 'steps': 12940, 'loss/train': 1.438028335571289} +07/25/2024 12:26:27 - INFO - __main__ - Step 12942: {'lr': 0.000491752516172894, 'samples': 621216, 'steps': 12941, 'loss/train': 1.9037672281265259} +07/25/2024 12:26:27 - INFO - __main__ - Step 12943: {'lr': 0.0004917511760598279, 'samples': 621264, 'steps': 12942, 'loss/train': 2.140695571899414} +07/25/2024 12:26:28 - INFO - __main__ - Step 12944: {'lr': 0.0004917498358397213, 'samples': 621312, 'steps': 12943, 'loss/train': 1.510437250137329} +07/25/2024 12:26:28 - INFO - __main__ - Step 12945: {'lr': 0.0004917484955125745, 'samples': 621360, 'steps': 12944, 'loss/train': 2.1444947719573975} +07/25/2024 12:26:28 - INFO - __main__ - Step 12946: {'lr': 0.0004917471550783885, 'samples': 621408, 'steps': 12945, 'loss/train': 1.4816588163375854} +07/25/2024 12:26:28 - INFO - __main__ - Step 12947: {'lr': 0.0004917458145371633, 'samples': 621456, 'steps': 12946, 'loss/train': 2.222588300704956} +07/25/2024 12:26:29 - INFO - __main__ - Step 12948: {'lr': 0.0004917444738889001, 'samples': 621504, 'steps': 12947, 'loss/train': 2.238607406616211} +07/25/2024 12:26:29 - INFO - __main__ - Step 12949: {'lr': 0.0004917431331335991, 'samples': 621552, 'steps': 12948, 'loss/train': 1.9689617156982422} +07/25/2024 12:26:29 - INFO - __main__ - Step 12950: {'lr': 0.000491741792271261, 'samples': 621600, 'steps': 12949, 'loss/train': 1.8528704643249512} +07/25/2024 12:26:30 - INFO - __main__ - Step 12951: {'lr': 0.0004917404513018864, 'samples': 621648, 'steps': 12950, 'loss/train': 1.8567558526992798} +07/25/2024 12:26:30 - INFO - __main__ - Step 12952: {'lr': 0.0004917391102254759, 'samples': 621696, 'steps': 12951, 'loss/train': 3.4192185401916504} +07/25/2024 12:26:30 - INFO - __main__ - Step 12953: {'lr': 0.0004917377690420302, 'samples': 621744, 'steps': 12952, 'loss/train': 2.49405837059021} +07/25/2024 12:26:30 - INFO - __main__ - Step 12954: {'lr': 0.0004917364277515497, 'samples': 621792, 'steps': 12953, 'loss/train': 1.8040130138397217} +07/25/2024 12:26:31 - INFO - __main__ - Step 12955: {'lr': 0.0004917350863540351, 'samples': 621840, 'steps': 12954, 'loss/train': 1.849083423614502} +07/25/2024 12:26:31 - INFO - __main__ - Step 12956: {'lr': 0.0004917337448494869, 'samples': 621888, 'steps': 12955, 'loss/train': 1.4873931407928467} +07/25/2024 12:26:31 - INFO - __main__ - Step 12957: {'lr': 0.000491732403237906, 'samples': 621936, 'steps': 12956, 'loss/train': 1.3694472312927246} +07/25/2024 12:26:32 - INFO - __main__ - Step 12958: {'lr': 0.0004917310615192926, 'samples': 621984, 'steps': 12957, 'loss/train': 1.969241738319397} +07/25/2024 12:26:32 - INFO - __main__ - Step 12959: {'lr': 0.0004917297196936474, 'samples': 622032, 'steps': 12958, 'loss/train': 2.05965518951416} +07/25/2024 12:26:32 - INFO - __main__ - Step 12960: {'lr': 0.0004917283777609711, 'samples': 622080, 'steps': 12959, 'loss/train': 2.859865665435791} +07/25/2024 12:26:32 - INFO - __main__ - Step 12961: {'lr': 0.0004917270357212643, 'samples': 622128, 'steps': 12960, 'loss/train': 1.9619519710540771} +07/25/2024 12:26:33 - INFO - __main__ - Step 12962: {'lr': 0.0004917256935745276, 'samples': 622176, 'steps': 12961, 'loss/train': 1.954538106918335} +07/25/2024 12:26:33 - INFO - __main__ - Step 12963: {'lr': 0.0004917243513207615, 'samples': 622224, 'steps': 12962, 'loss/train': 2.0172674655914307} +07/25/2024 12:26:33 - INFO - __main__ - Step 12964: {'lr': 0.0004917230089599666, 'samples': 622272, 'steps': 12963, 'loss/train': 1.692172646522522} +07/25/2024 12:26:34 - INFO - __main__ - Step 12965: {'lr': 0.0004917216664921435, 'samples': 622320, 'steps': 12964, 'loss/train': 1.7606569528579712} +07/25/2024 12:26:34 - INFO - __main__ - Step 12966: {'lr': 0.0004917203239172928, 'samples': 622368, 'steps': 12965, 'loss/train': 1.428939700126648} +07/25/2024 12:26:34 - INFO - __main__ - Step 12967: {'lr': 0.0004917189812354151, 'samples': 622416, 'steps': 12966, 'loss/train': 2.0171258449554443} +07/25/2024 12:26:34 - INFO - __main__ - Step 12968: {'lr': 0.0004917176384465111, 'samples': 622464, 'steps': 12967, 'loss/train': 1.4944982528686523} +07/25/2024 12:26:35 - INFO - __main__ - Step 12969: {'lr': 0.0004917162955505812, 'samples': 622512, 'steps': 12968, 'loss/train': 2.2863008975982666} +07/25/2024 12:26:35 - INFO - __main__ - Step 12970: {'lr': 0.0004917149525476261, 'samples': 622560, 'steps': 12969, 'loss/train': 1.946010947227478} +07/25/2024 12:26:35 - INFO - __main__ - Step 12971: {'lr': 0.0004917136094376466, 'samples': 622608, 'steps': 12970, 'loss/train': 1.3300466537475586} +07/25/2024 12:26:36 - INFO - __main__ - Step 12972: {'lr': 0.0004917122662206428, 'samples': 622656, 'steps': 12971, 'loss/train': 1.6483758687973022} +07/25/2024 12:26:36 - INFO - __main__ - Step 12973: {'lr': 0.0004917109228966157, 'samples': 622704, 'steps': 12972, 'loss/train': 2.076824426651001} +07/25/2024 12:26:36 - INFO - __main__ - Step 12974: {'lr': 0.0004917095794655658, 'samples': 622752, 'steps': 12973, 'loss/train': 1.5117664337158203} +07/25/2024 12:26:36 - INFO - __main__ - Step 12975: {'lr': 0.0004917082359274937, 'samples': 622800, 'steps': 12974, 'loss/train': 1.7574259042739868} +07/25/2024 12:26:37 - INFO - __main__ - Step 12976: {'lr': 0.0004917068922823999, 'samples': 622848, 'steps': 12975, 'loss/train': 4.321401119232178} +07/25/2024 12:26:37 - INFO - __main__ - Step 12977: {'lr': 0.000491705548530285, 'samples': 622896, 'steps': 12976, 'loss/train': 4.2264323234558105} +07/25/2024 12:26:37 - INFO - __main__ - Step 12978: {'lr': 0.0004917042046711499, 'samples': 622944, 'steps': 12977, 'loss/train': 1.9641501903533936} +07/25/2024 12:26:38 - INFO - __main__ - Step 12979: {'lr': 0.0004917028607049947, 'samples': 622992, 'steps': 12978, 'loss/train': 2.0705556869506836} +07/25/2024 12:26:38 - INFO - __main__ - Step 12980: {'lr': 0.0004917015166318204, 'samples': 623040, 'steps': 12979, 'loss/train': 1.8065369129180908} +07/25/2024 12:26:38 - INFO - __main__ - Step 12981: {'lr': 0.0004917001724516274, 'samples': 623088, 'steps': 12980, 'loss/train': 2.1073060035705566} +07/25/2024 12:26:38 - INFO - __main__ - Step 12982: {'lr': 0.0004916988281644162, 'samples': 623136, 'steps': 12981, 'loss/train': 1.909507393836975} +07/25/2024 12:26:39 - INFO - __main__ - Step 12983: {'lr': 0.0004916974837701877, 'samples': 623184, 'steps': 12982, 'loss/train': 1.3889132738113403} +07/25/2024 12:26:39 - INFO - __main__ - Step 12984: {'lr': 0.0004916961392689422, 'samples': 623232, 'steps': 12983, 'loss/train': 1.873221755027771} +07/25/2024 12:26:39 - INFO - __main__ - Step 12985: {'lr': 0.0004916947946606806, 'samples': 623280, 'steps': 12984, 'loss/train': 1.9712978601455688} +07/25/2024 12:26:40 - INFO - __main__ - Step 12986: {'lr': 0.0004916934499454032, 'samples': 623328, 'steps': 12985, 'loss/train': 2.0240416526794434} +07/25/2024 12:26:40 - INFO - __main__ - Step 12987: {'lr': 0.0004916921051231107, 'samples': 623376, 'steps': 12986, 'loss/train': 1.8642754554748535} +07/25/2024 12:26:40 - INFO - __main__ - Step 12988: {'lr': 0.0004916907601938037, 'samples': 623424, 'steps': 12987, 'loss/train': 1.902543544769287} +07/25/2024 12:26:40 - INFO - __main__ - Step 12989: {'lr': 0.0004916894151574828, 'samples': 623472, 'steps': 12988, 'loss/train': 2.0373618602752686} +07/25/2024 12:26:41 - INFO - __main__ - Step 12990: {'lr': 0.0004916880700141487, 'samples': 623520, 'steps': 12989, 'loss/train': 2.1651549339294434} +07/25/2024 12:26:41 - INFO - __main__ - Step 12991: {'lr': 0.0004916867247638018, 'samples': 623568, 'steps': 12990, 'loss/train': 2.0660572052001953} +07/25/2024 12:26:41 - INFO - __main__ - Step 12992: {'lr': 0.0004916853794064428, 'samples': 623616, 'steps': 12991, 'loss/train': 2.1641018390655518} +07/25/2024 12:26:42 - INFO - __main__ - Step 12993: {'lr': 0.0004916840339420723, 'samples': 623664, 'steps': 12992, 'loss/train': 2.43869686126709} +07/25/2024 12:26:42 - INFO - __main__ - Step 12994: {'lr': 0.0004916826883706909, 'samples': 623712, 'steps': 12993, 'loss/train': 2.1517739295959473} +07/25/2024 12:26:42 - INFO - __main__ - Step 12995: {'lr': 0.0004916813426922991, 'samples': 623760, 'steps': 12994, 'loss/train': 2.801525831222534} +07/25/2024 12:26:42 - INFO - __main__ - Step 12996: {'lr': 0.0004916799969068977, 'samples': 623808, 'steps': 12995, 'loss/train': 1.9780690670013428} +07/25/2024 12:26:43 - INFO - __main__ - Step 12997: {'lr': 0.0004916786510144871, 'samples': 623856, 'steps': 12996, 'loss/train': 2.0013372898101807} +07/25/2024 12:26:43 - INFO - __main__ - Step 12998: {'lr': 0.0004916773050150679, 'samples': 623904, 'steps': 12997, 'loss/train': 1.7330721616744995} +07/25/2024 12:26:43 - INFO - __main__ - Step 12999: {'lr': 0.0004916759589086409, 'samples': 623952, 'steps': 12998, 'loss/train': 1.3983988761901855} +07/25/2024 12:26:44 - INFO - __main__ - Step 13000: {'lr': 0.0004916746126952065, 'samples': 624000, 'steps': 12999, 'loss/train': 3.16433048248291} +07/25/2024 12:26:44 - INFO - __main__ - Step 13001: {'lr': 0.0004916732663747653, 'samples': 624048, 'steps': 13000, 'loss/train': 3.9096438884735107} +07/25/2024 12:26:44 - INFO - __main__ - Step 13002: {'lr': 0.0004916719199473179, 'samples': 624096, 'steps': 13001, 'loss/train': 2.1646196842193604} +07/25/2024 12:26:44 - INFO - __main__ - Step 13003: {'lr': 0.0004916705734128652, 'samples': 624144, 'steps': 13002, 'loss/train': 2.0876076221466064} +07/25/2024 12:26:45 - INFO - __main__ - Step 13004: {'lr': 0.0004916692267714073, 'samples': 624192, 'steps': 13003, 'loss/train': 2.183945655822754} +07/25/2024 12:26:45 - INFO - __main__ - Step 13005: {'lr': 0.0004916678800229451, 'samples': 624240, 'steps': 13004, 'loss/train': 2.3596529960632324} +07/25/2024 12:26:45 - INFO - __main__ - Step 13006: {'lr': 0.0004916665331674792, 'samples': 624288, 'steps': 13005, 'loss/train': 1.645460605621338} +07/25/2024 12:26:46 - INFO - __main__ - Step 13007: {'lr': 0.00049166518620501, 'samples': 624336, 'steps': 13006, 'loss/train': 2.066307544708252} +07/25/2024 12:26:46 - INFO - __main__ - Step 13008: {'lr': 0.0004916638391355383, 'samples': 624384, 'steps': 13007, 'loss/train': 2.1724908351898193} +07/25/2024 12:26:46 - INFO - __main__ - Step 13009: {'lr': 0.0004916624919590647, 'samples': 624432, 'steps': 13008, 'loss/train': 0.9570977687835693} +07/25/2024 12:26:46 - INFO - __main__ - Step 13010: {'lr': 0.0004916611446755897, 'samples': 624480, 'steps': 13009, 'loss/train': 1.7684261798858643} +07/25/2024 12:26:47 - INFO - __main__ - Step 13011: {'lr': 0.0004916597972851138, 'samples': 624528, 'steps': 13010, 'loss/train': 1.9220865964889526} +07/25/2024 12:26:47 - INFO - __main__ - Step 13012: {'lr': 0.0004916584497876378, 'samples': 624576, 'steps': 13011, 'loss/train': 2.221541404724121} +07/25/2024 12:26:47 - INFO - __main__ - Step 13013: {'lr': 0.0004916571021831622, 'samples': 624624, 'steps': 13012, 'loss/train': 1.6514219045639038} +07/25/2024 12:26:47 - INFO - __main__ - Step 13014: {'lr': 0.0004916557544716875, 'samples': 624672, 'steps': 13013, 'loss/train': 3.5320284366607666} +07/25/2024 12:26:48 - INFO - __main__ - Step 13015: {'lr': 0.0004916544066532146, 'samples': 624720, 'steps': 13014, 'loss/train': 1.9480972290039062} +07/25/2024 12:26:48 - INFO - __main__ - Step 13016: {'lr': 0.0004916530587277438, 'samples': 624768, 'steps': 13015, 'loss/train': 1.803297758102417} +07/25/2024 12:26:48 - INFO - __main__ - Step 13017: {'lr': 0.0004916517106952758, 'samples': 624816, 'steps': 13016, 'loss/train': 2.0431010723114014} +07/25/2024 12:26:49 - INFO - __main__ - Step 13018: {'lr': 0.0004916503625558112, 'samples': 624864, 'steps': 13017, 'loss/train': 1.9326595067977905} +07/25/2024 12:26:49 - INFO - __main__ - Step 13019: {'lr': 0.0004916490143093505, 'samples': 624912, 'steps': 13018, 'loss/train': 2.523219347000122} +07/25/2024 12:26:49 - INFO - __main__ - Step 13020: {'lr': 0.0004916476659558945, 'samples': 624960, 'steps': 13019, 'loss/train': 2.126394748687744} +07/25/2024 12:26:49 - INFO - __main__ - Step 13021: {'lr': 0.0004916463174954437, 'samples': 625008, 'steps': 13020, 'loss/train': 2.162855863571167} +07/25/2024 12:26:50 - INFO - __main__ - Step 13022: {'lr': 0.0004916449689279986, 'samples': 625056, 'steps': 13021, 'loss/train': 1.8296992778778076} +07/25/2024 12:26:50 - INFO - __main__ - Step 13023: {'lr': 0.0004916436202535599, 'samples': 625104, 'steps': 13022, 'loss/train': 2.1864216327667236} +07/25/2024 12:26:50 - INFO - __main__ - Step 13024: {'lr': 0.0004916422714721281, 'samples': 625152, 'steps': 13023, 'loss/train': 1.957566261291504} +07/25/2024 12:26:51 - INFO - __main__ - Step 13025: {'lr': 0.000491640922583704, 'samples': 625200, 'steps': 13024, 'loss/train': 2.0085387229919434} +07/25/2024 12:26:51 - INFO - __main__ - Step 13026: {'lr': 0.0004916395735882881, 'samples': 625248, 'steps': 13025, 'loss/train': 2.271883249282837} +07/25/2024 12:26:51 - INFO - __main__ - Step 13027: {'lr': 0.0004916382244858808, 'samples': 625296, 'steps': 13026, 'loss/train': 2.1830127239227295} +07/25/2024 12:26:51 - INFO - __main__ - Step 13028: {'lr': 0.0004916368752764831, 'samples': 625344, 'steps': 13027, 'loss/train': 2.042067050933838} +07/25/2024 12:26:52 - INFO - __main__ - Step 13029: {'lr': 0.0004916355259600951, 'samples': 625392, 'steps': 13028, 'loss/train': 1.6727440357208252} +07/25/2024 12:26:52 - INFO - __main__ - Step 13030: {'lr': 0.0004916341765367179, 'samples': 625440, 'steps': 13029, 'loss/train': 1.9613701105117798} +07/25/2024 12:26:52 - INFO - __main__ - Step 13031: {'lr': 0.0004916328270063516, 'samples': 625488, 'steps': 13030, 'loss/train': 1.8286449909210205} +07/25/2024 12:26:53 - INFO - __main__ - Step 13032: {'lr': 0.0004916314773689973, 'samples': 625536, 'steps': 13031, 'loss/train': 2.1062374114990234} +07/25/2024 12:26:53 - INFO - __main__ - Step 13033: {'lr': 0.0004916301276246552, 'samples': 625584, 'steps': 13032, 'loss/train': 2.167872905731201} +07/25/2024 12:26:53 - INFO - __main__ - Step 13034: {'lr': 0.000491628777773326, 'samples': 625632, 'steps': 13033, 'loss/train': 1.5012239217758179} +07/25/2024 12:26:53 - INFO - __main__ - Step 13035: {'lr': 0.0004916274278150105, 'samples': 625680, 'steps': 13034, 'loss/train': 1.5513801574707031} +07/25/2024 12:26:54 - INFO - __main__ - Step 13036: {'lr': 0.0004916260777497091, 'samples': 625728, 'steps': 13035, 'loss/train': 1.8804503679275513} +07/25/2024 12:26:54 - INFO - __main__ - Step 13037: {'lr': 0.0004916247275774224, 'samples': 625776, 'steps': 13036, 'loss/train': 1.117010235786438} +07/25/2024 12:26:54 - INFO - __main__ - Step 13038: {'lr': 0.0004916233772981511, 'samples': 625824, 'steps': 13037, 'loss/train': 3.1226255893707275} +07/25/2024 12:26:55 - INFO - __main__ - Step 13039: {'lr': 0.0004916220269118957, 'samples': 625872, 'steps': 13038, 'loss/train': 2.096527338027954} +07/25/2024 12:26:55 - INFO - __main__ - Step 13040: {'lr': 0.0004916206764186569, 'samples': 625920, 'steps': 13039, 'loss/train': 2.2616732120513916} +07/25/2024 12:26:55 - INFO - __main__ - Step 13041: {'lr': 0.0004916193258184352, 'samples': 625968, 'steps': 13040, 'loss/train': 2.3405022621154785} +07/25/2024 12:26:55 - INFO - __main__ - Step 13042: {'lr': 0.0004916179751112312, 'samples': 626016, 'steps': 13041, 'loss/train': 1.6794683933258057} +07/25/2024 12:26:56 - INFO - __main__ - Step 13043: {'lr': 0.0004916166242970455, 'samples': 626064, 'steps': 13042, 'loss/train': 1.8452600240707397} +07/25/2024 12:26:56 - INFO - __main__ - Step 13044: {'lr': 0.0004916152733758788, 'samples': 626112, 'steps': 13043, 'loss/train': 1.701242208480835} +07/25/2024 12:26:56 - INFO - __main__ - Step 13045: {'lr': 0.0004916139223477316, 'samples': 626160, 'steps': 13044, 'loss/train': 1.8169469833374023} +07/25/2024 12:26:57 - INFO - __main__ - Step 13046: {'lr': 0.0004916125712126044, 'samples': 626208, 'steps': 13045, 'loss/train': 2.677170515060425} +07/25/2024 12:26:57 - INFO - __main__ - Step 13047: {'lr': 0.0004916112199704982, 'samples': 626256, 'steps': 13046, 'loss/train': 1.9937937259674072} +07/25/2024 12:26:57 - INFO - __main__ - Step 13048: {'lr': 0.0004916098686214131, 'samples': 626304, 'steps': 13047, 'loss/train': 2.3002376556396484} +07/25/2024 12:26:57 - INFO - __main__ - Step 13049: {'lr': 0.0004916085171653501, 'samples': 626352, 'steps': 13048, 'loss/train': 2.1929798126220703} +07/25/2024 12:26:58 - INFO - __main__ - Step 13050: {'lr': 0.0004916071656023095, 'samples': 626400, 'steps': 13049, 'loss/train': 2.511641502380371} +07/25/2024 12:26:58 - INFO - __main__ - Step 13051: {'lr': 0.000491605813932292, 'samples': 626448, 'steps': 13050, 'loss/train': 1.9051706790924072} +07/25/2024 12:26:58 - INFO - __main__ - Step 13052: {'lr': 0.0004916044621552982, 'samples': 626496, 'steps': 13051, 'loss/train': 2.5340027809143066} +07/25/2024 12:26:59 - INFO - __main__ - Step 13053: {'lr': 0.0004916031102713287, 'samples': 626544, 'steps': 13052, 'loss/train': 1.812070608139038} +07/25/2024 12:26:59 - INFO - __main__ - Step 13054: {'lr': 0.0004916017582803843, 'samples': 626592, 'steps': 13053, 'loss/train': 1.7555283308029175} +07/25/2024 12:26:59 - INFO - __main__ - Step 13055: {'lr': 0.0004916004061824653, 'samples': 626640, 'steps': 13054, 'loss/train': 2.1106436252593994} +07/25/2024 12:26:59 - INFO - __main__ - Step 13056: {'lr': 0.0004915990539775723, 'samples': 626688, 'steps': 13055, 'loss/train': 1.8305230140686035} +07/25/2024 12:27:00 - INFO - __main__ - Step 13057: {'lr': 0.0004915977016657061, 'samples': 626736, 'steps': 13056, 'loss/train': 1.6963396072387695} +07/25/2024 12:27:00 - INFO - __main__ - Step 13058: {'lr': 0.0004915963492468673, 'samples': 626784, 'steps': 13057, 'loss/train': 1.6642019748687744} +07/25/2024 12:27:00 - INFO - __main__ - Step 13059: {'lr': 0.0004915949967210562, 'samples': 626832, 'steps': 13058, 'loss/train': 2.495590925216675} +07/25/2024 12:27:01 - INFO - __main__ - Step 13060: {'lr': 0.0004915936440882737, 'samples': 626880, 'steps': 13059, 'loss/train': 1.9373692274093628} +07/25/2024 12:27:01 - INFO - __main__ - Step 13061: {'lr': 0.0004915922913485204, 'samples': 626928, 'steps': 13060, 'loss/train': 0.8412851095199585} +07/25/2024 12:27:01 - INFO - __main__ - Step 13062: {'lr': 0.0004915909385017967, 'samples': 626976, 'steps': 13061, 'loss/train': 2.693519115447998} +07/25/2024 12:27:01 - INFO - __main__ - Step 13063: {'lr': 0.0004915895855481033, 'samples': 627024, 'steps': 13062, 'loss/train': 2.0780293941497803} +07/25/2024 12:27:02 - INFO - __main__ - Step 13064: {'lr': 0.0004915882324874408, 'samples': 627072, 'steps': 13063, 'loss/train': 1.590442419052124} +07/25/2024 12:27:02 - INFO - __main__ - Step 13065: {'lr': 0.0004915868793198098, 'samples': 627120, 'steps': 13064, 'loss/train': 2.0302093029022217} +07/25/2024 12:27:02 - INFO - __main__ - Step 13066: {'lr': 0.000491585526045211, 'samples': 627168, 'steps': 13065, 'loss/train': 2.023566246032715} +07/25/2024 12:27:03 - INFO - __main__ - Step 13067: {'lr': 0.0004915841726636448, 'samples': 627216, 'steps': 13066, 'loss/train': 0.64913010597229} +07/25/2024 12:27:03 - INFO - __main__ - Step 13068: {'lr': 0.0004915828191751119, 'samples': 627264, 'steps': 13067, 'loss/train': 2.065307855606079} +07/25/2024 12:27:03 - INFO - __main__ - Step 13069: {'lr': 0.0004915814655796129, 'samples': 627312, 'steps': 13068, 'loss/train': 2.187852621078491} +07/25/2024 12:27:03 - INFO - __main__ - Step 13070: {'lr': 0.0004915801118771484, 'samples': 627360, 'steps': 13069, 'loss/train': 1.8410265445709229} +07/25/2024 12:27:04 - INFO - __main__ - Step 13071: {'lr': 0.0004915787580677191, 'samples': 627408, 'steps': 13070, 'loss/train': 1.153906226158142} +07/25/2024 12:27:04 - INFO - __main__ - Step 13072: {'lr': 0.0004915774041513253, 'samples': 627456, 'steps': 13071, 'loss/train': 1.1502867937088013} +07/25/2024 12:27:04 - INFO - __main__ - Step 13073: {'lr': 0.0004915760501279679, 'samples': 627504, 'steps': 13072, 'loss/train': 1.4283918142318726} +07/25/2024 12:27:05 - INFO - __main__ - Step 13074: {'lr': 0.0004915746959976475, 'samples': 627552, 'steps': 13073, 'loss/train': 1.4370023012161255} +07/25/2024 12:27:05 - INFO - __main__ - Step 13075: {'lr': 0.0004915733417603644, 'samples': 627600, 'steps': 13074, 'loss/train': 1.7884547710418701} +07/25/2024 12:27:05 - INFO - __main__ - Step 13076: {'lr': 0.0004915719874161196, 'samples': 627648, 'steps': 13075, 'loss/train': 1.7157305479049683} +07/25/2024 12:27:05 - INFO - __main__ - Step 13077: {'lr': 0.0004915706329649133, 'samples': 627696, 'steps': 13076, 'loss/train': 1.6321184635162354} +07/25/2024 12:27:06 - INFO - __main__ - Step 13078: {'lr': 0.0004915692784067464, 'samples': 627744, 'steps': 13077, 'loss/train': 2.1818597316741943} +07/25/2024 12:27:06 - INFO - __main__ - Step 13079: {'lr': 0.0004915679237416193, 'samples': 627792, 'steps': 13078, 'loss/train': 2.0499322414398193} +07/25/2024 12:27:06 - INFO - __main__ - Step 13080: {'lr': 0.0004915665689695328, 'samples': 627840, 'steps': 13079, 'loss/train': 1.9641015529632568} +07/25/2024 12:27:07 - INFO - __main__ - Step 13081: {'lr': 0.0004915652140904872, 'samples': 627888, 'steps': 13080, 'loss/train': 1.751596212387085} +07/25/2024 12:27:07 - INFO - __main__ - Step 13082: {'lr': 0.0004915638591044835, 'samples': 627936, 'steps': 13081, 'loss/train': 1.7238956689834595} +07/25/2024 12:27:07 - INFO - __main__ - Step 13083: {'lr': 0.000491562504011522, 'samples': 627984, 'steps': 13082, 'loss/train': 2.3757195472717285} +07/25/2024 12:27:07 - INFO - __main__ - Step 13084: {'lr': 0.0004915611488116035, 'samples': 628032, 'steps': 13083, 'loss/train': 1.8363417387008667} +07/25/2024 12:27:08 - INFO - __main__ - Step 13085: {'lr': 0.0004915597935047283, 'samples': 628080, 'steps': 13084, 'loss/train': 1.7627928256988525} +07/25/2024 12:27:08 - INFO - __main__ - Step 13086: {'lr': 0.0004915584380908974, 'samples': 628128, 'steps': 13085, 'loss/train': 2.9003164768218994} +07/25/2024 12:27:08 - INFO - __main__ - Step 13087: {'lr': 0.000491557082570111, 'samples': 628176, 'steps': 13086, 'loss/train': 1.6130244731903076} +07/25/2024 12:27:09 - INFO - __main__ - Step 13088: {'lr': 0.0004915557269423701, 'samples': 628224, 'steps': 13087, 'loss/train': 1.544352650642395} +07/25/2024 12:27:09 - INFO - __main__ - Step 13089: {'lr': 0.0004915543712076749, 'samples': 628272, 'steps': 13088, 'loss/train': 2.096311092376709} +07/25/2024 12:27:09 - INFO - __main__ - Step 13090: {'lr': 0.0004915530153660263, 'samples': 628320, 'steps': 13089, 'loss/train': 2.3226590156555176} +07/25/2024 12:27:09 - INFO - __main__ - Step 13091: {'lr': 0.0004915516594174248, 'samples': 628368, 'steps': 13090, 'loss/train': 1.9849867820739746} +07/25/2024 12:27:10 - INFO - __main__ - Step 13092: {'lr': 0.0004915503033618709, 'samples': 628416, 'steps': 13091, 'loss/train': 2.3575289249420166} +07/25/2024 12:27:10 - INFO - __main__ - Step 13093: {'lr': 0.0004915489471993653, 'samples': 628464, 'steps': 13092, 'loss/train': 1.9700032472610474} +07/25/2024 12:27:10 - INFO - __main__ - Step 13094: {'lr': 0.0004915475909299087, 'samples': 628512, 'steps': 13093, 'loss/train': 1.730684518814087} +07/25/2024 12:27:10 - INFO - __main__ - Step 13095: {'lr': 0.0004915462345535016, 'samples': 628560, 'steps': 13094, 'loss/train': 1.5583065748214722} +07/25/2024 12:27:11 - INFO - __main__ - Step 13096: {'lr': 0.0004915448780701445, 'samples': 628608, 'steps': 13095, 'loss/train': 2.9164464473724365} +07/25/2024 12:27:11 - INFO - __main__ - Step 13097: {'lr': 0.000491543521479838, 'samples': 628656, 'steps': 13096, 'loss/train': 2.5395877361297607} +07/25/2024 12:27:11 - INFO - __main__ - Step 13098: {'lr': 0.000491542164782583, 'samples': 628704, 'steps': 13097, 'loss/train': 1.9350976943969727} +07/25/2024 12:27:12 - INFO - __main__ - Step 13099: {'lr': 0.0004915408079783798, 'samples': 628752, 'steps': 13098, 'loss/train': 1.4717744588851929} +07/25/2024 12:27:12 - INFO - __main__ - Step 13100: {'lr': 0.0004915394510672292, 'samples': 628800, 'steps': 13099, 'loss/train': 2.495213747024536} +07/25/2024 12:27:12 - INFO - __main__ - Step 13101: {'lr': 0.0004915380940491316, 'samples': 628848, 'steps': 13100, 'loss/train': 1.8670767545700073} +07/25/2024 12:27:12 - INFO - __main__ - Step 13102: {'lr': 0.0004915367369240876, 'samples': 628896, 'steps': 13101, 'loss/train': 1.7654414176940918} +07/25/2024 12:27:13 - INFO - __main__ - Step 13103: {'lr': 0.000491535379692098, 'samples': 628944, 'steps': 13102, 'loss/train': 1.917647361755371} +07/25/2024 12:27:13 - INFO - __main__ - Step 13104: {'lr': 0.0004915340223531634, 'samples': 628992, 'steps': 13103, 'loss/train': 1.811212420463562} +07/25/2024 12:27:13 - INFO - __main__ - Step 13105: {'lr': 0.0004915326649072842, 'samples': 629040, 'steps': 13104, 'loss/train': 1.7998753786087036} +07/25/2024 12:27:14 - INFO - __main__ - Step 13106: {'lr': 0.0004915313073544611, 'samples': 629088, 'steps': 13105, 'loss/train': 1.4310575723648071} +07/25/2024 12:27:14 - INFO - __main__ - Step 13107: {'lr': 0.0004915299496946947, 'samples': 629136, 'steps': 13106, 'loss/train': 2.302128314971924} +07/25/2024 12:27:14 - INFO - __main__ - Step 13108: {'lr': 0.0004915285919279858, 'samples': 629184, 'steps': 13107, 'loss/train': 2.190467596054077} +07/25/2024 12:27:14 - INFO - __main__ - Step 13109: {'lr': 0.0004915272340543346, 'samples': 629232, 'steps': 13108, 'loss/train': 2.0062508583068848} +07/25/2024 12:27:15 - INFO - __main__ - Step 13110: {'lr': 0.0004915258760737419, 'samples': 629280, 'steps': 13109, 'loss/train': 2.0228843688964844} +07/25/2024 12:27:15 - INFO - __main__ - Step 13111: {'lr': 0.0004915245179862083, 'samples': 629328, 'steps': 13110, 'loss/train': 2.5919744968414307} +07/25/2024 12:27:15 - INFO - __main__ - Step 13112: {'lr': 0.0004915231597917345, 'samples': 629376, 'steps': 13111, 'loss/train': 1.9617981910705566} +07/25/2024 12:27:16 - INFO - __main__ - Step 13113: {'lr': 0.000491521801490321, 'samples': 629424, 'steps': 13112, 'loss/train': 1.1260017156600952} +07/25/2024 12:27:16 - INFO - __main__ - Step 13114: {'lr': 0.0004915204430819684, 'samples': 629472, 'steps': 13113, 'loss/train': 1.9847148656845093} +07/25/2024 12:27:16 - INFO - __main__ - Step 13115: {'lr': 0.0004915190845666774, 'samples': 629520, 'steps': 13114, 'loss/train': 2.0960466861724854} +07/25/2024 12:27:16 - INFO - __main__ - Step 13116: {'lr': 0.0004915177259444483, 'samples': 629568, 'steps': 13115, 'loss/train': 1.8247489929199219} +07/25/2024 12:27:17 - INFO - __main__ - Step 13117: {'lr': 0.0004915163672152822, 'samples': 629616, 'steps': 13116, 'loss/train': 1.3413480520248413} +07/25/2024 12:27:17 - INFO - __main__ - Step 13118: {'lr': 0.0004915150083791792, 'samples': 629664, 'steps': 13117, 'loss/train': 1.714104413986206} +07/25/2024 12:27:17 - INFO - __main__ - Step 13119: {'lr': 0.0004915136494361402, 'samples': 629712, 'steps': 13118, 'loss/train': 1.6685950756072998} +07/25/2024 12:27:18 - INFO - __main__ - Step 13120: {'lr': 0.0004915122903861658, 'samples': 629760, 'steps': 13119, 'loss/train': 3.5509207248687744} +07/25/2024 12:27:18 - INFO - __main__ - Step 13121: {'lr': 0.0004915109312292564, 'samples': 629808, 'steps': 13120, 'loss/train': 2.4204697608947754} +07/25/2024 12:27:18 - INFO - __main__ - Step 13122: {'lr': 0.0004915095719654126, 'samples': 629856, 'steps': 13121, 'loss/train': 1.411545991897583} +07/25/2024 12:27:18 - INFO - __main__ - Step 13123: {'lr': 0.0004915082125946355, 'samples': 629904, 'steps': 13122, 'loss/train': 1.7460144758224487} +07/25/2024 12:27:19 - INFO - __main__ - Step 13124: {'lr': 0.000491506853116925, 'samples': 629952, 'steps': 13123, 'loss/train': 1.875819206237793} +07/25/2024 12:27:19 - INFO - __main__ - Step 13125: {'lr': 0.0004915054935322821, 'samples': 630000, 'steps': 13124, 'loss/train': 1.953354001045227} +07/25/2024 12:27:19 - INFO - __main__ - Step 13126: {'lr': 0.0004915041338407074, 'samples': 630048, 'steps': 13125, 'loss/train': 1.0780129432678223} +07/25/2024 12:27:20 - INFO - __main__ - Step 13127: {'lr': 0.0004915027740422015, 'samples': 630096, 'steps': 13126, 'loss/train': 1.7303581237792969} +07/25/2024 12:27:20 - INFO - __main__ - Step 13128: {'lr': 0.0004915014141367649, 'samples': 630144, 'steps': 13127, 'loss/train': 1.81999933719635} +07/25/2024 12:27:20 - INFO - __main__ - Step 13129: {'lr': 0.0004915000541243982, 'samples': 630192, 'steps': 13128, 'loss/train': 1.6911561489105225} +07/25/2024 12:27:20 - INFO - __main__ - Step 13130: {'lr': 0.000491498694005102, 'samples': 630240, 'steps': 13129, 'loss/train': 1.3336849212646484} +07/25/2024 12:27:21 - INFO - __main__ - Step 13131: {'lr': 0.000491497333778877, 'samples': 630288, 'steps': 13130, 'loss/train': 1.97421395778656} +07/25/2024 12:27:21 - INFO - __main__ - Step 13132: {'lr': 0.0004914959734457238, 'samples': 630336, 'steps': 13131, 'loss/train': 2.1010630130767822} +07/25/2024 12:27:21 - INFO - __main__ - Step 13133: {'lr': 0.0004914946130056428, 'samples': 630384, 'steps': 13132, 'loss/train': 2.290815830230713} +07/25/2024 12:27:22 - INFO - __main__ - Step 13134: {'lr': 0.0004914932524586349, 'samples': 630432, 'steps': 13133, 'loss/train': 1.7616511583328247} +07/25/2024 12:27:22 - INFO - __main__ - Step 13135: {'lr': 0.0004914918918047004, 'samples': 630480, 'steps': 13134, 'loss/train': 2.0310330390930176} +07/25/2024 12:27:22 - INFO - __main__ - Step 13136: {'lr': 0.0004914905310438402, 'samples': 630528, 'steps': 13135, 'loss/train': 1.5927605628967285} +07/25/2024 12:27:22 - INFO - __main__ - Step 13137: {'lr': 0.0004914891701760548, 'samples': 630576, 'steps': 13136, 'loss/train': 0.9338072538375854} +07/25/2024 12:27:23 - INFO - __main__ - Step 13138: {'lr': 0.0004914878092013446, 'samples': 630624, 'steps': 13137, 'loss/train': 1.7760968208312988} +07/25/2024 12:27:23 - INFO - __main__ - Step 13139: {'lr': 0.0004914864481197105, 'samples': 630672, 'steps': 13138, 'loss/train': 2.1184897422790527} +07/25/2024 12:27:23 - INFO - __main__ - Step 13140: {'lr': 0.0004914850869311529, 'samples': 630720, 'steps': 13139, 'loss/train': 2.124479293823242} +07/25/2024 12:27:24 - INFO - __main__ - Step 13141: {'lr': 0.0004914837256356725, 'samples': 630768, 'steps': 13140, 'loss/train': 1.360689640045166} +07/25/2024 12:27:24 - INFO - __main__ - Step 13142: {'lr': 0.0004914823642332697, 'samples': 630816, 'steps': 13141, 'loss/train': 1.5208200216293335} +07/25/2024 12:27:24 - INFO - __main__ - Step 13143: {'lr': 0.0004914810027239455, 'samples': 630864, 'steps': 13142, 'loss/train': 1.0453360080718994} +07/25/2024 12:27:24 - INFO - __main__ - Step 13144: {'lr': 0.0004914796411077002, 'samples': 630912, 'steps': 13143, 'loss/train': 2.7522404193878174} +07/25/2024 12:27:25 - INFO - __main__ - Step 13145: {'lr': 0.0004914782793845344, 'samples': 630960, 'steps': 13144, 'loss/train': 2.6690616607666016} +07/25/2024 12:27:25 - INFO - __main__ - Step 13146: {'lr': 0.000491476917554449, 'samples': 631008, 'steps': 13145, 'loss/train': 1.1898927688598633} +07/25/2024 12:27:25 - INFO - __main__ - Step 13147: {'lr': 0.0004914755556174442, 'samples': 631056, 'steps': 13146, 'loss/train': 1.954818606376648} +07/25/2024 12:27:26 - INFO - __main__ - Step 13148: {'lr': 0.0004914741935735208, 'samples': 631104, 'steps': 13147, 'loss/train': 1.8584983348846436} +07/25/2024 12:27:26 - INFO - __main__ - Step 13149: {'lr': 0.0004914728314226795, 'samples': 631152, 'steps': 13148, 'loss/train': 1.9855555295944214} +07/25/2024 12:27:26 - INFO - __main__ - Step 13150: {'lr': 0.0004914714691649207, 'samples': 631200, 'steps': 13149, 'loss/train': 1.3642421960830688} +07/25/2024 12:27:26 - INFO - __main__ - Step 13151: {'lr': 0.0004914701068002452, 'samples': 631248, 'steps': 13150, 'loss/train': 1.5988571643829346} +07/25/2024 12:27:27 - INFO - __main__ - Step 13152: {'lr': 0.0004914687443286534, 'samples': 631296, 'steps': 13151, 'loss/train': 2.3698084354400635} +07/25/2024 12:27:27 - INFO - __main__ - Step 13153: {'lr': 0.000491467381750146, 'samples': 631344, 'steps': 13152, 'loss/train': 1.7245500087738037} +07/25/2024 12:27:27 - INFO - __main__ - Step 13154: {'lr': 0.0004914660190647237, 'samples': 631392, 'steps': 13153, 'loss/train': 2.006492853164673} +07/25/2024 12:27:28 - INFO - __main__ - Step 13155: {'lr': 0.000491464656272387, 'samples': 631440, 'steps': 13154, 'loss/train': 1.9421663284301758} +07/25/2024 12:27:28 - INFO - __main__ - Step 13156: {'lr': 0.0004914632933731365, 'samples': 631488, 'steps': 13155, 'loss/train': 2.1332449913024902} +07/25/2024 12:27:28 - INFO - __main__ - Step 13157: {'lr': 0.0004914619303669727, 'samples': 631536, 'steps': 13156, 'loss/train': 2.539482355117798} +07/25/2024 12:27:28 - INFO - __main__ - Step 13158: {'lr': 0.0004914605672538964, 'samples': 631584, 'steps': 13157, 'loss/train': 2.1857643127441406} +07/25/2024 12:27:29 - INFO - __main__ - Step 13159: {'lr': 0.0004914592040339082, 'samples': 631632, 'steps': 13158, 'loss/train': 1.7915847301483154} +07/25/2024 12:27:29 - INFO - __main__ - Step 13160: {'lr': 0.0004914578407070086, 'samples': 631680, 'steps': 13159, 'loss/train': 1.931534767150879} +07/25/2024 12:27:29 - INFO - __main__ - Step 13161: {'lr': 0.0004914564772731982, 'samples': 631728, 'steps': 13160, 'loss/train': 1.67294180393219} +07/25/2024 12:27:29 - INFO - __main__ - Step 13162: {'lr': 0.0004914551137324777, 'samples': 631776, 'steps': 13161, 'loss/train': 1.949361801147461} +07/25/2024 12:27:30 - INFO - __main__ - Step 13163: {'lr': 0.0004914537500848475, 'samples': 631824, 'steps': 13162, 'loss/train': 1.5109471082687378} +07/25/2024 12:27:30 - INFO - __main__ - Step 13164: {'lr': 0.0004914523863303084, 'samples': 631872, 'steps': 13163, 'loss/train': 2.0886943340301514} +07/25/2024 12:27:30 - INFO - __main__ - Step 13165: {'lr': 0.000491451022468861, 'samples': 631920, 'steps': 13164, 'loss/train': 1.3961039781570435} +07/25/2024 12:27:31 - INFO - __main__ - Step 13166: {'lr': 0.0004914496585005059, 'samples': 631968, 'steps': 13165, 'loss/train': 1.7813376188278198} +07/25/2024 12:27:31 - INFO - __main__ - Step 13167: {'lr': 0.0004914482944252435, 'samples': 632016, 'steps': 13166, 'loss/train': 1.257395625114441} +07/25/2024 12:27:31 - INFO - __main__ - Step 13168: {'lr': 0.0004914469302430747, 'samples': 632064, 'steps': 13167, 'loss/train': 2.268512725830078} +07/25/2024 12:27:31 - INFO - __main__ - Step 13169: {'lr': 0.000491445565954, 'samples': 632112, 'steps': 13168, 'loss/train': 2.6261346340179443} +07/25/2024 12:27:32 - INFO - __main__ - Step 13170: {'lr': 0.0004914442015580198, 'samples': 632160, 'steps': 13169, 'loss/train': 1.5065093040466309} +07/25/2024 12:27:32 - INFO - __main__ - Step 13171: {'lr': 0.0004914428370551351, 'samples': 632208, 'steps': 13170, 'loss/train': 1.4456392526626587} +07/25/2024 12:27:32 - INFO - __main__ - Step 13172: {'lr': 0.000491441472445346, 'samples': 632256, 'steps': 13171, 'loss/train': 1.9237077236175537} +07/25/2024 12:27:33 - INFO - __main__ - Step 13173: {'lr': 0.0004914401077286536, 'samples': 632304, 'steps': 13172, 'loss/train': 0.8819540143013} +07/25/2024 12:27:33 - INFO - __main__ - Step 13174: {'lr': 0.0004914387429050582, 'samples': 632352, 'steps': 13173, 'loss/train': 1.8728277683258057} +07/25/2024 12:27:33 - INFO - __main__ - Step 13175: {'lr': 0.0004914373779745605, 'samples': 632400, 'steps': 13174, 'loss/train': 1.834004282951355} +07/25/2024 12:27:33 - INFO - __main__ - Step 13176: {'lr': 0.000491436012937161, 'samples': 632448, 'steps': 13175, 'loss/train': 1.5208872556686401} +07/25/2024 12:27:34 - INFO - __main__ - Step 13177: {'lr': 0.0004914346477928605, 'samples': 632496, 'steps': 13176, 'loss/train': 1.642264485359192} +07/25/2024 12:27:34 - INFO - __main__ - Step 13178: {'lr': 0.0004914332825416596, 'samples': 632544, 'steps': 13177, 'loss/train': 3.139127016067505} +07/25/2024 12:27:34 - INFO - __main__ - Step 13179: {'lr': 0.0004914319171835587, 'samples': 632592, 'steps': 13178, 'loss/train': 2.115572452545166} +07/25/2024 12:27:35 - INFO - __main__ - Step 13180: {'lr': 0.0004914305517185585, 'samples': 632640, 'steps': 13179, 'loss/train': 1.7133299112319946} +07/25/2024 12:27:35 - INFO - __main__ - Step 13181: {'lr': 0.0004914291861466595, 'samples': 632688, 'steps': 13180, 'loss/train': 2.27622127532959} +07/25/2024 12:27:35 - INFO - __main__ - Step 13182: {'lr': 0.0004914278204678626, 'samples': 632736, 'steps': 13181, 'loss/train': 1.7581911087036133} +07/25/2024 12:27:35 - INFO - __main__ - Step 13183: {'lr': 0.0004914264546821682, 'samples': 632784, 'steps': 13182, 'loss/train': 1.7829824686050415} +07/25/2024 12:27:36 - INFO - __main__ - Step 13184: {'lr': 0.0004914250887895769, 'samples': 632832, 'steps': 13183, 'loss/train': 1.9256325960159302} +07/25/2024 12:27:36 - INFO - __main__ - Step 13185: {'lr': 0.0004914237227900893, 'samples': 632880, 'steps': 13184, 'loss/train': 1.3755074739456177} +07/25/2024 12:27:36 - INFO - __main__ - Step 13186: {'lr': 0.000491422356683706, 'samples': 632928, 'steps': 13185, 'loss/train': 1.829627275466919} +07/25/2024 12:27:37 - INFO - __main__ - Step 13187: {'lr': 0.0004914209904704278, 'samples': 632976, 'steps': 13186, 'loss/train': 1.941195011138916} +07/25/2024 12:27:37 - INFO - __main__ - Step 13188: {'lr': 0.0004914196241502551, 'samples': 633024, 'steps': 13187, 'loss/train': 1.556538462638855} +07/25/2024 12:27:37 - INFO - __main__ - Step 13189: {'lr': 0.0004914182577231885, 'samples': 633072, 'steps': 13188, 'loss/train': 1.7875090837478638} +07/25/2024 12:27:37 - INFO - __main__ - Step 13190: {'lr': 0.0004914168911892287, 'samples': 633120, 'steps': 13189, 'loss/train': 1.6037694215774536} +07/25/2024 12:27:38 - INFO - __main__ - Step 13191: {'lr': 0.0004914155245483762, 'samples': 633168, 'steps': 13190, 'loss/train': 0.9820877909660339} +07/25/2024 12:27:38 - INFO - __main__ - Step 13192: {'lr': 0.0004914141578006317, 'samples': 633216, 'steps': 13191, 'loss/train': 2.3436717987060547} +07/25/2024 12:27:38 - INFO - __main__ - Step 13193: {'lr': 0.0004914127909459959, 'samples': 633264, 'steps': 13192, 'loss/train': 2.093940496444702} +07/25/2024 12:27:39 - INFO - __main__ - Step 13194: {'lr': 0.0004914114239844691, 'samples': 633312, 'steps': 13193, 'loss/train': 1.2923274040222168} +07/25/2024 12:27:39 - INFO - __main__ - Step 13195: {'lr': 0.0004914100569160522, 'samples': 633360, 'steps': 13194, 'loss/train': 1.6280674934387207} +07/25/2024 12:27:39 - INFO - __main__ - Step 13196: {'lr': 0.0004914086897407457, 'samples': 633408, 'steps': 13195, 'loss/train': 2.0987250804901123} +07/25/2024 12:27:39 - INFO - __main__ - Step 13197: {'lr': 0.0004914073224585501, 'samples': 633456, 'steps': 13196, 'loss/train': 1.3628720045089722} +07/25/2024 12:27:40 - INFO - __main__ - Step 13198: {'lr': 0.0004914059550694662, 'samples': 633504, 'steps': 13197, 'loss/train': 1.5550563335418701} +07/25/2024 12:27:40 - INFO - __main__ - Step 13199: {'lr': 0.0004914045875734944, 'samples': 633552, 'steps': 13198, 'loss/train': 1.6542004346847534} +07/25/2024 12:27:40 - INFO - __main__ - Step 13200: {'lr': 0.0004914032199706355, 'samples': 633600, 'steps': 13199, 'loss/train': 2.1378800868988037} +07/25/2024 12:27:41 - INFO - __main__ - Step 13201: {'lr': 0.0004914018522608899, 'samples': 633648, 'steps': 13200, 'loss/train': 1.4323214292526245} +07/25/2024 12:27:41 - INFO - __main__ - Step 13202: {'lr': 0.0004914004844442585, 'samples': 633696, 'steps': 13201, 'loss/train': 4.516005516052246} +07/25/2024 12:27:41 - INFO - __main__ - Step 13203: {'lr': 0.0004913991165207416, 'samples': 633744, 'steps': 13202, 'loss/train': 1.7403435707092285} +07/25/2024 12:27:41 - INFO - __main__ - Step 13204: {'lr': 0.00049139774849034, 'samples': 633792, 'steps': 13203, 'loss/train': 1.258360505104065} +07/25/2024 12:27:42 - INFO - __main__ - Step 13205: {'lr': 0.0004913963803530542, 'samples': 633840, 'steps': 13204, 'loss/train': 1.5962992906570435} +07/25/2024 12:27:42 - INFO - __main__ - Step 13206: {'lr': 0.0004913950121088849, 'samples': 633888, 'steps': 13205, 'loss/train': 2.302356719970703} +07/25/2024 12:27:42 - INFO - __main__ - Step 13207: {'lr': 0.0004913936437578325, 'samples': 633936, 'steps': 13206, 'loss/train': 0.7911639213562012} +07/25/2024 12:27:43 - INFO - __main__ - Step 13208: {'lr': 0.000491392275299898, 'samples': 633984, 'steps': 13207, 'loss/train': 2.138284206390381} +07/25/2024 12:27:43 - INFO - __main__ - Step 13209: {'lr': 0.0004913909067350816, 'samples': 634032, 'steps': 13208, 'loss/train': 1.1483992338180542} +07/25/2024 12:27:43 - INFO - __main__ - Step 13210: {'lr': 0.0004913895380633842, 'samples': 634080, 'steps': 13209, 'loss/train': 2.0587403774261475} +07/25/2024 12:27:43 - INFO - __main__ - Step 13211: {'lr': 0.0004913881692848062, 'samples': 634128, 'steps': 13210, 'loss/train': 1.606871247291565} +07/25/2024 12:27:44 - INFO - __main__ - Step 13212: {'lr': 0.0004913868003993482, 'samples': 634176, 'steps': 13211, 'loss/train': 1.4620691537857056} +07/25/2024 12:27:44 - INFO - __main__ - Step 13213: {'lr': 0.000491385431407011, 'samples': 634224, 'steps': 13212, 'loss/train': 1.72653067111969} +07/25/2024 12:27:44 - INFO - __main__ - Step 13214: {'lr': 0.000491384062307795, 'samples': 634272, 'steps': 13213, 'loss/train': 1.9337986707687378} +07/25/2024 12:27:45 - INFO - __main__ - Step 13215: {'lr': 0.000491382693101701, 'samples': 634320, 'steps': 13214, 'loss/train': 1.0464625358581543} +07/25/2024 12:27:45 - INFO - __main__ - Step 13216: {'lr': 0.0004913813237887295, 'samples': 634368, 'steps': 13215, 'loss/train': 2.2213451862335205} +07/25/2024 12:27:45 - INFO - __main__ - Step 13217: {'lr': 0.000491379954368881, 'samples': 634416, 'steps': 13216, 'loss/train': 2.2245872020721436} +07/25/2024 12:27:45 - INFO - __main__ - Step 13218: {'lr': 0.0004913785848421563, 'samples': 634464, 'steps': 13217, 'loss/train': 1.0607616901397705} +07/25/2024 12:27:46 - INFO - __main__ - Step 13219: {'lr': 0.0004913772152085559, 'samples': 634512, 'steps': 13218, 'loss/train': 1.3146626949310303} +07/25/2024 12:27:46 - INFO - __main__ - Step 13220: {'lr': 0.0004913758454680805, 'samples': 634560, 'steps': 13219, 'loss/train': 1.7654744386672974} +07/25/2024 12:27:46 - INFO - __main__ - Step 13221: {'lr': 0.0004913744756207306, 'samples': 634608, 'steps': 13220, 'loss/train': 2.7008068561553955} +07/25/2024 12:27:47 - INFO - __main__ - Step 13222: {'lr': 0.0004913731056665068, 'samples': 634656, 'steps': 13221, 'loss/train': 2.902430534362793} +07/25/2024 12:27:47 - INFO - __main__ - Step 13223: {'lr': 0.0004913717356054098, 'samples': 634704, 'steps': 13222, 'loss/train': 1.7299665212631226} +07/25/2024 12:27:47 - INFO - __main__ - Step 13224: {'lr': 0.0004913703654374402, 'samples': 634752, 'steps': 13223, 'loss/train': 1.9312810897827148} +07/25/2024 12:27:47 - INFO - __main__ - Step 13225: {'lr': 0.0004913689951625984, 'samples': 634800, 'steps': 13224, 'loss/train': 1.9483956098556519} +07/25/2024 12:27:48 - INFO - __main__ - Step 13226: {'lr': 0.0004913676247808853, 'samples': 634848, 'steps': 13225, 'loss/train': 3.77458119392395} +07/25/2024 12:27:48 - INFO - __main__ - Step 13227: {'lr': 0.0004913662542923014, 'samples': 634896, 'steps': 13226, 'loss/train': 1.0428646802902222} +07/25/2024 12:27:48 - INFO - __main__ - Step 13228: {'lr': 0.0004913648836968472, 'samples': 634944, 'steps': 13227, 'loss/train': 2.1946332454681396} +07/25/2024 12:27:48 - INFO - __main__ - Step 13229: {'lr': 0.0004913635129945235, 'samples': 634992, 'steps': 13228, 'loss/train': 1.0657100677490234} +07/25/2024 12:27:49 - INFO - __main__ - Step 13230: {'lr': 0.0004913621421853307, 'samples': 635040, 'steps': 13229, 'loss/train': 1.574133038520813} +07/25/2024 12:27:49 - INFO - __main__ - Step 13231: {'lr': 0.0004913607712692696, 'samples': 635088, 'steps': 13230, 'loss/train': 1.7420903444290161} +07/25/2024 12:27:49 - INFO - __main__ - Step 13232: {'lr': 0.0004913594002463407, 'samples': 635136, 'steps': 13231, 'loss/train': 2.314578056335449} +07/25/2024 12:27:50 - INFO - __main__ - Step 13233: {'lr': 0.0004913580291165445, 'samples': 635184, 'steps': 13232, 'loss/train': 1.6167923212051392} +07/25/2024 12:27:50 - INFO - __main__ - Step 13234: {'lr': 0.0004913566578798818, 'samples': 635232, 'steps': 13233, 'loss/train': 1.9043642282485962} +07/25/2024 12:27:50 - INFO - __main__ - Step 13235: {'lr': 0.0004913552865363531, 'samples': 635280, 'steps': 13234, 'loss/train': 2.078751802444458} +07/25/2024 12:27:50 - INFO - __main__ - Step 13236: {'lr': 0.0004913539150859591, 'samples': 635328, 'steps': 13235, 'loss/train': 0.404287725687027} +07/25/2024 12:27:51 - INFO - __main__ - Step 13237: {'lr': 0.0004913525435287004, 'samples': 635376, 'steps': 13236, 'loss/train': 2.443236827850342} +07/25/2024 12:27:51 - INFO - __main__ - Step 13238: {'lr': 0.0004913511718645775, 'samples': 635424, 'steps': 13237, 'loss/train': 2.7491934299468994} +07/25/2024 12:27:51 - INFO - __main__ - Step 13239: {'lr': 0.000491349800093591, 'samples': 635472, 'steps': 13238, 'loss/train': 0.9132989048957825} +07/25/2024 12:27:52 - INFO - __main__ - Step 13240: {'lr': 0.0004913484282157417, 'samples': 635520, 'steps': 13239, 'loss/train': 2.3477096557617188} +07/25/2024 12:27:52 - INFO - __main__ - Step 13241: {'lr': 0.00049134705623103, 'samples': 635568, 'steps': 13240, 'loss/train': 2.321152448654175} +07/25/2024 12:27:52 - INFO - __main__ - Step 13242: {'lr': 0.0004913456841394565, 'samples': 635616, 'steps': 13241, 'loss/train': 1.2518643140792847} +07/25/2024 12:27:52 - INFO - __main__ - Step 13243: {'lr': 0.0004913443119410221, 'samples': 635664, 'steps': 13242, 'loss/train': 1.5305359363555908} +07/25/2024 12:27:53 - INFO - __main__ - Step 13244: {'lr': 0.000491342939635727, 'samples': 635712, 'steps': 13243, 'loss/train': 1.5634338855743408} +07/25/2024 12:27:53 - INFO - __main__ - Step 13245: {'lr': 0.000491341567223572, 'samples': 635760, 'steps': 13244, 'loss/train': 1.3268061876296997} +07/25/2024 12:27:53 - INFO - __main__ - Step 13246: {'lr': 0.0004913401947045579, 'samples': 635808, 'steps': 13245, 'loss/train': 2.4381794929504395} +07/25/2024 12:27:54 - INFO - __main__ - Step 13247: {'lr': 0.000491338822078685, 'samples': 635856, 'steps': 13246, 'loss/train': 1.831583023071289} +07/25/2024 12:27:54 - INFO - __main__ - Step 13248: {'lr': 0.000491337449345954, 'samples': 635904, 'steps': 13247, 'loss/train': 2.049135446548462} +07/25/2024 12:27:54 - INFO - __main__ - Step 13249: {'lr': 0.0004913360765063657, 'samples': 635952, 'steps': 13248, 'loss/train': 1.9242275953292847} +07/25/2024 12:27:54 - INFO - __main__ - Step 13250: {'lr': 0.0004913347035599205, 'samples': 636000, 'steps': 13249, 'loss/train': 1.3940311670303345} +07/25/2024 12:27:55 - INFO - __main__ - Step 13251: {'lr': 0.000491333330506619, 'samples': 636048, 'steps': 13250, 'loss/train': 0.7971069812774658} +07/25/2024 12:27:55 - INFO - __main__ - Step 13252: {'lr': 0.0004913319573464618, 'samples': 636096, 'steps': 13251, 'loss/train': 2.1590487957000732} +07/25/2024 12:27:55 - INFO - __main__ - Step 13253: {'lr': 0.0004913305840794496, 'samples': 636144, 'steps': 13252, 'loss/train': 0.24840351939201355} +07/25/2024 12:27:56 - INFO - __main__ - Step 13254: {'lr': 0.0004913292107055831, 'samples': 636192, 'steps': 13253, 'loss/train': 1.356972098350525} +07/25/2024 12:27:56 - INFO - __main__ - Step 13255: {'lr': 0.0004913278372248626, 'samples': 636240, 'steps': 13254, 'loss/train': 1.2287307977676392} +07/25/2024 12:27:56 - INFO - __main__ - Step 13256: {'lr': 0.0004913264636372891, 'samples': 636288, 'steps': 13255, 'loss/train': 1.9756819009780884} +07/25/2024 12:27:56 - INFO - __main__ - Step 13257: {'lr': 0.0004913250899428629, 'samples': 636336, 'steps': 13256, 'loss/train': 1.1047991514205933} +07/25/2024 12:27:57 - INFO - __main__ - Step 13258: {'lr': 0.0004913237161415846, 'samples': 636384, 'steps': 13257, 'loss/train': 1.4666786193847656} +07/25/2024 12:27:57 - INFO - __main__ - Step 13259: {'lr': 0.0004913223422334552, 'samples': 636432, 'steps': 13258, 'loss/train': 2.2563018798828125} +07/25/2024 12:27:57 - INFO - __main__ - Step 13260: {'lr': 0.0004913209682184749, 'samples': 636480, 'steps': 13259, 'loss/train': 1.2111324071884155} +07/25/2024 12:27:58 - INFO - __main__ - Step 13261: {'lr': 0.0004913195940966443, 'samples': 636528, 'steps': 13260, 'loss/train': 2.1727325916290283} +07/25/2024 12:27:58 - INFO - __main__ - Step 13262: {'lr': 0.0004913182198679642, 'samples': 636576, 'steps': 13261, 'loss/train': 1.6040185689926147} +07/25/2024 12:27:58 - INFO - __main__ - Step 13263: {'lr': 0.0004913168455324353, 'samples': 636624, 'steps': 13262, 'loss/train': 0.7651288509368896} +07/25/2024 12:27:58 - INFO - __main__ - Step 13264: {'lr': 0.0004913154710900579, 'samples': 636672, 'steps': 13263, 'loss/train': 1.845858097076416} +07/25/2024 12:27:59 - INFO - __main__ - Step 13265: {'lr': 0.0004913140965408329, 'samples': 636720, 'steps': 13264, 'loss/train': 1.889541506767273} +07/25/2024 12:27:59 - INFO - __main__ - Step 13266: {'lr': 0.0004913127218847608, 'samples': 636768, 'steps': 13265, 'loss/train': 0.6655340790748596} +07/25/2024 12:27:59 - INFO - __main__ - Step 13267: {'lr': 0.000491311347121842, 'samples': 636816, 'steps': 13266, 'loss/train': 1.7041934728622437} +07/25/2024 12:28:00 - INFO - __main__ - Step 13268: {'lr': 0.0004913099722520775, 'samples': 636864, 'steps': 13267, 'loss/train': 1.5044121742248535} +07/25/2024 12:28:00 - INFO - __main__ - Step 13269: {'lr': 0.0004913085972754675, 'samples': 636912, 'steps': 13268, 'loss/train': 1.2026158571243286} +07/25/2024 12:28:00 - INFO - __main__ - Step 13270: {'lr': 0.0004913072221920129, 'samples': 636960, 'steps': 13269, 'loss/train': 2.094424247741699} +07/25/2024 12:28:00 - INFO - __main__ - Step 13271: {'lr': 0.0004913058470017142, 'samples': 637008, 'steps': 13270, 'loss/train': 1.7703417539596558} +07/25/2024 12:28:01 - INFO - __main__ - Step 13272: {'lr': 0.0004913044717045721, 'samples': 637056, 'steps': 13271, 'loss/train': 1.7137724161148071} +07/25/2024 12:28:01 - INFO - __main__ - Step 13273: {'lr': 0.0004913030963005872, 'samples': 637104, 'steps': 13272, 'loss/train': 1.7099714279174805} +07/25/2024 12:28:01 - INFO - __main__ - Step 13274: {'lr': 0.0004913017207897599, 'samples': 637152, 'steps': 13273, 'loss/train': 1.5648396015167236} +07/25/2024 12:28:02 - INFO - __main__ - Step 13275: {'lr': 0.0004913003451720911, 'samples': 637200, 'steps': 13274, 'loss/train': 1.849183201789856} +07/25/2024 12:28:02 - INFO - __main__ - Step 13276: {'lr': 0.0004912989694475812, 'samples': 637248, 'steps': 13275, 'loss/train': 1.8118449449539185} +07/25/2024 12:28:02 - INFO - __main__ - Step 13277: {'lr': 0.0004912975936162308, 'samples': 637296, 'steps': 13276, 'loss/train': 0.14899106323719025} +07/25/2024 12:28:02 - INFO - __main__ - Step 13278: {'lr': 0.0004912962176780406, 'samples': 637344, 'steps': 13277, 'loss/train': 1.3503044843673706} +07/25/2024 12:28:03 - INFO - __main__ - Step 13279: {'lr': 0.0004912948416330113, 'samples': 637392, 'steps': 13278, 'loss/train': 1.9231153726577759} +07/25/2024 12:28:03 - INFO - __main__ - Step 13280: {'lr': 0.0004912934654811434, 'samples': 637440, 'steps': 13279, 'loss/train': 1.9695212841033936} +07/25/2024 12:28:03 - INFO - __main__ - Step 13281: {'lr': 0.0004912920892224375, 'samples': 637488, 'steps': 13280, 'loss/train': 1.506665587425232} +07/25/2024 12:28:04 - INFO - __main__ - Step 13282: {'lr': 0.0004912907128568942, 'samples': 637536, 'steps': 13281, 'loss/train': 1.5282248258590698} +07/25/2024 12:28:04 - INFO - __main__ - Step 13283: {'lr': 0.0004912893363845141, 'samples': 637584, 'steps': 13282, 'loss/train': 2.2236576080322266} +07/25/2024 12:28:04 - INFO - __main__ - Step 13284: {'lr': 0.0004912879598052979, 'samples': 637632, 'steps': 13283, 'loss/train': 2.280377149581909} +07/25/2024 12:28:04 - INFO - __main__ - Step 13285: {'lr': 0.0004912865831192461, 'samples': 637680, 'steps': 13284, 'loss/train': 2.4361488819122314} +07/25/2024 12:28:05 - INFO - __main__ - Step 13286: {'lr': 0.0004912852063263595, 'samples': 637728, 'steps': 13285, 'loss/train': 1.6388635635375977} +07/25/2024 12:28:05 - INFO - __main__ - Step 13287: {'lr': 0.0004912838294266384, 'samples': 637776, 'steps': 13286, 'loss/train': 1.2960827350616455} +07/25/2024 12:28:05 - INFO - __main__ - Step 13288: {'lr': 0.0004912824524200836, 'samples': 637824, 'steps': 13287, 'loss/train': 1.6089376211166382} +07/25/2024 12:28:06 - INFO - __main__ - Step 13289: {'lr': 0.0004912810753066958, 'samples': 637872, 'steps': 13288, 'loss/train': 1.521352767944336} +07/25/2024 12:28:06 - INFO - __main__ - Step 13290: {'lr': 0.0004912796980864754, 'samples': 637920, 'steps': 13289, 'loss/train': 1.2858474254608154} +07/25/2024 12:28:06 - INFO - __main__ - Step 13291: {'lr': 0.0004912783207594231, 'samples': 637968, 'steps': 13290, 'loss/train': 2.3969967365264893} +07/25/2024 12:28:06 - INFO - __main__ - Step 13292: {'lr': 0.0004912769433255397, 'samples': 638016, 'steps': 13291, 'loss/train': 2.076815128326416} +07/25/2024 12:28:07 - INFO - __main__ - Step 13293: {'lr': 0.0004912755657848255, 'samples': 638064, 'steps': 13292, 'loss/train': 0.7953693270683289} +07/25/2024 12:28:07 - INFO - __main__ - Step 13294: {'lr': 0.0004912741881372811, 'samples': 638112, 'steps': 13293, 'loss/train': 1.979844570159912} +07/25/2024 12:28:07 - INFO - __main__ - Step 13295: {'lr': 0.0004912728103829074, 'samples': 638160, 'steps': 13294, 'loss/train': 1.9668259620666504} +07/25/2024 12:28:08 - INFO - __main__ - Step 13296: {'lr': 0.0004912714325217049, 'samples': 638208, 'steps': 13295, 'loss/train': 1.9991750717163086} +07/25/2024 12:28:08 - INFO - __main__ - Step 13297: {'lr': 0.0004912700545536741, 'samples': 638256, 'steps': 13296, 'loss/train': 1.4685794115066528} +07/25/2024 12:28:08 - INFO - __main__ - Step 13298: {'lr': 0.0004912686764788157, 'samples': 638304, 'steps': 13297, 'loss/train': 1.7995026111602783} +07/25/2024 12:28:08 - INFO - __main__ - Step 13299: {'lr': 0.0004912672982971303, 'samples': 638352, 'steps': 13298, 'loss/train': 1.960496187210083} +07/25/2024 12:28:09 - INFO - __main__ - Step 13300: {'lr': 0.0004912659200086185, 'samples': 638400, 'steps': 13299, 'loss/train': 1.2802519798278809} +07/25/2024 12:28:09 - INFO - __main__ - Step 13301: {'lr': 0.0004912645416132809, 'samples': 638448, 'steps': 13300, 'loss/train': 0.12399853765964508} +07/25/2024 12:28:09 - INFO - __main__ - Step 13302: {'lr': 0.0004912631631111181, 'samples': 638496, 'steps': 13301, 'loss/train': 1.7423423528671265} +07/25/2024 12:28:10 - INFO - __main__ - Step 13303: {'lr': 0.0004912617845021307, 'samples': 638544, 'steps': 13302, 'loss/train': 1.9368577003479004} +07/25/2024 12:28:10 - INFO - __main__ - Step 13304: {'lr': 0.0004912604057863194, 'samples': 638592, 'steps': 13303, 'loss/train': 1.6020872592926025} +07/25/2024 12:28:10 - INFO - __main__ - Step 13305: {'lr': 0.0004912590269636847, 'samples': 638640, 'steps': 13304, 'loss/train': 1.619079828262329} +07/25/2024 12:28:10 - INFO - __main__ - Step 13306: {'lr': 0.0004912576480342273, 'samples': 638688, 'steps': 13305, 'loss/train': 2.0240747928619385} +07/25/2024 12:28:11 - INFO - __main__ - Step 13307: {'lr': 0.0004912562689979479, 'samples': 638736, 'steps': 13306, 'loss/train': 2.7379276752471924} +07/25/2024 12:28:11 - INFO - __main__ - Step 13308: {'lr': 0.0004912548898548468, 'samples': 638784, 'steps': 13307, 'loss/train': 1.733580231666565} +07/25/2024 12:28:11 - INFO - __main__ - Step 13309: {'lr': 0.0004912535106049249, 'samples': 638832, 'steps': 13308, 'loss/train': 2.253067970275879} +07/25/2024 12:28:11 - INFO - __main__ - Step 13310: {'lr': 0.0004912521312481825, 'samples': 638880, 'steps': 13309, 'loss/train': 1.8918368816375732} +07/25/2024 12:28:12 - INFO - __main__ - Step 13311: {'lr': 0.0004912507517846206, 'samples': 638928, 'steps': 13310, 'loss/train': 2.020047426223755} +07/25/2024 12:28:12 - INFO - __main__ - Step 13312: {'lr': 0.0004912493722142397, 'samples': 638976, 'steps': 13311, 'loss/train': 1.3468093872070312} +07/25/2024 12:28:12 - INFO - __main__ - Step 13313: {'lr': 0.0004912479925370402, 'samples': 639024, 'steps': 13312, 'loss/train': 1.7795758247375488} +07/25/2024 12:28:13 - INFO - __main__ - Step 13314: {'lr': 0.000491246612753023, 'samples': 639072, 'steps': 13313, 'loss/train': 1.7514125108718872} +07/25/2024 12:28:13 - INFO - __main__ - Step 13315: {'lr': 0.0004912452328621884, 'samples': 639120, 'steps': 13314, 'loss/train': 2.31938099861145} +07/25/2024 12:28:13 - INFO - __main__ - Step 13316: {'lr': 0.0004912438528645372, 'samples': 639168, 'steps': 13315, 'loss/train': 1.8593043088912964} +07/25/2024 12:28:13 - INFO - __main__ - Step 13317: {'lr': 0.00049124247276007, 'samples': 639216, 'steps': 13316, 'loss/train': 0.923693060874939} +07/25/2024 12:28:14 - INFO - __main__ - Step 13318: {'lr': 0.0004912410925487874, 'samples': 639264, 'steps': 13317, 'loss/train': 1.3270361423492432} +07/25/2024 12:28:14 - INFO - __main__ - Step 13319: {'lr': 0.00049123971223069, 'samples': 639312, 'steps': 13318, 'loss/train': 1.694718837738037} +07/25/2024 12:28:14 - INFO - __main__ - Step 13320: {'lr': 0.0004912383318057783, 'samples': 639360, 'steps': 13319, 'loss/train': 1.896934986114502} +07/25/2024 12:28:15 - INFO - __main__ - Step 13321: {'lr': 0.0004912369512740532, 'samples': 639408, 'steps': 13320, 'loss/train': 2.1775429248809814} +07/25/2024 12:28:15 - INFO - __main__ - Step 13322: {'lr': 0.0004912355706355152, 'samples': 639456, 'steps': 13321, 'loss/train': 2.050793170928955} +07/25/2024 12:28:15 - INFO - __main__ - Step 13323: {'lr': 0.0004912341898901647, 'samples': 639504, 'steps': 13322, 'loss/train': 1.6314328908920288} +07/25/2024 12:28:15 - INFO - __main__ - Step 13324: {'lr': 0.0004912328090380025, 'samples': 639552, 'steps': 13323, 'loss/train': 1.5924452543258667} +07/25/2024 12:28:16 - INFO - __main__ - Step 13325: {'lr': 0.0004912314280790291, 'samples': 639600, 'steps': 13324, 'loss/train': 0.19142846763134003} +07/25/2024 12:28:16 - INFO - __main__ - Step 13326: {'lr': 0.0004912300470132452, 'samples': 639648, 'steps': 13325, 'loss/train': 1.9066137075424194} +07/25/2024 12:28:16 - INFO - __main__ - Step 13327: {'lr': 0.0004912286658406515, 'samples': 639696, 'steps': 13326, 'loss/train': 1.9809061288833618} +07/25/2024 12:28:17 - INFO - __main__ - Step 13328: {'lr': 0.0004912272845612485, 'samples': 639744, 'steps': 13327, 'loss/train': 1.8056364059448242} +07/25/2024 12:28:17 - INFO - __main__ - Step 13329: {'lr': 0.0004912259031750367, 'samples': 639792, 'steps': 13328, 'loss/train': 1.4772456884384155} +07/25/2024 12:28:17 - INFO - __main__ - Step 13330: {'lr': 0.0004912245216820169, 'samples': 639840, 'steps': 13329, 'loss/train': 2.2380247116088867} +07/25/2024 12:28:17 - INFO - __main__ - Step 13331: {'lr': 0.0004912231400821896, 'samples': 639888, 'steps': 13330, 'loss/train': 2.787473440170288} +07/25/2024 12:28:18 - INFO - __main__ - Step 13332: {'lr': 0.0004912217583755555, 'samples': 639936, 'steps': 13331, 'loss/train': 1.7470457553863525} +07/25/2024 12:28:18 - INFO - __main__ - Step 13333: {'lr': 0.0004912203765621152, 'samples': 639984, 'steps': 13332, 'loss/train': 1.7835843563079834} +07/25/2024 12:28:18 - INFO - __main__ - Step 13334: {'lr': 0.0004912189946418692, 'samples': 640032, 'steps': 13333, 'loss/train': 1.8261886835098267} +07/25/2024 12:28:19 - INFO - __main__ - Step 13335: {'lr': 0.0004912176126148181, 'samples': 640080, 'steps': 13334, 'loss/train': 2.183887243270874} +07/25/2024 12:28:19 - INFO - __main__ - Step 13336: {'lr': 0.0004912162304809627, 'samples': 640128, 'steps': 13335, 'loss/train': 2.1722826957702637} +07/25/2024 12:28:19 - INFO - __main__ - Step 13337: {'lr': 0.0004912148482403036, 'samples': 640176, 'steps': 13336, 'loss/train': 2.070310354232788} +07/25/2024 12:28:19 - INFO - __main__ - Step 13338: {'lr': 0.0004912134658928412, 'samples': 640224, 'steps': 13337, 'loss/train': 2.0871481895446777} +07/25/2024 12:28:20 - INFO - __main__ - Step 13339: {'lr': 0.0004912120834385763, 'samples': 640272, 'steps': 13338, 'loss/train': 1.743517279624939} +07/25/2024 12:28:20 - INFO - __main__ - Step 13340: {'lr': 0.0004912107008775094, 'samples': 640320, 'steps': 13339, 'loss/train': 1.664880394935608} +07/25/2024 12:28:20 - INFO - __main__ - Step 13341: {'lr': 0.0004912093182096412, 'samples': 640368, 'steps': 13340, 'loss/train': 1.4417724609375} +07/25/2024 12:28:21 - INFO - __main__ - Step 13342: {'lr': 0.0004912079354349723, 'samples': 640416, 'steps': 13341, 'loss/train': 1.9847278594970703} +07/25/2024 12:28:21 - INFO - __main__ - Step 13343: {'lr': 0.0004912065525535032, 'samples': 640464, 'steps': 13342, 'loss/train': 2.032426357269287} +07/25/2024 12:28:21 - INFO - __main__ - Step 13344: {'lr': 0.0004912051695652347, 'samples': 640512, 'steps': 13343, 'loss/train': 1.809719443321228} +07/25/2024 12:28:21 - INFO - __main__ - Step 13345: {'lr': 0.0004912037864701673, 'samples': 640560, 'steps': 13344, 'loss/train': 2.1764252185821533} +07/25/2024 12:28:22 - INFO - __main__ - Step 13346: {'lr': 0.0004912024032683015, 'samples': 640608, 'steps': 13345, 'loss/train': 2.1833300590515137} +07/25/2024 12:28:22 - INFO - __main__ - Step 13347: {'lr': 0.0004912010199596381, 'samples': 640656, 'steps': 13346, 'loss/train': 1.634742259979248} +07/25/2024 12:28:22 - INFO - __main__ - Step 13348: {'lr': 0.0004911996365441776, 'samples': 640704, 'steps': 13347, 'loss/train': 1.8769140243530273} +07/25/2024 12:28:23 - INFO - __main__ - Step 13349: {'lr': 0.0004911982530219208, 'samples': 640752, 'steps': 13348, 'loss/train': 0.14259998500347137} +07/25/2024 12:28:23 - INFO - __main__ - Step 13350: {'lr': 0.0004911968693928681, 'samples': 640800, 'steps': 13349, 'loss/train': 1.847036361694336} +07/25/2024 12:28:23 - INFO - __main__ - Step 13351: {'lr': 0.0004911954856570202, 'samples': 640848, 'steps': 13350, 'loss/train': 1.7926617860794067} +07/25/2024 12:28:23 - INFO - __main__ - Step 13352: {'lr': 0.0004911941018143776, 'samples': 640896, 'steps': 13351, 'loss/train': 1.745679259300232} +07/25/2024 12:28:24 - INFO - __main__ - Step 13353: {'lr': 0.0004911927178649411, 'samples': 640944, 'steps': 13352, 'loss/train': 2.0091919898986816} +07/25/2024 12:28:24 - INFO - __main__ - Step 13354: {'lr': 0.0004911913338087111, 'samples': 640992, 'steps': 13353, 'loss/train': 1.9881552457809448} +07/25/2024 12:28:24 - INFO - __main__ - Step 13355: {'lr': 0.0004911899496456885, 'samples': 641040, 'steps': 13354, 'loss/train': 3.379818916320801} +07/25/2024 12:28:25 - INFO - __main__ - Step 13356: {'lr': 0.0004911885653758738, 'samples': 641088, 'steps': 13355, 'loss/train': 1.8067219257354736} +07/25/2024 12:28:25 - INFO - __main__ - Step 13357: {'lr': 0.0004911871809992674, 'samples': 641136, 'steps': 13356, 'loss/train': 1.6449804306030273} +07/25/2024 12:28:25 - INFO - __main__ - Step 13358: {'lr': 0.0004911857965158701, 'samples': 641184, 'steps': 13357, 'loss/train': 2.0914573669433594} +07/25/2024 12:28:25 - INFO - __main__ - Step 13359: {'lr': 0.0004911844119256826, 'samples': 641232, 'steps': 13358, 'loss/train': 1.7740581035614014} +07/25/2024 12:28:26 - INFO - __main__ - Step 13360: {'lr': 0.0004911830272287053, 'samples': 641280, 'steps': 13359, 'loss/train': 1.2735137939453125} +07/25/2024 12:28:26 - INFO - __main__ - Step 13361: {'lr': 0.0004911816424249388, 'samples': 641328, 'steps': 13360, 'loss/train': 1.6319218873977661} +07/25/2024 12:28:26 - INFO - __main__ - Step 13362: {'lr': 0.000491180257514384, 'samples': 641376, 'steps': 13361, 'loss/train': 1.9376325607299805} +07/25/2024 12:28:27 - INFO - __main__ - Step 13363: {'lr': 0.0004911788724970413, 'samples': 641424, 'steps': 13362, 'loss/train': 2.0083272457122803} +07/25/2024 12:28:27 - INFO - __main__ - Step 13364: {'lr': 0.0004911774873729113, 'samples': 641472, 'steps': 13363, 'loss/train': 1.8253984451293945} +07/25/2024 12:28:27 - INFO - __main__ - Step 13365: {'lr': 0.0004911761021419947, 'samples': 641520, 'steps': 13364, 'loss/train': 1.546409010887146} +07/25/2024 12:28:27 - INFO - __main__ - Step 13366: {'lr': 0.0004911747168042921, 'samples': 641568, 'steps': 13365, 'loss/train': 2.227895736694336} +07/25/2024 12:28:28 - INFO - __main__ - Step 13367: {'lr': 0.0004911733313598041, 'samples': 641616, 'steps': 13366, 'loss/train': 1.320574164390564} +07/25/2024 12:28:28 - INFO - __main__ - Step 13368: {'lr': 0.0004911719458085313, 'samples': 641664, 'steps': 13367, 'loss/train': 1.9509718418121338} +07/25/2024 12:28:28 - INFO - __main__ - Step 13369: {'lr': 0.0004911705601504743, 'samples': 641712, 'steps': 13368, 'loss/train': 1.5374070405960083} +07/25/2024 12:28:29 - INFO - __main__ - Step 13370: {'lr': 0.0004911691743856337, 'samples': 641760, 'steps': 13369, 'loss/train': 2.11950421333313} +07/25/2024 12:28:29 - INFO - __main__ - Step 13371: {'lr': 0.0004911677885140102, 'samples': 641808, 'steps': 13370, 'loss/train': 1.625062346458435} +07/25/2024 12:28:29 - INFO - __main__ - Step 13372: {'lr': 0.0004911664025356044, 'samples': 641856, 'steps': 13371, 'loss/train': 1.6220660209655762} +07/25/2024 12:28:29 - INFO - __main__ - Step 13373: {'lr': 0.0004911650164504168, 'samples': 641904, 'steps': 13372, 'loss/train': 0.2344159036874771} +07/25/2024 12:28:30 - INFO - __main__ - Step 13374: {'lr': 0.0004911636302584482, 'samples': 641952, 'steps': 13373, 'loss/train': 2.0791404247283936} +07/25/2024 12:28:30 - INFO - __main__ - Step 13375: {'lr': 0.0004911622439596989, 'samples': 642000, 'steps': 13374, 'loss/train': 1.6851096153259277} +07/25/2024 12:28:30 - INFO - __main__ - Step 13376: {'lr': 0.0004911608575541699, 'samples': 642048, 'steps': 13375, 'loss/train': 2.1868014335632324} +07/25/2024 12:28:31 - INFO - __main__ - Step 13377: {'lr': 0.0004911594710418615, 'samples': 642096, 'steps': 13376, 'loss/train': 1.8876887559890747} +07/25/2024 12:28:31 - INFO - __main__ - Step 13378: {'lr': 0.0004911580844227744, 'samples': 642144, 'steps': 13377, 'loss/train': 2.501436948776245} +07/25/2024 12:28:31 - INFO - __main__ - Step 13379: {'lr': 0.0004911566976969094, 'samples': 642192, 'steps': 13378, 'loss/train': 1.8041003942489624} +07/25/2024 12:28:31 - INFO - __main__ - Step 13380: {'lr': 0.000491155310864267, 'samples': 642240, 'steps': 13379, 'loss/train': 1.9683562517166138} +07/25/2024 12:28:32 - INFO - __main__ - Step 13381: {'lr': 0.0004911539239248477, 'samples': 642288, 'steps': 13380, 'loss/train': 2.2275311946868896} +07/25/2024 12:28:32 - INFO - __main__ - Step 13382: {'lr': 0.0004911525368786522, 'samples': 642336, 'steps': 13381, 'loss/train': 2.026981830596924} +07/25/2024 12:28:32 - INFO - __main__ - Step 13383: {'lr': 0.0004911511497256812, 'samples': 642384, 'steps': 13382, 'loss/train': 2.0035128593444824} +07/25/2024 12:28:33 - INFO - __main__ - Step 13384: {'lr': 0.0004911497624659351, 'samples': 642432, 'steps': 13383, 'loss/train': 2.000260591506958} +07/25/2024 12:28:33 - INFO - __main__ - Step 13385: {'lr': 0.0004911483750994147, 'samples': 642480, 'steps': 13384, 'loss/train': 1.2579021453857422} +07/25/2024 12:28:33 - INFO - __main__ - Step 13386: {'lr': 0.0004911469876261206, 'samples': 642528, 'steps': 13385, 'loss/train': 2.3133018016815186} +07/25/2024 12:28:33 - INFO - __main__ - Step 13387: {'lr': 0.0004911456000460532, 'samples': 642576, 'steps': 13386, 'loss/train': 2.385984420776367} +07/25/2024 12:28:34 - INFO - __main__ - Step 13388: {'lr': 0.0004911442123592134, 'samples': 642624, 'steps': 13387, 'loss/train': 1.9169155359268188} +07/25/2024 12:28:34 - INFO - __main__ - Step 13389: {'lr': 0.0004911428245656017, 'samples': 642672, 'steps': 13388, 'loss/train': 2.0682666301727295} +07/25/2024 12:28:34 - INFO - __main__ - Step 13390: {'lr': 0.0004911414366652188, 'samples': 642720, 'steps': 13389, 'loss/train': 1.7404731512069702} +07/25/2024 12:28:34 - INFO - __main__ - Step 13391: {'lr': 0.0004911400486580651, 'samples': 642768, 'steps': 13390, 'loss/train': 1.9748308658599854} +07/25/2024 12:28:35 - INFO - __main__ - Step 13392: {'lr': 0.0004911386605441414, 'samples': 642816, 'steps': 13391, 'loss/train': 1.3329551219940186} +07/25/2024 12:28:35 - INFO - __main__ - Step 13393: {'lr': 0.0004911372723234483, 'samples': 642864, 'steps': 13392, 'loss/train': 2.325974702835083} +07/25/2024 12:28:35 - INFO - __main__ - Step 13394: {'lr': 0.0004911358839959864, 'samples': 642912, 'steps': 13393, 'loss/train': 1.8913413286209106} +07/25/2024 12:28:36 - INFO - __main__ - Step 13395: {'lr': 0.0004911344955617562, 'samples': 642960, 'steps': 13394, 'loss/train': 2.2676186561584473} +07/25/2024 12:28:36 - INFO - __main__ - Step 13396: {'lr': 0.0004911331070207584, 'samples': 643008, 'steps': 13395, 'loss/train': 2.32532000541687} +07/25/2024 12:28:36 - INFO - __main__ - Step 13397: {'lr': 0.0004911317183729936, 'samples': 643056, 'steps': 13396, 'loss/train': 0.35452795028686523} +07/25/2024 12:28:36 - INFO - __main__ - Step 13398: {'lr': 0.0004911303296184625, 'samples': 643104, 'steps': 13397, 'loss/train': 1.7151474952697754} +07/25/2024 12:28:37 - INFO - __main__ - Step 13399: {'lr': 0.0004911289407571656, 'samples': 643152, 'steps': 13398, 'loss/train': 1.661702036857605} +07/25/2024 12:28:37 - INFO - __main__ - Step 13400: {'lr': 0.0004911275517891035, 'samples': 643200, 'steps': 13399, 'loss/train': 1.8932552337646484} +07/25/2024 12:28:37 - INFO - __main__ - Step 13401: {'lr': 0.000491126162714277, 'samples': 643248, 'steps': 13400, 'loss/train': 3.596831798553467} +07/25/2024 12:28:38 - INFO - __main__ - Step 13402: {'lr': 0.0004911247735326865, 'samples': 643296, 'steps': 13401, 'loss/train': 2.1423869132995605} +07/25/2024 12:28:38 - INFO - __main__ - Step 13403: {'lr': 0.0004911233842443327, 'samples': 643344, 'steps': 13402, 'loss/train': 2.048011064529419} +07/25/2024 12:28:38 - INFO - __main__ - Step 13404: {'lr': 0.0004911219948492164, 'samples': 643392, 'steps': 13403, 'loss/train': 1.9831857681274414} +07/25/2024 12:28:38 - INFO - __main__ - Step 13405: {'lr': 0.0004911206053473378, 'samples': 643440, 'steps': 13404, 'loss/train': 2.297760009765625} +07/25/2024 12:28:39 - INFO - __main__ - Step 13406: {'lr': 0.0004911192157386979, 'samples': 643488, 'steps': 13405, 'loss/train': 2.122927665710449} +07/25/2024 12:28:39 - INFO - __main__ - Step 13407: {'lr': 0.0004911178260232971, 'samples': 643536, 'steps': 13406, 'loss/train': 1.8039311170578003} +07/25/2024 12:28:39 - INFO - __main__ - Step 13408: {'lr': 0.0004911164362011361, 'samples': 643584, 'steps': 13407, 'loss/train': 1.7823964357376099} +07/25/2024 12:28:40 - INFO - __main__ - Step 13409: {'lr': 0.0004911150462722155, 'samples': 643632, 'steps': 13408, 'loss/train': 2.3664767742156982} +07/25/2024 12:28:40 - INFO - __main__ - Step 13410: {'lr': 0.0004911136562365359, 'samples': 643680, 'steps': 13409, 'loss/train': 2.3585288524627686} +07/25/2024 12:28:40 - INFO - __main__ - Step 13411: {'lr': 0.000491112266094098, 'samples': 643728, 'steps': 13410, 'loss/train': 1.5707921981811523} +07/25/2024 12:28:40 - INFO - __main__ - Step 13412: {'lr': 0.0004911108758449023, 'samples': 643776, 'steps': 13411, 'loss/train': 2.0366060733795166} +07/25/2024 12:28:41 - INFO - __main__ - Step 13413: {'lr': 0.0004911094854889494, 'samples': 643824, 'steps': 13412, 'loss/train': 1.7726466655731201} +07/25/2024 12:28:41 - INFO - __main__ - Step 13414: {'lr': 0.0004911080950262401, 'samples': 643872, 'steps': 13413, 'loss/train': 2.1567535400390625} +07/25/2024 12:28:41 - INFO - __main__ - Step 13415: {'lr': 0.0004911067044567748, 'samples': 643920, 'steps': 13414, 'loss/train': 1.8486573696136475} +07/25/2024 12:28:42 - INFO - __main__ - Step 13416: {'lr': 0.0004911053137805542, 'samples': 643968, 'steps': 13415, 'loss/train': 1.6739470958709717} +07/25/2024 12:28:42 - INFO - __main__ - Step 13417: {'lr': 0.000491103922997579, 'samples': 644016, 'steps': 13416, 'loss/train': 1.9952220916748047} +07/25/2024 12:28:42 - INFO - __main__ - Step 13418: {'lr': 0.0004911025321078496, 'samples': 644064, 'steps': 13417, 'loss/train': 1.9475075006484985} +07/25/2024 12:28:42 - INFO - __main__ - Step 13419: {'lr': 0.0004911011411113669, 'samples': 644112, 'steps': 13418, 'loss/train': 1.6881345510482788} +07/25/2024 12:28:43 - INFO - __main__ - Step 13420: {'lr': 0.0004910997500081313, 'samples': 644160, 'steps': 13419, 'loss/train': 1.63412344455719} +07/25/2024 12:28:43 - INFO - __main__ - Step 13421: {'lr': 0.0004910983587981435, 'samples': 644208, 'steps': 13420, 'loss/train': 0.20721861720085144} +07/25/2024 12:28:43 - INFO - __main__ - Step 13422: {'lr': 0.0004910969674814041, 'samples': 644256, 'steps': 13421, 'loss/train': 2.1462388038635254} +07/25/2024 12:28:44 - INFO - __main__ - Step 13423: {'lr': 0.0004910955760579137, 'samples': 644304, 'steps': 13422, 'loss/train': 1.7583569288253784} +07/25/2024 12:28:44 - INFO - __main__ - Step 13424: {'lr': 0.000491094184527673, 'samples': 644352, 'steps': 13423, 'loss/train': 2.592414617538452} +07/25/2024 12:28:44 - INFO - __main__ - Step 13425: {'lr': 0.0004910927928906824, 'samples': 644400, 'steps': 13424, 'loss/train': 1.3785048723220825} +07/25/2024 12:28:44 - INFO - __main__ - Step 13426: {'lr': 0.0004910914011469428, 'samples': 644448, 'steps': 13425, 'loss/train': 1.5939520597457886} +07/25/2024 12:28:45 - INFO - __main__ - Step 13427: {'lr': 0.0004910900092964547, 'samples': 644496, 'steps': 13426, 'loss/train': 1.668654203414917} +07/25/2024 12:28:45 - INFO - __main__ - Step 13428: {'lr': 0.0004910886173392186, 'samples': 644544, 'steps': 13427, 'loss/train': 2.390727996826172} +07/25/2024 12:28:45 - INFO - __main__ - Step 13429: {'lr': 0.0004910872252752353, 'samples': 644592, 'steps': 13428, 'loss/train': 2.0260682106018066} +07/25/2024 12:28:46 - INFO - __main__ - Step 13430: {'lr': 0.0004910858331045052, 'samples': 644640, 'steps': 13429, 'loss/train': 1.909855604171753} +07/25/2024 12:28:46 - INFO - __main__ - Step 13431: {'lr': 0.0004910844408270292, 'samples': 644688, 'steps': 13430, 'loss/train': 1.6801081895828247} +07/25/2024 12:28:46 - INFO - __main__ - Step 13432: {'lr': 0.0004910830484428077, 'samples': 644736, 'steps': 13431, 'loss/train': 1.7863526344299316} +07/25/2024 12:28:46 - INFO - __main__ - Step 13433: {'lr': 0.0004910816559518414, 'samples': 644784, 'steps': 13432, 'loss/train': 1.567854404449463} +07/25/2024 12:28:47 - INFO - __main__ - Step 13434: {'lr': 0.0004910802633541309, 'samples': 644832, 'steps': 13433, 'loss/train': 1.5065895318984985} +07/25/2024 12:28:47 - INFO - __main__ - Step 13435: {'lr': 0.0004910788706496769, 'samples': 644880, 'steps': 13434, 'loss/train': 0.9286015629768372} +07/25/2024 12:28:47 - INFO - __main__ - Step 13436: {'lr': 0.0004910774778384798, 'samples': 644928, 'steps': 13435, 'loss/train': 2.1213958263397217} +07/25/2024 12:28:48 - INFO - __main__ - Step 13437: {'lr': 0.0004910760849205403, 'samples': 644976, 'steps': 13436, 'loss/train': 1.927733302116394} +07/25/2024 12:28:48 - INFO - __main__ - Step 13438: {'lr': 0.0004910746918958592, 'samples': 645024, 'steps': 13437, 'loss/train': 2.2882087230682373} +07/25/2024 12:28:48 - INFO - __main__ - Step 13439: {'lr': 0.000491073298764437, 'samples': 645072, 'steps': 13438, 'loss/train': 1.7314131259918213} +07/25/2024 12:28:48 - INFO - __main__ - Step 13440: {'lr': 0.0004910719055262741, 'samples': 645120, 'steps': 13439, 'loss/train': 1.714322566986084} +07/25/2024 12:28:49 - INFO - __main__ - Step 13441: {'lr': 0.0004910705121813715, 'samples': 645168, 'steps': 13440, 'loss/train': 1.8150380849838257} +07/25/2024 12:28:49 - INFO - __main__ - Step 13442: {'lr': 0.0004910691187297296, 'samples': 645216, 'steps': 13441, 'loss/train': 1.6558295488357544} +07/25/2024 12:28:49 - INFO - __main__ - Step 13443: {'lr': 0.0004910677251713491, 'samples': 645264, 'steps': 13442, 'loss/train': 1.8736672401428223} +07/25/2024 12:28:50 - INFO - __main__ - Step 13444: {'lr': 0.0004910663315062304, 'samples': 645312, 'steps': 13443, 'loss/train': 2.1523587703704834} +07/25/2024 12:28:50 - INFO - __main__ - Step 13445: {'lr': 0.0004910649377343743, 'samples': 645360, 'steps': 13444, 'loss/train': 0.15140984952449799} +07/25/2024 12:28:50 - INFO - __main__ - Step 13446: {'lr': 0.0004910635438557815, 'samples': 645408, 'steps': 13445, 'loss/train': 2.096038341522217} +07/25/2024 12:28:50 - INFO - __main__ - Step 13447: {'lr': 0.0004910621498704525, 'samples': 645456, 'steps': 13446, 'loss/train': 2.37156081199646} +07/25/2024 12:28:51 - INFO - __main__ - Step 13448: {'lr': 0.000491060755778388, 'samples': 645504, 'steps': 13447, 'loss/train': 2.3023462295532227} +07/25/2024 12:28:51 - INFO - __main__ - Step 13449: {'lr': 0.0004910593615795884, 'samples': 645552, 'steps': 13448, 'loss/train': 1.4616434574127197} +07/25/2024 12:28:51 - INFO - __main__ - Step 13450: {'lr': 0.0004910579672740545, 'samples': 645600, 'steps': 13449, 'loss/train': 1.347130537033081} +07/25/2024 12:28:52 - INFO - __main__ - Step 13451: {'lr': 0.000491056572861787, 'samples': 645648, 'steps': 13450, 'loss/train': 1.9053860902786255} +07/25/2024 12:28:52 - INFO - __main__ - Step 13452: {'lr': 0.0004910551783427863, 'samples': 645696, 'steps': 13451, 'loss/train': 2.394932746887207} +07/25/2024 12:28:52 - INFO - __main__ - Step 13453: {'lr': 0.0004910537837170531, 'samples': 645744, 'steps': 13452, 'loss/train': 1.8661329746246338} +07/25/2024 12:28:52 - INFO - __main__ - Step 13454: {'lr': 0.0004910523889845881, 'samples': 645792, 'steps': 13453, 'loss/train': 1.6317389011383057} +07/25/2024 12:28:53 - INFO - __main__ - Step 13455: {'lr': 0.0004910509941453919, 'samples': 645840, 'steps': 13454, 'loss/train': 0.6678206920623779} +07/25/2024 12:28:53 - INFO - __main__ - Step 13456: {'lr': 0.000491049599199465, 'samples': 645888, 'steps': 13455, 'loss/train': 1.7120766639709473} +07/25/2024 12:28:53 - INFO - __main__ - Step 13457: {'lr': 0.0004910482041468082, 'samples': 645936, 'steps': 13456, 'loss/train': 2.0359857082366943} +07/25/2024 12:28:54 - INFO - __main__ - Step 13458: {'lr': 0.0004910468089874218, 'samples': 645984, 'steps': 13457, 'loss/train': 1.288477897644043} +07/25/2024 12:28:54 - INFO - __main__ - Step 13459: {'lr': 0.0004910454137213068, 'samples': 646032, 'steps': 13458, 'loss/train': 1.796593189239502} +07/25/2024 12:28:54 - INFO - __main__ - Step 13460: {'lr': 0.0004910440183484636, 'samples': 646080, 'steps': 13459, 'loss/train': 1.8867343664169312} +07/25/2024 12:28:54 - INFO - __main__ - Step 13461: {'lr': 0.0004910426228688929, 'samples': 646128, 'steps': 13460, 'loss/train': 1.5215779542922974} +07/25/2024 12:28:55 - INFO - __main__ - Step 13462: {'lr': 0.0004910412272825953, 'samples': 646176, 'steps': 13461, 'loss/train': 2.668735980987549} +07/25/2024 12:28:55 - INFO - __main__ - Step 13463: {'lr': 0.0004910398315895713, 'samples': 646224, 'steps': 13462, 'loss/train': 1.4841381311416626} +07/25/2024 12:28:55 - INFO - __main__ - Step 13464: {'lr': 0.0004910384357898216, 'samples': 646272, 'steps': 13463, 'loss/train': 2.5762979984283447} +07/25/2024 12:28:55 - INFO - __main__ - Step 13465: {'lr': 0.0004910370398833469, 'samples': 646320, 'steps': 13464, 'loss/train': 1.7004687786102295} +07/25/2024 12:28:56 - INFO - __main__ - Step 13466: {'lr': 0.0004910356438701478, 'samples': 646368, 'steps': 13465, 'loss/train': 1.5114305019378662} +07/25/2024 12:28:56 - INFO - __main__ - Step 13467: {'lr': 0.0004910342477502247, 'samples': 646416, 'steps': 13466, 'loss/train': 1.6891345977783203} +07/25/2024 12:28:56 - INFO - __main__ - Step 13468: {'lr': 0.0004910328515235786, 'samples': 646464, 'steps': 13467, 'loss/train': 1.953537106513977} +07/25/2024 12:28:57 - INFO - __main__ - Step 13469: {'lr': 0.0004910314551902098, 'samples': 646512, 'steps': 13468, 'loss/train': 0.11612390726804733} +07/25/2024 12:28:57 - INFO - __main__ - Step 13470: {'lr': 0.0004910300587501191, 'samples': 646560, 'steps': 13469, 'loss/train': 1.4364955425262451} +07/25/2024 12:28:57 - INFO - __main__ - Step 13471: {'lr': 0.0004910286622033069, 'samples': 646608, 'steps': 13470, 'loss/train': 1.6696614027023315} +07/25/2024 12:28:57 - INFO - __main__ - Step 13472: {'lr': 0.000491027265549774, 'samples': 646656, 'steps': 13471, 'loss/train': 0.7662495374679565} +07/25/2024 12:28:58 - INFO - __main__ - Step 13473: {'lr': 0.0004910258687895211, 'samples': 646704, 'steps': 13472, 'loss/train': 2.239151954650879} +07/25/2024 12:28:58 - INFO - __main__ - Step 13474: {'lr': 0.0004910244719225485, 'samples': 646752, 'steps': 13473, 'loss/train': 1.8046971559524536} +07/25/2024 12:28:58 - INFO - __main__ - Step 13475: {'lr': 0.0004910230749488573, 'samples': 646800, 'steps': 13474, 'loss/train': 1.6548664569854736} +07/25/2024 12:28:59 - INFO - __main__ - Step 13476: {'lr': 0.0004910216778684476, 'samples': 646848, 'steps': 13475, 'loss/train': 1.3977783918380737} +07/25/2024 12:28:59 - INFO - __main__ - Step 13477: {'lr': 0.0004910202806813203, 'samples': 646896, 'steps': 13476, 'loss/train': 1.7494174242019653} +07/25/2024 12:28:59 - INFO - __main__ - Step 13478: {'lr': 0.000491018883387476, 'samples': 646944, 'steps': 13477, 'loss/train': 2.124394416809082} +07/25/2024 12:28:59 - INFO - __main__ - Step 13479: {'lr': 0.0004910174859869153, 'samples': 646992, 'steps': 13478, 'loss/train': 2.0266151428222656} +07/25/2024 12:29:00 - INFO - __main__ - Step 13480: {'lr': 0.0004910160884796388, 'samples': 647040, 'steps': 13479, 'loss/train': 1.9710769653320312} +07/25/2024 12:29:00 - INFO - __main__ - Step 13481: {'lr': 0.0004910146908656471, 'samples': 647088, 'steps': 13480, 'loss/train': 0.8629716634750366} +07/25/2024 12:29:00 - INFO - __main__ - Step 13482: {'lr': 0.0004910132931449409, 'samples': 647136, 'steps': 13481, 'loss/train': 1.5974210500717163} +07/25/2024 12:29:01 - INFO - __main__ - Step 13483: {'lr': 0.0004910118953175208, 'samples': 647184, 'steps': 13482, 'loss/train': 2.2008955478668213} +07/25/2024 12:29:01 - INFO - __main__ - Step 13484: {'lr': 0.0004910104973833872, 'samples': 647232, 'steps': 13483, 'loss/train': 1.8096234798431396} +07/25/2024 12:29:01 - INFO - __main__ - Step 13485: {'lr': 0.0004910090993425411, 'samples': 647280, 'steps': 13484, 'loss/train': 2.4124836921691895} +07/25/2024 12:29:01 - INFO - __main__ - Step 13486: {'lr': 0.0004910077011949828, 'samples': 647328, 'steps': 13485, 'loss/train': 2.0637593269348145} +07/25/2024 12:29:02 - INFO - __main__ - Step 13487: {'lr': 0.0004910063029407132, 'samples': 647376, 'steps': 13486, 'loss/train': 1.654440999031067} +07/25/2024 12:29:02 - INFO - __main__ - Step 13488: {'lr': 0.0004910049045797327, 'samples': 647424, 'steps': 13487, 'loss/train': 2.1033618450164795} +07/25/2024 12:29:02 - INFO - __main__ - Step 13489: {'lr': 0.0004910035061120419, 'samples': 647472, 'steps': 13488, 'loss/train': 1.732772707939148} +07/25/2024 12:29:03 - INFO - __main__ - Step 13490: {'lr': 0.0004910021075376415, 'samples': 647520, 'steps': 13489, 'loss/train': 1.5748951435089111} +07/25/2024 12:29:03 - INFO - __main__ - Step 13491: {'lr': 0.0004910007088565322, 'samples': 647568, 'steps': 13490, 'loss/train': 1.1233742237091064} +07/25/2024 12:29:03 - INFO - __main__ - Step 13492: {'lr': 0.0004909993100687145, 'samples': 647616, 'steps': 13491, 'loss/train': 1.4334250688552856} +07/25/2024 12:29:03 - INFO - __main__ - Step 13493: {'lr': 0.0004909979111741891, 'samples': 647664, 'steps': 13492, 'loss/train': 0.18010374903678894} +07/25/2024 12:29:04 - INFO - __main__ - Step 13494: {'lr': 0.0004909965121729564, 'samples': 647712, 'steps': 13493, 'loss/train': 1.7188451290130615} +07/25/2024 12:29:04 - INFO - __main__ - Step 13495: {'lr': 0.0004909951130650175, 'samples': 647760, 'steps': 13494, 'loss/train': 0.868078887462616} +07/25/2024 12:29:04 - INFO - __main__ - Step 13496: {'lr': 0.0004909937138503725, 'samples': 647808, 'steps': 13495, 'loss/train': 1.6324632167816162} +07/25/2024 12:29:05 - INFO - __main__ - Step 13497: {'lr': 0.0004909923145290223, 'samples': 647856, 'steps': 13496, 'loss/train': 2.1553778648376465} +07/25/2024 12:29:05 - INFO - __main__ - Step 13498: {'lr': 0.0004909909151009674, 'samples': 647904, 'steps': 13497, 'loss/train': 1.7915993928909302} +07/25/2024 12:29:05 - INFO - __main__ - Step 13499: {'lr': 0.0004909895155662085, 'samples': 647952, 'steps': 13498, 'loss/train': 1.906795859336853} +07/25/2024 12:29:05 - INFO - __main__ - Step 13500: {'lr': 0.0004909881159247462, 'samples': 648000, 'steps': 13499, 'loss/train': 0.3141467869281769} +07/25/2024 12:29:06 - INFO - __main__ - Step 13501: {'lr': 0.0004909867161765812, 'samples': 648048, 'steps': 13500, 'loss/train': 1.965643286705017} +07/25/2024 12:29:06 - INFO - __main__ - Step 13502: {'lr': 0.0004909853163217139, 'samples': 648096, 'steps': 13501, 'loss/train': 1.6897679567337036} +07/25/2024 12:29:06 - INFO - __main__ - Step 13503: {'lr': 0.0004909839163601452, 'samples': 648144, 'steps': 13502, 'loss/train': 1.7192258834838867} +07/25/2024 12:29:07 - INFO - __main__ - Step 13504: {'lr': 0.0004909825162918755, 'samples': 648192, 'steps': 13503, 'loss/train': 2.0178065299987793} +07/25/2024 12:29:07 - INFO - __main__ - Step 13505: {'lr': 0.0004909811161169054, 'samples': 648240, 'steps': 13504, 'loss/train': 1.7064900398254395} +07/25/2024 12:29:07 - INFO - __main__ - Step 13506: {'lr': 0.0004909797158352358, 'samples': 648288, 'steps': 13505, 'loss/train': 2.0559444427490234} +07/25/2024 12:29:07 - INFO - __main__ - Step 13507: {'lr': 0.000490978315446867, 'samples': 648336, 'steps': 13506, 'loss/train': 1.9218498468399048} +07/25/2024 12:29:08 - INFO - __main__ - Step 13508: {'lr': 0.0004909769149517998, 'samples': 648384, 'steps': 13507, 'loss/train': 2.2786178588867188} +07/25/2024 12:29:08 - INFO - __main__ - Step 13509: {'lr': 0.0004909755143500347, 'samples': 648432, 'steps': 13508, 'loss/train': 1.9017475843429565} +07/25/2024 12:29:08 - INFO - __main__ - Step 13510: {'lr': 0.0004909741136415725, 'samples': 648480, 'steps': 13509, 'loss/train': 1.794054627418518} +07/25/2024 12:29:09 - INFO - __main__ - Step 13511: {'lr': 0.0004909727128264137, 'samples': 648528, 'steps': 13510, 'loss/train': 1.07135808467865} +07/25/2024 12:29:09 - INFO - __main__ - Step 13512: {'lr': 0.0004909713119045589, 'samples': 648576, 'steps': 13511, 'loss/train': 1.906472086906433} +07/25/2024 12:29:09 - INFO - __main__ - Step 13513: {'lr': 0.0004909699108760087, 'samples': 648624, 'steps': 13512, 'loss/train': 1.8974480628967285} +07/25/2024 12:29:09 - INFO - __main__ - Step 13514: {'lr': 0.0004909685097407639, 'samples': 648672, 'steps': 13513, 'loss/train': 1.6106857061386108} +07/25/2024 12:29:10 - INFO - __main__ - Step 13515: {'lr': 0.000490967108498825, 'samples': 648720, 'steps': 13514, 'loss/train': 1.5887906551361084} +07/25/2024 12:29:10 - INFO - __main__ - Step 13516: {'lr': 0.0004909657071501925, 'samples': 648768, 'steps': 13515, 'loss/train': 1.7108287811279297} +07/25/2024 12:29:10 - INFO - __main__ - Step 13517: {'lr': 0.0004909643056948672, 'samples': 648816, 'steps': 13516, 'loss/train': 1.9172062873840332} +07/25/2024 12:29:11 - INFO - __main__ - Step 13518: {'lr': 0.0004909629041328496, 'samples': 648864, 'steps': 13517, 'loss/train': 1.7028217315673828} +07/25/2024 12:29:11 - INFO - __main__ - Step 13519: {'lr': 0.0004909615024641404, 'samples': 648912, 'steps': 13518, 'loss/train': 0.5304627418518066} +07/25/2024 12:29:11 - INFO - __main__ - Step 13520: {'lr': 0.0004909601006887401, 'samples': 648960, 'steps': 13519, 'loss/train': 1.6988261938095093} +07/25/2024 12:29:11 - INFO - __main__ - Step 13521: {'lr': 0.0004909586988066497, 'samples': 649008, 'steps': 13520, 'loss/train': 1.8737449645996094} +07/25/2024 12:29:12 - INFO - __main__ - Step 13522: {'lr': 0.0004909572968178694, 'samples': 649056, 'steps': 13521, 'loss/train': 1.8341715335845947} +07/25/2024 12:29:12 - INFO - __main__ - Step 13523: {'lr': 0.0004909558947223999, 'samples': 649104, 'steps': 13522, 'loss/train': 1.6686766147613525} +07/25/2024 12:29:12 - INFO - __main__ - Step 13524: {'lr': 0.000490954492520242, 'samples': 649152, 'steps': 13523, 'loss/train': 1.771942377090454} +07/25/2024 12:29:13 - INFO - __main__ - Step 13525: {'lr': 0.000490953090211396, 'samples': 649200, 'steps': 13524, 'loss/train': 1.5884582996368408} +07/25/2024 12:29:13 - INFO - __main__ - Step 13526: {'lr': 0.0004909516877958629, 'samples': 649248, 'steps': 13525, 'loss/train': 1.2118364572525024} +07/25/2024 12:29:13 - INFO - __main__ - Step 13527: {'lr': 0.000490950285273643, 'samples': 649296, 'steps': 13526, 'loss/train': 1.509028673171997} +07/25/2024 12:29:13 - INFO - __main__ - Step 13528: {'lr': 0.0004909488826447372, 'samples': 649344, 'steps': 13527, 'loss/train': 1.8594348430633545} +07/25/2024 12:29:14 - INFO - __main__ - Step 13529: {'lr': 0.000490947479909146, 'samples': 649392, 'steps': 13528, 'loss/train': 1.572474718093872} +07/25/2024 12:29:14 - INFO - __main__ - Step 13530: {'lr': 0.0004909460770668698, 'samples': 649440, 'steps': 13529, 'loss/train': 1.4545906782150269} +07/25/2024 12:29:14 - INFO - __main__ - Step 13531: {'lr': 0.0004909446741179097, 'samples': 649488, 'steps': 13530, 'loss/train': 2.158578634262085} +07/25/2024 12:29:15 - INFO - __main__ - Step 13532: {'lr': 0.0004909432710622659, 'samples': 649536, 'steps': 13531, 'loss/train': 2.0636825561523438} +07/25/2024 12:29:15 - INFO - __main__ - Step 13533: {'lr': 0.0004909418678999393, 'samples': 649584, 'steps': 13532, 'loss/train': 2.0689337253570557} +07/25/2024 12:29:15 - INFO - __main__ - Step 13534: {'lr': 0.0004909404646309302, 'samples': 649632, 'steps': 13533, 'loss/train': 2.0558996200561523} +07/25/2024 12:29:15 - INFO - __main__ - Step 13535: {'lr': 0.0004909390612552395, 'samples': 649680, 'steps': 13534, 'loss/train': 1.0035725831985474} +07/25/2024 12:29:16 - INFO - __main__ - Step 13536: {'lr': 0.0004909376577728677, 'samples': 649728, 'steps': 13535, 'loss/train': 1.4105310440063477} +07/25/2024 12:29:16 - INFO - __main__ - Step 13537: {'lr': 0.0004909362541838155, 'samples': 649776, 'steps': 13536, 'loss/train': 2.168311357498169} +07/25/2024 12:29:16 - INFO - __main__ - Step 13538: {'lr': 0.0004909348504880836, 'samples': 649824, 'steps': 13537, 'loss/train': 2.0152604579925537} +07/25/2024 12:29:17 - INFO - __main__ - Step 13539: {'lr': 0.0004909334466856722, 'samples': 649872, 'steps': 13538, 'loss/train': 1.8731167316436768} +07/25/2024 12:29:17 - INFO - __main__ - Step 13540: {'lr': 0.0004909320427765824, 'samples': 649920, 'steps': 13539, 'loss/train': 0.6924852132797241} +07/25/2024 12:29:17 - INFO - __main__ - Step 13541: {'lr': 0.0004909306387608146, 'samples': 649968, 'steps': 13540, 'loss/train': 2.0437192916870117} +07/25/2024 12:29:17 - INFO - __main__ - Step 13542: {'lr': 0.0004909292346383696, 'samples': 650016, 'steps': 13541, 'loss/train': 1.8911538124084473} +07/25/2024 12:29:18 - INFO - __main__ - Step 13543: {'lr': 0.0004909278304092478, 'samples': 650064, 'steps': 13542, 'loss/train': 2.1815457344055176} +07/25/2024 12:29:18 - INFO - __main__ - Step 13544: {'lr': 0.0004909264260734499, 'samples': 650112, 'steps': 13543, 'loss/train': 1.6455719470977783} +07/25/2024 12:29:18 - INFO - __main__ - Step 13545: {'lr': 0.0004909250216309764, 'samples': 650160, 'steps': 13544, 'loss/train': 1.55140221118927} +07/25/2024 12:29:19 - INFO - __main__ - Step 13546: {'lr': 0.0004909236170818282, 'samples': 650208, 'steps': 13545, 'loss/train': 1.882353663444519} +07/25/2024 12:29:19 - INFO - __main__ - Step 13547: {'lr': 0.0004909222124260058, 'samples': 650256, 'steps': 13546, 'loss/train': 1.6194920539855957} +07/25/2024 12:29:19 - INFO - __main__ - Step 13548: {'lr': 0.0004909208076635097, 'samples': 650304, 'steps': 13547, 'loss/train': 2.1353068351745605} +07/25/2024 12:29:19 - INFO - __main__ - Step 13549: {'lr': 0.0004909194027943407, 'samples': 650352, 'steps': 13548, 'loss/train': 1.794211506843567} +07/25/2024 12:29:20 - INFO - __main__ - Step 13550: {'lr': 0.0004909179978184993, 'samples': 650400, 'steps': 13549, 'loss/train': 1.762786626815796} +07/25/2024 12:29:20 - INFO - __main__ - Step 13551: {'lr': 0.0004909165927359861, 'samples': 650448, 'steps': 13550, 'loss/train': 2.074235439300537} +07/25/2024 12:29:20 - INFO - __main__ - Step 13552: {'lr': 0.0004909151875468018, 'samples': 650496, 'steps': 13551, 'loss/train': 1.6902542114257812} +07/25/2024 12:29:20 - INFO - __main__ - Step 13553: {'lr': 0.0004909137822509471, 'samples': 650544, 'steps': 13552, 'loss/train': 2.0393002033233643} +07/25/2024 12:29:21 - INFO - __main__ - Step 13554: {'lr': 0.0004909123768484225, 'samples': 650592, 'steps': 13553, 'loss/train': 1.7145271301269531} +07/25/2024 12:29:21 - INFO - __main__ - Step 13555: {'lr': 0.0004909109713392286, 'samples': 650640, 'steps': 13554, 'loss/train': 2.0523996353149414} +07/25/2024 12:29:21 - INFO - __main__ - Step 13556: {'lr': 0.0004909095657233661, 'samples': 650688, 'steps': 13555, 'loss/train': 2.054743528366089} +07/25/2024 12:29:22 - INFO - __main__ - Step 13557: {'lr': 0.0004909081600008356, 'samples': 650736, 'steps': 13556, 'loss/train': 2.067807674407959} +07/25/2024 12:29:22 - INFO - __main__ - Step 13558: {'lr': 0.0004909067541716376, 'samples': 650784, 'steps': 13557, 'loss/train': 1.8477226495742798} +07/25/2024 12:29:22 - INFO - __main__ - Step 13559: {'lr': 0.000490905348235773, 'samples': 650832, 'steps': 13558, 'loss/train': 1.745383381843567} +07/25/2024 12:29:22 - INFO - __main__ - Step 13560: {'lr': 0.0004909039421932422, 'samples': 650880, 'steps': 13559, 'loss/train': 1.676658272743225} +07/25/2024 12:29:23 - INFO - __main__ - Step 13561: {'lr': 0.0004909025360440459, 'samples': 650928, 'steps': 13560, 'loss/train': 1.6598501205444336} +07/25/2024 12:29:23 - INFO - __main__ - Step 13562: {'lr': 0.0004909011297881847, 'samples': 650976, 'steps': 13561, 'loss/train': 1.9904810190200806} +07/25/2024 12:29:23 - INFO - __main__ - Step 13563: {'lr': 0.0004908997234256592, 'samples': 651024, 'steps': 13562, 'loss/train': 1.841902732849121} +07/25/2024 12:29:24 - INFO - __main__ - Step 13564: {'lr': 0.0004908983169564701, 'samples': 651072, 'steps': 13563, 'loss/train': 2.256208658218384} +07/25/2024 12:29:24 - INFO - __main__ - Step 13565: {'lr': 0.0004908969103806179, 'samples': 651120, 'steps': 13564, 'loss/train': 2.1198136806488037} +07/25/2024 12:29:24 - INFO - __main__ - Step 13566: {'lr': 0.0004908955036981034, 'samples': 651168, 'steps': 13565, 'loss/train': 1.9085631370544434} +07/25/2024 12:29:24 - INFO - __main__ - Step 13567: {'lr': 0.000490894096908927, 'samples': 651216, 'steps': 13566, 'loss/train': 1.9439162015914917} +07/25/2024 12:29:25 - INFO - __main__ - Step 13568: {'lr': 0.0004908926900130896, 'samples': 651264, 'steps': 13567, 'loss/train': 1.3759403228759766} +07/25/2024 12:29:25 - INFO - __main__ - Step 13569: {'lr': 0.0004908912830105916, 'samples': 651312, 'steps': 13568, 'loss/train': 1.5329419374465942} +07/25/2024 12:29:25 - INFO - __main__ - Step 13570: {'lr': 0.0004908898759014336, 'samples': 651360, 'steps': 13569, 'loss/train': 1.9175318479537964} +07/25/2024 12:29:26 - INFO - __main__ - Step 13571: {'lr': 0.0004908884686856164, 'samples': 651408, 'steps': 13570, 'loss/train': 1.7658025026321411} +07/25/2024 12:29:26 - INFO - __main__ - Step 13572: {'lr': 0.0004908870613631406, 'samples': 651456, 'steps': 13571, 'loss/train': 1.7384246587753296} +07/25/2024 12:29:26 - INFO - __main__ - Step 13573: {'lr': 0.0004908856539340066, 'samples': 651504, 'steps': 13572, 'loss/train': 2.0791265964508057} +07/25/2024 12:29:26 - INFO - __main__ - Step 13574: {'lr': 0.0004908842463982152, 'samples': 651552, 'steps': 13573, 'loss/train': 1.7907981872558594} +07/25/2024 12:29:27 - INFO - __main__ - Step 13575: {'lr': 0.0004908828387557671, 'samples': 651600, 'steps': 13574, 'loss/train': 2.0253779888153076} +07/25/2024 12:29:27 - INFO - __main__ - Step 13576: {'lr': 0.0004908814310066628, 'samples': 651648, 'steps': 13575, 'loss/train': 1.8846752643585205} +07/25/2024 12:29:27 - INFO - __main__ - Step 13577: {'lr': 0.0004908800231509029, 'samples': 651696, 'steps': 13576, 'loss/train': 1.460425853729248} +07/25/2024 12:29:28 - INFO - __main__ - Step 13578: {'lr': 0.0004908786151884882, 'samples': 651744, 'steps': 13577, 'loss/train': 1.1198688745498657} +07/25/2024 12:29:28 - INFO - __main__ - Step 13579: {'lr': 0.0004908772071194191, 'samples': 651792, 'steps': 13578, 'loss/train': 2.1122939586639404} +07/25/2024 12:29:28 - INFO - __main__ - Step 13580: {'lr': 0.0004908757989436963, 'samples': 651840, 'steps': 13579, 'loss/train': 2.2138524055480957} +07/25/2024 12:29:28 - INFO - __main__ - Step 13581: {'lr': 0.0004908743906613205, 'samples': 651888, 'steps': 13580, 'loss/train': 2.1384875774383545} +07/25/2024 12:29:29 - INFO - __main__ - Step 13582: {'lr': 0.0004908729822722923, 'samples': 651936, 'steps': 13581, 'loss/train': 2.286118745803833} +07/25/2024 12:29:29 - INFO - __main__ - Step 13583: {'lr': 0.0004908715737766122, 'samples': 651984, 'steps': 13582, 'loss/train': 2.1261982917785645} +07/25/2024 12:29:29 - INFO - __main__ - Step 13584: {'lr': 0.0004908701651742811, 'samples': 652032, 'steps': 13583, 'loss/train': 2.1129677295684814} +07/25/2024 12:29:30 - INFO - __main__ - Step 13585: {'lr': 0.0004908687564652992, 'samples': 652080, 'steps': 13584, 'loss/train': 1.9017224311828613} +07/25/2024 12:29:30 - INFO - __main__ - Step 13586: {'lr': 0.0004908673476496675, 'samples': 652128, 'steps': 13585, 'loss/train': 1.5935612916946411} +07/25/2024 12:29:30 - INFO - __main__ - Step 13587: {'lr': 0.0004908659387273865, 'samples': 652176, 'steps': 13586, 'loss/train': 2.4746668338775635} +07/25/2024 12:29:30 - INFO - __main__ - Step 13588: {'lr': 0.0004908645296984568, 'samples': 652224, 'steps': 13587, 'loss/train': 1.7777888774871826} +07/25/2024 12:29:31 - INFO - __main__ - Step 13589: {'lr': 0.0004908631205628789, 'samples': 652272, 'steps': 13588, 'loss/train': 1.978813648223877} +07/25/2024 12:29:31 - INFO - __main__ - Step 13590: {'lr': 0.0004908617113206536, 'samples': 652320, 'steps': 13589, 'loss/train': 2.1106865406036377} +07/25/2024 12:29:31 - INFO - __main__ - Step 13591: {'lr': 0.0004908603019717816, 'samples': 652368, 'steps': 13590, 'loss/train': 3.41055965423584} +07/25/2024 12:29:32 - INFO - __main__ - Step 13592: {'lr': 0.0004908588925162634, 'samples': 652416, 'steps': 13591, 'loss/train': 1.7860798835754395} +07/25/2024 12:29:32 - INFO - __main__ - Step 13593: {'lr': 0.0004908574829540996, 'samples': 652464, 'steps': 13592, 'loss/train': 1.5820506811141968} +07/25/2024 12:29:32 - INFO - __main__ - Step 13594: {'lr': 0.0004908560732852908, 'samples': 652512, 'steps': 13593, 'loss/train': 1.6725778579711914} +07/25/2024 12:29:32 - INFO - __main__ - Step 13595: {'lr': 0.0004908546635098377, 'samples': 652560, 'steps': 13594, 'loss/train': 1.5674147605895996} +07/25/2024 12:29:33 - INFO - __main__ - Step 13596: {'lr': 0.000490853253627741, 'samples': 652608, 'steps': 13595, 'loss/train': 2.439293384552002} +07/25/2024 12:29:33 - INFO - __main__ - Step 13597: {'lr': 0.000490851843639001, 'samples': 652656, 'steps': 13596, 'loss/train': 1.969218134880066} +07/25/2024 12:29:33 - INFO - __main__ - Step 13598: {'lr': 0.0004908504335436188, 'samples': 652704, 'steps': 13597, 'loss/train': 2.100083589553833} +07/25/2024 12:29:34 - INFO - __main__ - Step 13599: {'lr': 0.0004908490233415946, 'samples': 652752, 'steps': 13598, 'loss/train': 1.9952318668365479} +07/25/2024 12:29:34 - INFO - __main__ - Step 13600: {'lr': 0.0004908476130329293, 'samples': 652800, 'steps': 13599, 'loss/train': 1.9069395065307617} +07/25/2024 12:29:34 - INFO - __main__ - Step 13601: {'lr': 0.0004908462026176234, 'samples': 652848, 'steps': 13600, 'loss/train': 1.4352269172668457} +07/25/2024 12:29:34 - INFO - __main__ - Step 13602: {'lr': 0.0004908447920956776, 'samples': 652896, 'steps': 13601, 'loss/train': 2.0148048400878906} +07/25/2024 12:29:35 - INFO - __main__ - Step 13603: {'lr': 0.0004908433814670924, 'samples': 652944, 'steps': 13602, 'loss/train': 1.9798765182495117} +07/25/2024 12:29:35 - INFO - __main__ - Step 13604: {'lr': 0.0004908419707318686, 'samples': 652992, 'steps': 13603, 'loss/train': 1.6629540920257568} +07/25/2024 12:29:35 - INFO - __main__ - Step 13605: {'lr': 0.0004908405598900066, 'samples': 653040, 'steps': 13604, 'loss/train': 1.8280850648880005} +07/25/2024 12:29:36 - INFO - __main__ - Step 13606: {'lr': 0.0004908391489415072, 'samples': 653088, 'steps': 13605, 'loss/train': 2.0349059104919434} +07/25/2024 12:29:36 - INFO - __main__ - Step 13607: {'lr': 0.000490837737886371, 'samples': 653136, 'steps': 13606, 'loss/train': 2.0097973346710205} +07/25/2024 12:29:36 - INFO - __main__ - Step 13608: {'lr': 0.0004908363267245986, 'samples': 653184, 'steps': 13607, 'loss/train': 1.7512726783752441} +07/25/2024 12:29:36 - INFO - __main__ - Step 13609: {'lr': 0.0004908349154561906, 'samples': 653232, 'steps': 13608, 'loss/train': 1.54532790184021} +07/25/2024 12:29:37 - INFO - __main__ - Step 13610: {'lr': 0.0004908335040811476, 'samples': 653280, 'steps': 13609, 'loss/train': 0.46157336235046387} +07/25/2024 12:29:37 - INFO - __main__ - Step 13611: {'lr': 0.0004908320925994703, 'samples': 653328, 'steps': 13610, 'loss/train': 0.3740866482257843} +07/25/2024 12:29:37 - INFO - __main__ - Step 13612: {'lr': 0.0004908306810111593, 'samples': 653376, 'steps': 13611, 'loss/train': 1.1771001815795898} +07/25/2024 12:29:38 - INFO - __main__ - Step 13613: {'lr': 0.0004908292693162152, 'samples': 653424, 'steps': 13612, 'loss/train': 1.9572323560714722} +07/25/2024 12:29:38 - INFO - __main__ - Step 13614: {'lr': 0.0004908278575146387, 'samples': 653472, 'steps': 13613, 'loss/train': 1.26993989944458} +07/25/2024 12:29:38 - INFO - __main__ - Step 13615: {'lr': 0.0004908264456064303, 'samples': 653520, 'steps': 13614, 'loss/train': 2.61796498298645} +07/25/2024 12:29:38 - INFO - __main__ - Step 13616: {'lr': 0.0004908250335915908, 'samples': 653568, 'steps': 13615, 'loss/train': 2.093352794647217} +07/25/2024 12:29:39 - INFO - __main__ - Step 13617: {'lr': 0.0004908236214701206, 'samples': 653616, 'steps': 13616, 'loss/train': 1.9933408498764038} +07/25/2024 12:29:39 - INFO - __main__ - Step 13618: {'lr': 0.0004908222092420205, 'samples': 653664, 'steps': 13617, 'loss/train': 1.4652793407440186} +07/25/2024 12:29:39 - INFO - __main__ - Step 13619: {'lr': 0.0004908207969072912, 'samples': 653712, 'steps': 13618, 'loss/train': 1.7356969118118286} +07/25/2024 12:29:40 - INFO - __main__ - Step 13620: {'lr': 0.0004908193844659331, 'samples': 653760, 'steps': 13619, 'loss/train': 2.1264560222625732} +07/25/2024 12:29:40 - INFO - __main__ - Step 13621: {'lr': 0.0004908179719179469, 'samples': 653808, 'steps': 13620, 'loss/train': 2.0074915885925293} +07/25/2024 12:29:40 - INFO - __main__ - Step 13622: {'lr': 0.0004908165592633333, 'samples': 653856, 'steps': 13621, 'loss/train': 1.774470329284668} +07/25/2024 12:29:40 - INFO - __main__ - Step 13623: {'lr': 0.0004908151465020928, 'samples': 653904, 'steps': 13622, 'loss/train': 1.9326591491699219} +07/25/2024 12:29:41 - INFO - __main__ - Step 13624: {'lr': 0.0004908137336342262, 'samples': 653952, 'steps': 13623, 'loss/train': 1.5135786533355713} +07/25/2024 12:29:41 - INFO - __main__ - Step 13625: {'lr': 0.0004908123206597339, 'samples': 654000, 'steps': 13624, 'loss/train': 1.3245108127593994} +07/25/2024 12:29:41 - INFO - __main__ - Step 13626: {'lr': 0.0004908109075786168, 'samples': 654048, 'steps': 13625, 'loss/train': 1.9543801546096802} +07/25/2024 12:29:41 - INFO - __main__ - Step 13627: {'lr': 0.0004908094943908753, 'samples': 654096, 'steps': 13626, 'loss/train': 1.7961431741714478} +07/25/2024 12:29:42 - INFO - __main__ - Step 13628: {'lr': 0.0004908080810965101, 'samples': 654144, 'steps': 13627, 'loss/train': 1.6030086278915405} +07/25/2024 12:29:42 - INFO - __main__ - Step 13629: {'lr': 0.0004908066676955218, 'samples': 654192, 'steps': 13628, 'loss/train': 1.6702632904052734} +07/25/2024 12:29:42 - INFO - __main__ - Step 13630: {'lr': 0.0004908052541879111, 'samples': 654240, 'steps': 13629, 'loss/train': 1.7739827632904053} +07/25/2024 12:29:43 - INFO - __main__ - Step 13631: {'lr': 0.0004908038405736785, 'samples': 654288, 'steps': 13630, 'loss/train': 1.6056675910949707} +07/25/2024 12:29:43 - INFO - __main__ - Step 13632: {'lr': 0.0004908024268528248, 'samples': 654336, 'steps': 13631, 'loss/train': 2.242053985595703} +07/25/2024 12:29:43 - INFO - __main__ - Step 13633: {'lr': 0.0004908010130253506, 'samples': 654384, 'steps': 13632, 'loss/train': 2.4771041870117188} +07/25/2024 12:29:43 - INFO - __main__ - Step 13634: {'lr': 0.0004907995990912563, 'samples': 654432, 'steps': 13633, 'loss/train': 0.39944979548454285} +07/25/2024 12:29:44 - INFO - __main__ - Step 13635: {'lr': 0.0004907981850505428, 'samples': 654480, 'steps': 13634, 'loss/train': 0.35275566577911377} +07/25/2024 12:29:44 - INFO - __main__ - Step 13636: {'lr': 0.0004907967709032105, 'samples': 654528, 'steps': 13635, 'loss/train': 2.2655715942382812} +07/25/2024 12:29:44 - INFO - __main__ - Step 13637: {'lr': 0.0004907953566492602, 'samples': 654576, 'steps': 13636, 'loss/train': 2.1632044315338135} +07/25/2024 12:29:45 - INFO - __main__ - Step 13638: {'lr': 0.0004907939422886925, 'samples': 654624, 'steps': 13637, 'loss/train': 1.7957149744033813} +07/25/2024 12:29:45 - INFO - __main__ - Step 13639: {'lr': 0.000490792527821508, 'samples': 654672, 'steps': 13638, 'loss/train': 1.6297082901000977} +07/25/2024 12:29:45 - INFO - __main__ - Step 13640: {'lr': 0.0004907911132477072, 'samples': 654720, 'steps': 13639, 'loss/train': 1.9696201086044312} +07/25/2024 12:29:45 - INFO - __main__ - Step 13641: {'lr': 0.0004907896985672909, 'samples': 654768, 'steps': 13640, 'loss/train': 1.5686264038085938} +07/25/2024 12:29:46 - INFO - __main__ - Step 13642: {'lr': 0.0004907882837802596, 'samples': 654816, 'steps': 13641, 'loss/train': 1.7687355279922485} +07/25/2024 12:29:46 - INFO - __main__ - Step 13643: {'lr': 0.0004907868688866141, 'samples': 654864, 'steps': 13642, 'loss/train': 2.303760528564453} +07/25/2024 12:29:46 - INFO - __main__ - Step 13644: {'lr': 0.0004907854538863549, 'samples': 654912, 'steps': 13643, 'loss/train': 1.8281103372573853} +07/25/2024 12:29:47 - INFO - __main__ - Step 13645: {'lr': 0.0004907840387794826, 'samples': 654960, 'steps': 13644, 'loss/train': 2.2291038036346436} +07/25/2024 12:29:47 - INFO - __main__ - Step 13646: {'lr': 0.000490782623565998, 'samples': 655008, 'steps': 13645, 'loss/train': 2.031863212585449} +07/25/2024 12:29:47 - INFO - __main__ - Step 13647: {'lr': 0.0004907812082459015, 'samples': 655056, 'steps': 13646, 'loss/train': 2.064377784729004} +07/25/2024 12:29:47 - INFO - __main__ - Step 13648: {'lr': 0.0004907797928191939, 'samples': 655104, 'steps': 13647, 'loss/train': 1.4702794551849365} +07/25/2024 12:29:48 - INFO - __main__ - Step 13649: {'lr': 0.0004907783772858756, 'samples': 655152, 'steps': 13648, 'loss/train': 1.196541666984558} +07/25/2024 12:29:48 - INFO - __main__ - Step 13650: {'lr': 0.0004907769616459475, 'samples': 655200, 'steps': 13649, 'loss/train': 1.9449940919876099} +07/25/2024 12:29:48 - INFO - __main__ - Step 13651: {'lr': 0.0004907755458994101, 'samples': 655248, 'steps': 13650, 'loss/train': 2.1558799743652344} +07/25/2024 12:29:49 - INFO - __main__ - Step 13652: {'lr': 0.000490774130046264, 'samples': 655296, 'steps': 13651, 'loss/train': 1.7425203323364258} +07/25/2024 12:29:49 - INFO - __main__ - Step 13653: {'lr': 0.00049077271408651, 'samples': 655344, 'steps': 13652, 'loss/train': 1.6463584899902344} +07/25/2024 12:29:49 - INFO - __main__ - Step 13654: {'lr': 0.0004907712980201484, 'samples': 655392, 'steps': 13653, 'loss/train': 1.6699265241622925} +07/25/2024 12:29:49 - INFO - __main__ - Step 13655: {'lr': 0.0004907698818471801, 'samples': 655440, 'steps': 13654, 'loss/train': 2.172133684158325} +07/25/2024 12:29:50 - INFO - __main__ - Step 13656: {'lr': 0.0004907684655676056, 'samples': 655488, 'steps': 13655, 'loss/train': 1.8942437171936035} +07/25/2024 12:29:50 - INFO - __main__ - Step 13657: {'lr': 0.0004907670491814256, 'samples': 655536, 'steps': 13656, 'loss/train': 2.0885112285614014} +07/25/2024 12:29:50 - INFO - __main__ - Step 13658: {'lr': 0.0004907656326886406, 'samples': 655584, 'steps': 13657, 'loss/train': 0.3526363968849182} +07/25/2024 12:29:51 - INFO - __main__ - Step 13659: {'lr': 0.0004907642160892515, 'samples': 655632, 'steps': 13658, 'loss/train': 0.6044798493385315} +07/25/2024 12:29:51 - INFO - __main__ - Step 13660: {'lr': 0.0004907627993832586, 'samples': 655680, 'steps': 13659, 'loss/train': 1.660314917564392} +07/25/2024 12:29:51 - INFO - __main__ - Step 13661: {'lr': 0.0004907613825706628, 'samples': 655728, 'steps': 13660, 'loss/train': 1.8951882123947144} +07/25/2024 12:29:51 - INFO - __main__ - Step 13662: {'lr': 0.0004907599656514645, 'samples': 655776, 'steps': 13661, 'loss/train': 2.2195370197296143} +07/25/2024 12:29:52 - INFO - __main__ - Step 13663: {'lr': 0.0004907585486256645, 'samples': 655824, 'steps': 13662, 'loss/train': 1.8485620021820068} +07/25/2024 12:29:52 - INFO - __main__ - Step 13664: {'lr': 0.0004907571314932634, 'samples': 655872, 'steps': 13663, 'loss/train': 1.7512317895889282} +07/25/2024 12:29:52 - INFO - __main__ - Step 13665: {'lr': 0.0004907557142542617, 'samples': 655920, 'steps': 13664, 'loss/train': 1.9185022115707397} +07/25/2024 12:29:53 - INFO - __main__ - Step 13666: {'lr': 0.0004907542969086602, 'samples': 655968, 'steps': 13665, 'loss/train': 1.6271826028823853} +07/25/2024 12:29:53 - INFO - __main__ - Step 13667: {'lr': 0.0004907528794564594, 'samples': 656016, 'steps': 13666, 'loss/train': 2.3278417587280273} +07/25/2024 12:29:53 - INFO - __main__ - Step 13668: {'lr': 0.00049075146189766, 'samples': 656064, 'steps': 13667, 'loss/train': 2.2901453971862793} +07/25/2024 12:29:53 - INFO - __main__ - Step 13669: {'lr': 0.0004907500442322626, 'samples': 656112, 'steps': 13668, 'loss/train': 1.9810197353363037} +07/25/2024 12:29:54 - INFO - __main__ - Step 13670: {'lr': 0.0004907486264602677, 'samples': 656160, 'steps': 13669, 'loss/train': 2.111443519592285} +07/25/2024 12:29:54 - INFO - __main__ - Step 13671: {'lr': 0.0004907472085816762, 'samples': 656208, 'steps': 13670, 'loss/train': 2.0735526084899902} +07/25/2024 12:29:54 - INFO - __main__ - Step 13672: {'lr': 0.0004907457905964885, 'samples': 656256, 'steps': 13671, 'loss/train': 1.4397896528244019} +07/25/2024 12:29:55 - INFO - __main__ - Step 13673: {'lr': 0.0004907443725047052, 'samples': 656304, 'steps': 13672, 'loss/train': 1.0385754108428955} +07/25/2024 12:29:55 - INFO - __main__ - Step 13674: {'lr': 0.0004907429543063273, 'samples': 656352, 'steps': 13673, 'loss/train': 1.912104606628418} +07/25/2024 12:29:55 - INFO - __main__ - Step 13675: {'lr': 0.000490741536001355, 'samples': 656400, 'steps': 13674, 'loss/train': 2.0478169918060303} +07/25/2024 12:29:55 - INFO - __main__ - Step 13676: {'lr': 0.0004907401175897892, 'samples': 656448, 'steps': 13675, 'loss/train': 1.9635009765625} +07/25/2024 12:29:56 - INFO - __main__ - Step 13677: {'lr': 0.0004907386990716302, 'samples': 656496, 'steps': 13676, 'loss/train': 1.6492599248886108} +07/25/2024 12:29:56 - INFO - __main__ - Step 13678: {'lr': 0.0004907372804468791, 'samples': 656544, 'steps': 13677, 'loss/train': 1.821710228919983} +07/25/2024 12:29:56 - INFO - __main__ - Step 13679: {'lr': 0.0004907358617155362, 'samples': 656592, 'steps': 13678, 'loss/train': 1.913407325744629} +07/25/2024 12:29:57 - INFO - __main__ - Step 13680: {'lr': 0.0004907344428776022, 'samples': 656640, 'steps': 13679, 'loss/train': 1.6695975065231323} +07/25/2024 12:29:57 - INFO - __main__ - Step 13681: {'lr': 0.0004907330239330778, 'samples': 656688, 'steps': 13680, 'loss/train': 1.9279483556747437} +07/25/2024 12:29:57 - INFO - __main__ - Step 13682: {'lr': 0.0004907316048819634, 'samples': 656736, 'steps': 13681, 'loss/train': 1.078069806098938} +07/25/2024 12:29:57 - INFO - __main__ - Step 13683: {'lr': 0.0004907301857242599, 'samples': 656784, 'steps': 13682, 'loss/train': 0.8285669088363647} +07/25/2024 12:29:58 - INFO - __main__ - Step 13684: {'lr': 0.0004907287664599678, 'samples': 656832, 'steps': 13683, 'loss/train': 0.9778413772583008} +07/25/2024 12:29:58 - INFO - __main__ - Step 13685: {'lr': 0.0004907273470890877, 'samples': 656880, 'steps': 13684, 'loss/train': 1.9049923419952393} +07/25/2024 12:29:58 - INFO - __main__ - Step 13686: {'lr': 0.0004907259276116202, 'samples': 656928, 'steps': 13685, 'loss/train': 1.9837132692337036} +07/25/2024 12:29:59 - INFO - __main__ - Step 13687: {'lr': 0.0004907245080275662, 'samples': 656976, 'steps': 13686, 'loss/train': 1.2264801263809204} +07/25/2024 12:29:59 - INFO - __main__ - Step 13688: {'lr': 0.0004907230883369261, 'samples': 657024, 'steps': 13687, 'loss/train': 1.7529562711715698} +07/25/2024 12:29:59 - INFO - __main__ - Step 13689: {'lr': 0.0004907216685397004, 'samples': 657072, 'steps': 13688, 'loss/train': 1.9265503883361816} +07/25/2024 12:29:59 - INFO - __main__ - Step 13690: {'lr': 0.0004907202486358901, 'samples': 657120, 'steps': 13689, 'loss/train': 1.5373306274414062} +07/25/2024 12:30:00 - INFO - __main__ - Step 13691: {'lr': 0.0004907188286254956, 'samples': 657168, 'steps': 13690, 'loss/train': 2.3526265621185303} +07/25/2024 12:30:00 - INFO - __main__ - Step 13692: {'lr': 0.0004907174085085174, 'samples': 657216, 'steps': 13691, 'loss/train': 1.9371371269226074} +07/25/2024 12:30:00 - INFO - __main__ - Step 13693: {'lr': 0.0004907159882849563, 'samples': 657264, 'steps': 13692, 'loss/train': 2.1455132961273193} +07/25/2024 12:30:01 - INFO - __main__ - Step 13694: {'lr': 0.0004907145679548129, 'samples': 657312, 'steps': 13693, 'loss/train': 2.043491840362549} +07/25/2024 12:30:01 - INFO - __main__ - Step 13695: {'lr': 0.000490713147518088, 'samples': 657360, 'steps': 13694, 'loss/train': 1.752427577972412} +07/25/2024 12:30:01 - INFO - __main__ - Step 13696: {'lr': 0.0004907117269747819, 'samples': 657408, 'steps': 13695, 'loss/train': 1.1354999542236328} +07/25/2024 12:30:01 - INFO - __main__ - Step 13697: {'lr': 0.0004907103063248955, 'samples': 657456, 'steps': 13696, 'loss/train': 1.0853370428085327} +07/25/2024 12:30:02 - INFO - __main__ - Step 13698: {'lr': 0.0004907088855684292, 'samples': 657504, 'steps': 13697, 'loss/train': 1.6759161949157715} +07/25/2024 12:30:02 - INFO - __main__ - Step 13699: {'lr': 0.0004907074647053838, 'samples': 657552, 'steps': 13698, 'loss/train': 2.0699210166931152} +07/25/2024 12:30:02 - INFO - __main__ - Step 13700: {'lr': 0.0004907060437357598, 'samples': 657600, 'steps': 13699, 'loss/train': 1.7131935358047485} +07/25/2024 12:30:02 - INFO - __main__ - Step 13701: {'lr': 0.0004907046226595581, 'samples': 657648, 'steps': 13700, 'loss/train': 1.8006808757781982} +07/25/2024 12:30:03 - INFO - __main__ - Step 13702: {'lr': 0.000490703201476779, 'samples': 657696, 'steps': 13701, 'loss/train': 1.224454402923584} +07/25/2024 12:30:03 - INFO - __main__ - Step 13703: {'lr': 0.0004907017801874234, 'samples': 657744, 'steps': 13702, 'loss/train': 1.8110318183898926} +07/25/2024 12:30:03 - INFO - __main__ - Step 13704: {'lr': 0.0004907003587914917, 'samples': 657792, 'steps': 13703, 'loss/train': 1.7013790607452393} +07/25/2024 12:30:04 - INFO - __main__ - Step 13705: {'lr': 0.0004906989372889846, 'samples': 657840, 'steps': 13704, 'loss/train': 1.67000412940979} +07/25/2024 12:30:04 - INFO - __main__ - Step 13706: {'lr': 0.0004906975156799028, 'samples': 657888, 'steps': 13705, 'loss/train': 1.895196795463562} +07/25/2024 12:30:04 - INFO - __main__ - Step 13707: {'lr': 0.0004906960939642469, 'samples': 657936, 'steps': 13706, 'loss/train': 0.35970398783683777} +07/25/2024 12:30:04 - INFO - __main__ - Step 13708: {'lr': 0.0004906946721420175, 'samples': 657984, 'steps': 13707, 'loss/train': 1.3514595031738281} +07/25/2024 12:30:05 - INFO - __main__ - Step 13709: {'lr': 0.0004906932502132151, 'samples': 658032, 'steps': 13708, 'loss/train': 3.03909969329834} +07/25/2024 12:30:05 - INFO - __main__ - Step 13710: {'lr': 0.0004906918281778407, 'samples': 658080, 'steps': 13709, 'loss/train': 1.337239146232605} +07/25/2024 12:30:05 - INFO - __main__ - Step 13711: {'lr': 0.0004906904060358946, 'samples': 658128, 'steps': 13710, 'loss/train': 0.5470759272575378} +07/25/2024 12:30:06 - INFO - __main__ - Step 13712: {'lr': 0.0004906889837873775, 'samples': 658176, 'steps': 13711, 'loss/train': 1.0221425294876099} +07/25/2024 12:30:06 - INFO - __main__ - Step 13713: {'lr': 0.00049068756143229, 'samples': 658224, 'steps': 13712, 'loss/train': 1.5352860689163208} +07/25/2024 12:30:06 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488853 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:30:06 - INFO - __main__ - Step 13714: {'lr': 0.0004906861389706329, 'samples': 658272, 'steps': 13713, 'loss/train': 1.8719518184661865} +07/25/2024 12:30:06 - INFO - __main__ - Step 13715: {'lr': 0.0004906847164024067, 'samples': 658320, 'steps': 13714, 'loss/train': 1.9155436754226685} +07/25/2024 12:30:07 - INFO - __main__ - Step 13716: {'lr': 0.0004906832937276121, 'samples': 658368, 'steps': 13715, 'loss/train': 2.026272773742676} +07/25/2024 12:30:07 - INFO - __main__ - Step 13717: {'lr': 0.0004906818709462496, 'samples': 658416, 'steps': 13716, 'loss/train': 1.621715784072876} +07/25/2024 12:30:07 - INFO - __main__ - Step 13718: {'lr': 0.00049068044805832, 'samples': 658464, 'steps': 13717, 'loss/train': 2.15860915184021} +07/25/2024 12:30:08 - INFO - __main__ - Step 13719: {'lr': 0.0004906790250638238, 'samples': 658512, 'steps': 13718, 'loss/train': 2.1262311935424805} +07/25/2024 12:30:08 - INFO - __main__ - Step 13720: {'lr': 0.0004906776019627617, 'samples': 658560, 'steps': 13719, 'loss/train': 1.250451922416687} +07/25/2024 12:30:08 - INFO - __main__ - Step 13721: {'lr': 0.0004906761787551342, 'samples': 658608, 'steps': 13720, 'loss/train': 1.0815017223358154} +07/25/2024 12:30:08 - INFO - __main__ - Step 13722: {'lr': 0.0004906747554409421, 'samples': 658656, 'steps': 13721, 'loss/train': 1.4339845180511475} +07/25/2024 12:30:09 - INFO - __main__ - Step 13723: {'lr': 0.0004906733320201859, 'samples': 658704, 'steps': 13722, 'loss/train': 1.8243019580841064} +07/25/2024 12:30:09 - INFO - __main__ - Step 13724: {'lr': 0.0004906719084928664, 'samples': 658752, 'steps': 13723, 'loss/train': 1.7540957927703857} +07/25/2024 12:30:09 - INFO - __main__ - Step 13725: {'lr': 0.000490670484858984, 'samples': 658800, 'steps': 13724, 'loss/train': 1.5833041667938232} +07/25/2024 12:30:10 - INFO - __main__ - Step 13726: {'lr': 0.0004906690611185396, 'samples': 658848, 'steps': 13725, 'loss/train': 1.5715539455413818} +07/25/2024 12:30:10 - INFO - __main__ - Step 13727: {'lr': 0.0004906676372715336, 'samples': 658896, 'steps': 13726, 'loss/train': 0.4817504584789276} +07/25/2024 12:30:10 - INFO - __main__ - Step 13728: {'lr': 0.0004906662133179667, 'samples': 658944, 'steps': 13727, 'loss/train': 1.3298993110656738} +07/25/2024 12:30:10 - INFO - __main__ - Step 13729: {'lr': 0.0004906647892578396, 'samples': 658992, 'steps': 13728, 'loss/train': 2.00264310836792} +07/25/2024 12:30:11 - INFO - __main__ - Step 13730: {'lr': 0.0004906633650911529, 'samples': 659040, 'steps': 13729, 'loss/train': 1.9383662939071655} +07/25/2024 12:30:11 - INFO - __main__ - Step 13731: {'lr': 0.0004906619408179071, 'samples': 659088, 'steps': 13730, 'loss/train': 0.33990225195884705} +07/25/2024 12:30:11 - INFO - __main__ - Step 13732: {'lr': 0.000490660516438103, 'samples': 659136, 'steps': 13731, 'loss/train': 2.1342966556549072} +07/25/2024 12:30:12 - INFO - __main__ - Step 13733: {'lr': 0.0004906590919517411, 'samples': 659184, 'steps': 13732, 'loss/train': 2.4896984100341797} +07/25/2024 12:30:12 - INFO - __main__ - Step 13734: {'lr': 0.0004906576673588221, 'samples': 659232, 'steps': 13733, 'loss/train': 1.3946561813354492} +07/25/2024 12:30:12 - INFO - __main__ - Step 13735: {'lr': 0.0004906562426593467, 'samples': 659280, 'steps': 13734, 'loss/train': 2.666571617126465} +07/25/2024 12:30:12 - INFO - __main__ - Step 13736: {'lr': 0.0004906548178533154, 'samples': 659328, 'steps': 13735, 'loss/train': 1.9962598085403442} +07/25/2024 12:30:13 - INFO - __main__ - Step 13737: {'lr': 0.000490653392940729, 'samples': 659376, 'steps': 13736, 'loss/train': 1.8706122636795044} +07/25/2024 12:30:13 - INFO - __main__ - Step 13738: {'lr': 0.0004906519679215879, 'samples': 659424, 'steps': 13737, 'loss/train': 1.6605336666107178} +07/25/2024 12:30:13 - INFO - __main__ - Step 13739: {'lr': 0.0004906505427958929, 'samples': 659472, 'steps': 13738, 'loss/train': 1.930383563041687} +07/25/2024 12:30:14 - INFO - __main__ - Step 13740: {'lr': 0.0004906491175636446, 'samples': 659520, 'steps': 13739, 'loss/train': 1.9183728694915771} +07/25/2024 12:30:14 - INFO - __main__ - Step 13741: {'lr': 0.0004906476922248436, 'samples': 659568, 'steps': 13740, 'loss/train': 2.1341192722320557} +07/25/2024 12:30:14 - INFO - __main__ - Step 13742: {'lr': 0.0004906462667794905, 'samples': 659616, 'steps': 13741, 'loss/train': 1.8389959335327148} +07/25/2024 12:30:14 - INFO - __main__ - Step 13743: {'lr': 0.000490644841227586, 'samples': 659664, 'steps': 13742, 'loss/train': 1.8327505588531494} +07/25/2024 12:30:15 - INFO - __main__ - Step 13744: {'lr': 0.0004906434155691307, 'samples': 659712, 'steps': 13743, 'loss/train': 2.7746763229370117} +07/25/2024 12:30:15 - INFO - __main__ - Step 13745: {'lr': 0.0004906419898041253, 'samples': 659760, 'steps': 13744, 'loss/train': 0.9550520181655884} +07/25/2024 12:30:15 - INFO - __main__ - Step 13746: {'lr': 0.0004906405639325703, 'samples': 659808, 'steps': 13745, 'loss/train': 0.7585316300392151} +07/25/2024 12:30:16 - INFO - __main__ - Step 13747: {'lr': 0.0004906391379544665, 'samples': 659856, 'steps': 13746, 'loss/train': 1.8061224222183228} +07/25/2024 12:30:16 - INFO - __main__ - Step 13748: {'lr': 0.0004906377118698143, 'samples': 659904, 'steps': 13747, 'loss/train': 1.9268144369125366} +07/25/2024 12:30:16 - INFO - __main__ - Step 13749: {'lr': 0.0004906362856786145, 'samples': 659952, 'steps': 13748, 'loss/train': 1.4798182249069214} +07/25/2024 12:30:16 - INFO - __main__ - Step 13750: {'lr': 0.0004906348593808677, 'samples': 660000, 'steps': 13749, 'loss/train': 0.2982509136199951} +07/25/2024 12:30:17 - INFO - __main__ - Step 13751: {'lr': 0.0004906334329765746, 'samples': 660048, 'steps': 13750, 'loss/train': 0.3581664562225342} +07/25/2024 12:30:17 - INFO - __main__ - Step 13752: {'lr': 0.0004906320064657357, 'samples': 660096, 'steps': 13751, 'loss/train': 1.5613757371902466} +07/25/2024 12:30:17 - INFO - __main__ - Step 13753: {'lr': 0.0004906305798483517, 'samples': 660144, 'steps': 13752, 'loss/train': 1.8073068857192993} +07/25/2024 12:30:18 - INFO - __main__ - Step 13754: {'lr': 0.0004906291531244232, 'samples': 660192, 'steps': 13753, 'loss/train': 0.6058164834976196} +07/25/2024 12:30:18 - INFO - __main__ - Step 13755: {'lr': 0.0004906277262939508, 'samples': 660240, 'steps': 13754, 'loss/train': 0.5169596672058105} +07/25/2024 12:30:18 - INFO - __main__ - Step 13756: {'lr': 0.0004906262993569353, 'samples': 660288, 'steps': 13755, 'loss/train': 0.8938136100769043} +07/25/2024 12:30:18 - INFO - __main__ - Step 13757: {'lr': 0.0004906248723133772, 'samples': 660336, 'steps': 13756, 'loss/train': 2.4260008335113525} +07/25/2024 12:30:19 - INFO - __main__ - Step 13758: {'lr': 0.000490623445163277, 'samples': 660384, 'steps': 13757, 'loss/train': 1.5761555433273315} +07/25/2024 12:30:19 - INFO - __main__ - Step 13759: {'lr': 0.0004906220179066357, 'samples': 660432, 'steps': 13758, 'loss/train': 2.538360118865967} +07/25/2024 12:30:19 - INFO - __main__ - Step 13760: {'lr': 0.0004906205905434536, 'samples': 660480, 'steps': 13759, 'loss/train': 1.918597936630249} +07/25/2024 12:30:20 - INFO - __main__ - Step 13761: {'lr': 0.0004906191630737315, 'samples': 660528, 'steps': 13760, 'loss/train': 1.384584903717041} +07/25/2024 12:30:20 - INFO - __main__ - Step 13762: {'lr': 0.0004906177354974699, 'samples': 660576, 'steps': 13761, 'loss/train': 1.6930502653121948} +07/25/2024 12:30:20 - INFO - __main__ - Step 13763: {'lr': 0.0004906163078146695, 'samples': 660624, 'steps': 13762, 'loss/train': 2.1046245098114014} +07/25/2024 12:30:20 - INFO - __main__ - Step 13764: {'lr': 0.000490614880025331, 'samples': 660672, 'steps': 13763, 'loss/train': 1.9234728813171387} +07/25/2024 12:30:21 - INFO - __main__ - Step 13765: {'lr': 0.000490613452129455, 'samples': 660720, 'steps': 13764, 'loss/train': 1.8114545345306396} +07/25/2024 12:30:21 - INFO - __main__ - Step 13766: {'lr': 0.000490612024127042, 'samples': 660768, 'steps': 13765, 'loss/train': 1.6999633312225342} +07/25/2024 12:30:21 - INFO - __main__ - Step 13767: {'lr': 0.0004906105960180928, 'samples': 660816, 'steps': 13766, 'loss/train': 2.030719757080078} +07/25/2024 12:30:22 - INFO - __main__ - Step 13768: {'lr': 0.000490609167802608, 'samples': 660864, 'steps': 13767, 'loss/train': 1.7053433656692505} +07/25/2024 12:30:22 - INFO - __main__ - Step 13769: {'lr': 0.0004906077394805881, 'samples': 660912, 'steps': 13768, 'loss/train': 0.9278839230537415} +07/25/2024 12:30:22 - INFO - __main__ - Step 13770: {'lr': 0.0004906063110520339, 'samples': 660960, 'steps': 13769, 'loss/train': 1.8961818218231201} +07/25/2024 12:30:22 - INFO - __main__ - Step 13771: {'lr': 0.000490604882516946, 'samples': 661008, 'steps': 13770, 'loss/train': 1.3852005004882812} +07/25/2024 12:30:23 - INFO - __main__ - Step 13772: {'lr': 0.0004906034538753251, 'samples': 661056, 'steps': 13771, 'loss/train': 2.067913055419922} +07/25/2024 12:30:23 - INFO - __main__ - Step 13773: {'lr': 0.0004906020251271715, 'samples': 661104, 'steps': 13772, 'loss/train': 1.966786503791809} +07/25/2024 12:30:23 - INFO - __main__ - Step 13774: {'lr': 0.0004906005962724862, 'samples': 661152, 'steps': 13773, 'loss/train': 0.1670711189508438} +07/25/2024 12:30:24 - INFO - __main__ - Step 13775: {'lr': 0.0004905991673112696, 'samples': 661200, 'steps': 13774, 'loss/train': 0.23820866644382477} +07/25/2024 12:30:24 - INFO - __main__ - Step 13776: {'lr': 0.0004905977382435226, 'samples': 661248, 'steps': 13775, 'loss/train': 1.6176629066467285} +07/25/2024 12:30:24 - INFO - __main__ - Step 13777: {'lr': 0.0004905963090692455, 'samples': 661296, 'steps': 13776, 'loss/train': 2.054833173751831} +07/25/2024 12:30:24 - INFO - __main__ - Step 13778: {'lr': 0.0004905948797884392, 'samples': 661344, 'steps': 13777, 'loss/train': 1.897213339805603} +07/25/2024 12:30:25 - INFO - __main__ - Step 13779: {'lr': 0.0004905934504011041, 'samples': 661392, 'steps': 13778, 'loss/train': 2.0827338695526123} +07/25/2024 12:30:25 - INFO - __main__ - Step 13780: {'lr': 0.0004905920209072411, 'samples': 661440, 'steps': 13779, 'loss/train': 1.0999541282653809} +07/25/2024 12:30:25 - INFO - __main__ - Step 13781: {'lr': 0.0004905905913068507, 'samples': 661488, 'steps': 13780, 'loss/train': 0.8999850749969482} +07/25/2024 12:30:26 - INFO - __main__ - Step 13782: {'lr': 0.0004905891615999333, 'samples': 661536, 'steps': 13781, 'loss/train': 1.5268484354019165} +07/25/2024 12:30:26 - INFO - __main__ - Step 13783: {'lr': 0.00049058773178649, 'samples': 661584, 'steps': 13782, 'loss/train': 2.5195157527923584} +07/25/2024 12:30:26 - INFO - __main__ - Step 13784: {'lr': 0.0004905863018665211, 'samples': 661632, 'steps': 13783, 'loss/train': 1.852459192276001} +07/25/2024 12:30:26 - INFO - __main__ - Step 13785: {'lr': 0.0004905848718400274, 'samples': 661680, 'steps': 13784, 'loss/train': 1.7866814136505127} +07/25/2024 12:30:27 - INFO - __main__ - Step 13786: {'lr': 0.0004905834417070095, 'samples': 661728, 'steps': 13785, 'loss/train': 2.054051637649536} +07/25/2024 12:30:27 - INFO - __main__ - Step 13787: {'lr': 0.0004905820114674679, 'samples': 661776, 'steps': 13786, 'loss/train': 1.5362145900726318} +07/25/2024 12:30:27 - INFO - __main__ - Step 13788: {'lr': 0.0004905805811214033, 'samples': 661824, 'steps': 13787, 'loss/train': 1.8546197414398193} +07/25/2024 12:30:27 - INFO - __main__ - Step 13789: {'lr': 0.0004905791506688163, 'samples': 661872, 'steps': 13788, 'loss/train': 1.9422738552093506} +07/25/2024 12:30:28 - INFO - __main__ - Step 13790: {'lr': 0.0004905777201097077, 'samples': 661920, 'steps': 13789, 'loss/train': 2.1670267581939697} +07/25/2024 12:30:28 - INFO - __main__ - Step 13791: {'lr': 0.000490576289444078, 'samples': 661968, 'steps': 13790, 'loss/train': 1.8605972528457642} +07/25/2024 12:30:28 - INFO - __main__ - Step 13792: {'lr': 0.0004905748586719279, 'samples': 662016, 'steps': 13791, 'loss/train': 1.721867322921753} +07/25/2024 12:30:29 - INFO - __main__ - Step 13793: {'lr': 0.000490573427793258, 'samples': 662064, 'steps': 13792, 'loss/train': 0.909599781036377} +07/25/2024 12:30:29 - INFO - __main__ - Step 13794: {'lr': 0.0004905719968080689, 'samples': 662112, 'steps': 13793, 'loss/train': 2.009373903274536} +07/25/2024 12:30:29 - INFO - __main__ - Step 13795: {'lr': 0.0004905705657163612, 'samples': 662160, 'steps': 13794, 'loss/train': 2.4983506202697754} +07/25/2024 12:30:29 - INFO - __main__ - Step 13796: {'lr': 0.0004905691345181356, 'samples': 662208, 'steps': 13795, 'loss/train': 1.5177065134048462} +07/25/2024 12:30:30 - INFO - __main__ - Step 13797: {'lr': 0.0004905677032133928, 'samples': 662256, 'steps': 13796, 'loss/train': 1.3907166719436646} +07/25/2024 12:30:30 - INFO - __main__ - Step 13798: {'lr': 0.0004905662718021333, 'samples': 662304, 'steps': 13797, 'loss/train': 0.1972341388463974} +07/25/2024 12:30:30 - INFO - __main__ - Step 13799: {'lr': 0.0004905648402843579, 'samples': 662352, 'steps': 13798, 'loss/train': 0.12391670048236847} +07/25/2024 12:30:31 - INFO - __main__ - Step 13800: {'lr': 0.000490563408660067, 'samples': 662400, 'steps': 13799, 'loss/train': 1.799787163734436} +07/25/2024 12:30:31 - INFO - __main__ - Step 13801: {'lr': 0.0004905619769292614, 'samples': 662448, 'steps': 13800, 'loss/train': 2.4513304233551025} +07/25/2024 12:30:31 - INFO - __main__ - Step 13802: {'lr': 0.0004905605450919417, 'samples': 662496, 'steps': 13801, 'loss/train': 2.1620354652404785} +07/25/2024 12:30:31 - INFO - __main__ - Step 13803: {'lr': 0.0004905591131481086, 'samples': 662544, 'steps': 13802, 'loss/train': 1.8586703538894653} +07/25/2024 12:30:32 - INFO - __main__ - Step 13804: {'lr': 0.0004905576810977626, 'samples': 662592, 'steps': 13803, 'loss/train': 2.098703145980835} +07/25/2024 12:30:32 - INFO - __main__ - Step 13805: {'lr': 0.0004905562489409044, 'samples': 662640, 'steps': 13804, 'loss/train': 1.3266103267669678} +07/25/2024 12:30:32 - INFO - __main__ - Step 13806: {'lr': 0.0004905548166775347, 'samples': 662688, 'steps': 13805, 'loss/train': 1.3109967708587646} +07/25/2024 12:30:33 - INFO - __main__ - Step 13807: {'lr': 0.000490553384307654, 'samples': 662736, 'steps': 13806, 'loss/train': 2.055807590484619} +07/25/2024 12:30:33 - INFO - __main__ - Step 13808: {'lr': 0.000490551951831263, 'samples': 662784, 'steps': 13807, 'loss/train': 1.9317667484283447} +07/25/2024 12:30:33 - INFO - __main__ - Step 13809: {'lr': 0.0004905505192483624, 'samples': 662832, 'steps': 13808, 'loss/train': 1.8947091102600098} +07/25/2024 12:30:33 - INFO - __main__ - Step 13810: {'lr': 0.0004905490865589527, 'samples': 662880, 'steps': 13809, 'loss/train': 1.6695144176483154} +07/25/2024 12:30:34 - INFO - __main__ - Step 13811: {'lr': 0.0004905476537630345, 'samples': 662928, 'steps': 13810, 'loss/train': 2.29260516166687} +07/25/2024 12:30:34 - INFO - __main__ - Step 13812: {'lr': 0.0004905462208606088, 'samples': 662976, 'steps': 13811, 'loss/train': 1.4902642965316772} +07/25/2024 12:30:34 - INFO - __main__ - Step 13813: {'lr': 0.0004905447878516758, 'samples': 663024, 'steps': 13812, 'loss/train': 1.4741239547729492} +07/25/2024 12:30:35 - INFO - __main__ - Step 13814: {'lr': 0.0004905433547362363, 'samples': 663072, 'steps': 13813, 'loss/train': 1.9679183959960938} +07/25/2024 12:30:35 - INFO - __main__ - Step 13815: {'lr': 0.000490541921514291, 'samples': 663120, 'steps': 13814, 'loss/train': 1.684737205505371} +07/25/2024 12:30:35 - INFO - __main__ - Step 13816: {'lr': 0.0004905404881858404, 'samples': 663168, 'steps': 13815, 'loss/train': 1.6827925443649292} +07/25/2024 12:30:35 - INFO - __main__ - Step 13817: {'lr': 0.0004905390547508854, 'samples': 663216, 'steps': 13816, 'loss/train': 0.8565113544464111} +07/25/2024 12:30:36 - INFO - __main__ - Step 13818: {'lr': 0.0004905376212094262, 'samples': 663264, 'steps': 13817, 'loss/train': 1.8695913553237915} +07/25/2024 12:30:36 - INFO - __main__ - Step 13819: {'lr': 0.0004905361875614639, 'samples': 663312, 'steps': 13818, 'loss/train': 1.8546912670135498} +07/25/2024 12:30:36 - INFO - __main__ - Step 13820: {'lr': 0.0004905347538069989, 'samples': 663360, 'steps': 13819, 'loss/train': 1.9381946325302124} +07/25/2024 12:30:37 - INFO - __main__ - Step 13821: {'lr': 0.0004905333199460317, 'samples': 663408, 'steps': 13820, 'loss/train': 1.36117422580719} +07/25/2024 12:30:37 - INFO - __main__ - Step 13822: {'lr': 0.0004905318859785631, 'samples': 663456, 'steps': 13821, 'loss/train': 0.17059940099716187} +07/25/2024 12:30:37 - INFO - __main__ - Step 13823: {'lr': 0.0004905304519045938, 'samples': 663504, 'steps': 13822, 'loss/train': 0.2366388887166977} +07/25/2024 12:30:37 - INFO - __main__ - Step 13824: {'lr': 0.0004905290177241243, 'samples': 663552, 'steps': 13823, 'loss/train': 1.744413137435913} +07/25/2024 12:30:38 - INFO - __main__ - Step 13825: {'lr': 0.0004905275834371553, 'samples': 663600, 'steps': 13824, 'loss/train': 0.6916587948799133} +07/25/2024 12:30:38 - INFO - __main__ - Step 13826: {'lr': 0.0004905261490436876, 'samples': 663648, 'steps': 13825, 'loss/train': 1.6723392009735107} +07/25/2024 12:30:38 - INFO - __main__ - Step 13827: {'lr': 0.0004905247145437214, 'samples': 663696, 'steps': 13826, 'loss/train': 1.7711338996887207} +07/25/2024 12:30:39 - INFO - __main__ - Step 13828: {'lr': 0.0004905232799372577, 'samples': 663744, 'steps': 13827, 'loss/train': 1.711762547492981} +07/25/2024 12:30:39 - INFO - __main__ - Step 13829: {'lr': 0.0004905218452242971, 'samples': 663792, 'steps': 13828, 'loss/train': 1.902468204498291} +07/25/2024 12:30:39 - INFO - __main__ - Step 13830: {'lr': 0.00049052041040484, 'samples': 663840, 'steps': 13829, 'loss/train': 2.14300537109375} +07/25/2024 12:30:39 - INFO - __main__ - Step 13831: {'lr': 0.0004905189754788873, 'samples': 663888, 'steps': 13830, 'loss/train': 1.8443189859390259} +07/25/2024 12:30:40 - INFO - __main__ - Step 13832: {'lr': 0.0004905175404464395, 'samples': 663936, 'steps': 13831, 'loss/train': 2.0167338848114014} +07/25/2024 12:30:40 - INFO - __main__ - Step 13833: {'lr': 0.0004905161053074974, 'samples': 663984, 'steps': 13832, 'loss/train': 1.9289520978927612} +07/25/2024 12:30:40 - INFO - __main__ - Step 13834: {'lr': 0.0004905146700620614, 'samples': 664032, 'steps': 13833, 'loss/train': 1.5421019792556763} +07/25/2024 12:30:41 - INFO - __main__ - Step 13835: {'lr': 0.0004905132347101323, 'samples': 664080, 'steps': 13834, 'loss/train': 2.560851812362671} +07/25/2024 12:30:41 - INFO - __main__ - Step 13836: {'lr': 0.0004905117992517106, 'samples': 664128, 'steps': 13835, 'loss/train': 1.9191174507141113} +07/25/2024 12:30:41 - INFO - __main__ - Step 13837: {'lr': 0.000490510363686797, 'samples': 664176, 'steps': 13836, 'loss/train': 1.8814870119094849} +07/25/2024 12:30:41 - INFO - __main__ - Step 13838: {'lr': 0.0004905089280153922, 'samples': 664224, 'steps': 13837, 'loss/train': 1.4993077516555786} +07/25/2024 12:30:42 - INFO - __main__ - Step 13839: {'lr': 0.0004905074922374969, 'samples': 664272, 'steps': 13838, 'loss/train': 2.20436429977417} +07/25/2024 12:30:42 - INFO - __main__ - Step 13840: {'lr': 0.0004905060563531116, 'samples': 664320, 'steps': 13839, 'loss/train': 1.731062650680542} +07/25/2024 12:30:42 - INFO - __main__ - Step 13841: {'lr': 0.0004905046203622367, 'samples': 664368, 'steps': 13840, 'loss/train': 0.7757837772369385} +07/25/2024 12:30:43 - INFO - __main__ - Step 13842: {'lr': 0.0004905031842648733, 'samples': 664416, 'steps': 13841, 'loss/train': 1.8717844486236572} +07/25/2024 12:30:43 - INFO - __main__ - Step 13843: {'lr': 0.0004905017480610218, 'samples': 664464, 'steps': 13842, 'loss/train': 0.8724381327629089} +07/25/2024 12:30:43 - INFO - __main__ - Step 13844: {'lr': 0.0004905003117506828, 'samples': 664512, 'steps': 13843, 'loss/train': 1.9266494512557983} +07/25/2024 12:30:43 - INFO - __main__ - Step 13845: {'lr': 0.0004904988753338571, 'samples': 664560, 'steps': 13844, 'loss/train': 1.2967902421951294} +07/25/2024 12:30:44 - INFO - __main__ - Step 13846: {'lr': 0.0004904974388105452, 'samples': 664608, 'steps': 13845, 'loss/train': 0.09311746060848236} +07/25/2024 12:30:44 - INFO - __main__ - Step 13847: {'lr': 0.0004904960021807478, 'samples': 664656, 'steps': 13846, 'loss/train': 0.19031836092472076} +07/25/2024 12:30:44 - INFO - __main__ - Step 13848: {'lr': 0.0004904945654444655, 'samples': 664704, 'steps': 13847, 'loss/train': 2.0763957500457764} +07/25/2024 12:30:45 - INFO - __main__ - Step 13849: {'lr': 0.0004904931286016989, 'samples': 664752, 'steps': 13848, 'loss/train': 0.43671467900276184} +07/25/2024 12:30:45 - INFO - __main__ - Step 13850: {'lr': 0.0004904916916524487, 'samples': 664800, 'steps': 13849, 'loss/train': 1.8915315866470337} +07/25/2024 12:30:45 - INFO - __main__ - Step 13851: {'lr': 0.0004904902545967155, 'samples': 664848, 'steps': 13850, 'loss/train': 1.5623259544372559} +07/25/2024 12:30:45 - INFO - __main__ - Step 13852: {'lr': 0.0004904888174344999, 'samples': 664896, 'steps': 13851, 'loss/train': 1.6907731294631958} +07/25/2024 12:30:46 - INFO - __main__ - Step 13853: {'lr': 0.0004904873801658028, 'samples': 664944, 'steps': 13852, 'loss/train': 1.8816419839859009} +07/25/2024 12:30:46 - INFO - __main__ - Step 13854: {'lr': 0.0004904859427906245, 'samples': 664992, 'steps': 13853, 'loss/train': 2.0454390048980713} +07/25/2024 12:30:46 - INFO - __main__ - Step 13855: {'lr': 0.0004904845053089657, 'samples': 665040, 'steps': 13854, 'loss/train': 1.9192161560058594} +07/25/2024 12:30:47 - INFO - __main__ - Step 13856: {'lr': 0.0004904830677208272, 'samples': 665088, 'steps': 13855, 'loss/train': 1.4856747388839722} +07/25/2024 12:30:47 - INFO - __main__ - Step 13857: {'lr': 0.0004904816300262094, 'samples': 665136, 'steps': 13856, 'loss/train': 1.6665972471237183} +07/25/2024 12:30:47 - INFO - __main__ - Step 13858: {'lr': 0.0004904801922251132, 'samples': 665184, 'steps': 13857, 'loss/train': 2.017258405685425} +07/25/2024 12:30:47 - INFO - __main__ - Step 13859: {'lr': 0.0004904787543175391, 'samples': 665232, 'steps': 13858, 'loss/train': 2.4994611740112305} +07/25/2024 12:30:48 - INFO - __main__ - Step 13860: {'lr': 0.0004904773163034876, 'samples': 665280, 'steps': 13859, 'loss/train': 1.6867344379425049} +07/25/2024 12:30:48 - INFO - __main__ - Step 13861: {'lr': 0.0004904758781829597, 'samples': 665328, 'steps': 13860, 'loss/train': 1.5943603515625} +07/25/2024 12:30:48 - INFO - __main__ - Step 13862: {'lr': 0.0004904744399559557, 'samples': 665376, 'steps': 13861, 'loss/train': 1.8428531885147095} +07/25/2024 12:30:49 - INFO - __main__ - Step 13863: {'lr': 0.0004904730016224763, 'samples': 665424, 'steps': 13862, 'loss/train': 2.148012399673462} +07/25/2024 12:30:49 - INFO - __main__ - Step 13864: {'lr': 0.0004904715631825222, 'samples': 665472, 'steps': 13863, 'loss/train': 1.428911566734314} +07/25/2024 12:30:49 - INFO - __main__ - Step 13865: {'lr': 0.0004904701246360942, 'samples': 665520, 'steps': 13864, 'loss/train': 0.7427608370780945} +07/25/2024 12:30:49 - INFO - __main__ - Step 13866: {'lr': 0.0004904686859831928, 'samples': 665568, 'steps': 13865, 'loss/train': 1.6868351697921753} +07/25/2024 12:30:50 - INFO - __main__ - Step 13867: {'lr': 0.0004904672472238183, 'samples': 665616, 'steps': 13866, 'loss/train': 0.7749272584915161} +07/25/2024 12:30:50 - INFO - __main__ - Step 13868: {'lr': 0.0004904658083579719, 'samples': 665664, 'steps': 13867, 'loss/train': 1.7112808227539062} +07/25/2024 12:30:50 - INFO - __main__ - Step 13869: {'lr': 0.0004904643693856538, 'samples': 665712, 'steps': 13868, 'loss/train': 1.6578166484832764} +07/25/2024 12:30:50 - INFO - __main__ - Step 13870: {'lr': 0.000490462930306865, 'samples': 665760, 'steps': 13869, 'loss/train': 0.3550284504890442} +07/25/2024 12:30:51 - INFO - __main__ - Step 13871: {'lr': 0.0004904614911216057, 'samples': 665808, 'steps': 13870, 'loss/train': 0.17633438110351562} +07/25/2024 12:30:51 - INFO - __main__ - Step 13872: {'lr': 0.000490460051829877, 'samples': 665856, 'steps': 13871, 'loss/train': 1.619014024734497} +07/25/2024 12:30:51 - INFO - __main__ - Step 13873: {'lr': 0.0004904586124316793, 'samples': 665904, 'steps': 13872, 'loss/train': 0.3498995304107666} +07/25/2024 12:30:52 - INFO - __main__ - Step 13874: {'lr': 0.0004904571729270132, 'samples': 665952, 'steps': 13873, 'loss/train': 1.5728845596313477} +07/25/2024 12:30:52 - INFO - __main__ - Step 13875: {'lr': 0.0004904557333158795, 'samples': 666000, 'steps': 13874, 'loss/train': 1.9480992555618286} +07/25/2024 12:30:52 - INFO - __main__ - Step 13876: {'lr': 0.0004904542935982787, 'samples': 666048, 'steps': 13875, 'loss/train': 0.3457915484905243} +07/25/2024 12:30:52 - INFO - __main__ - Step 13877: {'lr': 0.0004904528537742113, 'samples': 666096, 'steps': 13876, 'loss/train': 2.0000734329223633} +07/25/2024 12:30:53 - INFO - __main__ - Step 13878: {'lr': 0.0004904514138436783, 'samples': 666144, 'steps': 13877, 'loss/train': 2.1519339084625244} +07/25/2024 12:30:53 - INFO - __main__ - Step 13879: {'lr': 0.0004904499738066802, 'samples': 666192, 'steps': 13878, 'loss/train': 1.4738364219665527} +07/25/2024 12:30:53 - INFO - __main__ - Step 13880: {'lr': 0.0004904485336632174, 'samples': 666240, 'steps': 13879, 'loss/train': 1.5966142416000366} +07/25/2024 12:30:54 - INFO - __main__ - Step 13881: {'lr': 0.0004904470934132909, 'samples': 666288, 'steps': 13880, 'loss/train': 2.0330493450164795} +07/25/2024 12:30:54 - INFO - __main__ - Step 13882: {'lr': 0.000490445653056901, 'samples': 666336, 'steps': 13881, 'loss/train': 1.919031023979187} +07/25/2024 12:30:54 - INFO - __main__ - Step 13883: {'lr': 0.0004904442125940487, 'samples': 666384, 'steps': 13882, 'loss/train': 2.2739336490631104} +07/25/2024 12:30:54 - INFO - __main__ - Step 13884: {'lr': 0.0004904427720247343, 'samples': 666432, 'steps': 13883, 'loss/train': 1.9988421201705933} +07/25/2024 12:30:55 - INFO - __main__ - Step 13885: {'lr': 0.0004904413313489587, 'samples': 666480, 'steps': 13884, 'loss/train': 1.5517899990081787} +07/25/2024 12:30:55 - INFO - __main__ - Step 13886: {'lr': 0.0004904398905667222, 'samples': 666528, 'steps': 13885, 'loss/train': 1.1910673379898071} +07/25/2024 12:30:55 - INFO - __main__ - Step 13887: {'lr': 0.0004904384496780258, 'samples': 666576, 'steps': 13886, 'loss/train': 1.7715325355529785} +07/25/2024 12:30:56 - INFO - __main__ - Step 13888: {'lr': 0.00049043700868287, 'samples': 666624, 'steps': 13887, 'loss/train': 1.6618428230285645} +07/25/2024 12:30:56 - INFO - __main__ - Step 13889: {'lr': 0.0004904355675812554, 'samples': 666672, 'steps': 13888, 'loss/train': 0.7064880728721619} +07/25/2024 12:30:56 - INFO - __main__ - Step 13890: {'lr': 0.0004904341263731826, 'samples': 666720, 'steps': 13889, 'loss/train': 1.9264905452728271} +07/25/2024 12:30:56 - INFO - __main__ - Step 13891: {'lr': 0.0004904326850586524, 'samples': 666768, 'steps': 13890, 'loss/train': 2.2908034324645996} +07/25/2024 12:30:57 - INFO - __main__ - Step 13892: {'lr': 0.0004904312436376653, 'samples': 666816, 'steps': 13891, 'loss/train': 1.771439552307129} +07/25/2024 12:30:57 - INFO - __main__ - Step 13893: {'lr': 0.000490429802110222, 'samples': 666864, 'steps': 13892, 'loss/train': 1.5881978273391724} +07/25/2024 12:30:57 - INFO - __main__ - Step 13894: {'lr': 0.0004904283604763233, 'samples': 666912, 'steps': 13893, 'loss/train': 1.9186898469924927} +07/25/2024 12:30:58 - INFO - __main__ - Step 13895: {'lr': 0.0004904269187359693, 'samples': 666960, 'steps': 13894, 'loss/train': 0.1581931710243225} +07/25/2024 12:30:58 - INFO - __main__ - Step 13896: {'lr': 0.0004904254768891613, 'samples': 667008, 'steps': 13895, 'loss/train': 1.854894995689392} +07/25/2024 12:30:58 - INFO - __main__ - Step 13897: {'lr': 0.0004904240349358995, 'samples': 667056, 'steps': 13896, 'loss/train': 0.48199063539505005} +07/25/2024 12:30:58 - INFO - __main__ - Step 13898: {'lr': 0.0004904225928761846, 'samples': 667104, 'steps': 13897, 'loss/train': 1.307550072669983} +07/25/2024 12:30:59 - INFO - __main__ - Step 13899: {'lr': 0.0004904211507100175, 'samples': 667152, 'steps': 13898, 'loss/train': 1.740189790725708} +07/25/2024 12:30:59 - INFO - __main__ - Step 13900: {'lr': 0.0004904197084373984, 'samples': 667200, 'steps': 13899, 'loss/train': 2.05965256690979} +07/25/2024 12:30:59 - INFO - __main__ - Step 13901: {'lr': 0.0004904182660583284, 'samples': 667248, 'steps': 13900, 'loss/train': 1.5989974737167358} +07/25/2024 12:31:00 - INFO - __main__ - Step 13902: {'lr': 0.000490416823572808, 'samples': 667296, 'steps': 13901, 'loss/train': 1.2491470575332642} +07/25/2024 12:31:00 - INFO - __main__ - Step 13903: {'lr': 0.0004904153809808375, 'samples': 667344, 'steps': 13902, 'loss/train': 1.0993424654006958} +07/25/2024 12:31:00 - INFO - __main__ - Step 13904: {'lr': 0.000490413938282418, 'samples': 667392, 'steps': 13903, 'loss/train': 0.42987707257270813} +07/25/2024 12:31:00 - INFO - __main__ - Step 13905: {'lr': 0.0004904124954775498, 'samples': 667440, 'steps': 13904, 'loss/train': 1.8640011548995972} +07/25/2024 12:31:01 - INFO - __main__ - Step 13906: {'lr': 0.0004904110525662339, 'samples': 667488, 'steps': 13905, 'loss/train': 1.6138999462127686} +07/25/2024 12:31:01 - INFO - __main__ - Step 13907: {'lr': 0.0004904096095484705, 'samples': 667536, 'steps': 13906, 'loss/train': 2.1750524044036865} +07/25/2024 12:31:01 - INFO - __main__ - Step 13908: {'lr': 0.0004904081664242606, 'samples': 667584, 'steps': 13907, 'loss/train': 1.8169482946395874} +07/25/2024 12:31:02 - INFO - __main__ - Step 13909: {'lr': 0.0004904067231936046, 'samples': 667632, 'steps': 13908, 'loss/train': 2.021773099899292} +07/25/2024 12:31:02 - INFO - __main__ - Step 13910: {'lr': 0.0004904052798565033, 'samples': 667680, 'steps': 13909, 'loss/train': 1.9504534006118774} +07/25/2024 12:31:02 - INFO - __main__ - Step 13911: {'lr': 0.0004904038364129573, 'samples': 667728, 'steps': 13910, 'loss/train': 1.9858551025390625} +07/25/2024 12:31:02 - INFO - __main__ - Step 13912: {'lr': 0.0004904023928629672, 'samples': 667776, 'steps': 13911, 'loss/train': 1.4586315155029297} +07/25/2024 12:31:03 - INFO - __main__ - Step 13913: {'lr': 0.0004904009492065336, 'samples': 667824, 'steps': 13912, 'loss/train': 0.7078585624694824} +07/25/2024 12:31:03 - INFO - __main__ - Step 13914: {'lr': 0.0004903995054436573, 'samples': 667872, 'steps': 13913, 'loss/train': 2.073410749435425} +07/25/2024 12:31:03 - INFO - __main__ - Step 13915: {'lr': 0.0004903980615743388, 'samples': 667920, 'steps': 13914, 'loss/train': 2.404108762741089} +07/25/2024 12:31:04 - INFO - __main__ - Step 13916: {'lr': 0.0004903966175985787, 'samples': 667968, 'steps': 13915, 'loss/train': 1.5618705749511719} +07/25/2024 12:31:04 - INFO - __main__ - Step 13917: {'lr': 0.0004903951735163777, 'samples': 668016, 'steps': 13916, 'loss/train': 1.2638307809829712} +07/25/2024 12:31:04 - INFO - __main__ - Step 13918: {'lr': 0.0004903937293277365, 'samples': 668064, 'steps': 13917, 'loss/train': 1.6805168390274048} +07/25/2024 12:31:04 - INFO - __main__ - Step 13919: {'lr': 0.0004903922850326558, 'samples': 668112, 'steps': 13918, 'loss/train': 0.10232653468847275} +07/25/2024 12:31:05 - INFO - __main__ - Step 13920: {'lr': 0.0004903908406311359, 'samples': 668160, 'steps': 13919, 'loss/train': 1.9335681200027466} +07/25/2024 12:31:05 - INFO - __main__ - Step 13921: {'lr': 0.0004903893961231779, 'samples': 668208, 'steps': 13920, 'loss/train': 0.44992145895957947} +07/25/2024 12:31:05 - INFO - __main__ - Step 13922: {'lr': 0.0004903879515087821, 'samples': 668256, 'steps': 13921, 'loss/train': 1.5879367589950562} +07/25/2024 12:31:06 - INFO - __main__ - Step 13923: {'lr': 0.0004903865067879493, 'samples': 668304, 'steps': 13922, 'loss/train': 1.4761993885040283} +07/25/2024 12:31:06 - INFO - __main__ - Step 13924: {'lr': 0.00049038506196068, 'samples': 668352, 'steps': 13923, 'loss/train': 1.8864130973815918} +07/25/2024 12:31:06 - INFO - __main__ - Step 13925: {'lr': 0.000490383617026975, 'samples': 668400, 'steps': 13924, 'loss/train': 0.4564392864704132} +07/25/2024 12:31:06 - INFO - __main__ - Step 13926: {'lr': 0.0004903821719868349, 'samples': 668448, 'steps': 13925, 'loss/train': 1.917463779449463} +07/25/2024 12:31:07 - INFO - __main__ - Step 13927: {'lr': 0.0004903807268402603, 'samples': 668496, 'steps': 13926, 'loss/train': 1.7410387992858887} +07/25/2024 12:31:07 - INFO - __main__ - Step 13928: {'lr': 0.0004903792815872518, 'samples': 668544, 'steps': 13927, 'loss/train': 1.68401300907135} +07/25/2024 12:31:07 - INFO - __main__ - Step 13929: {'lr': 0.00049037783622781, 'samples': 668592, 'steps': 13928, 'loss/train': 1.9867043495178223} +07/25/2024 12:31:08 - INFO - __main__ - Step 13930: {'lr': 0.0004903763907619359, 'samples': 668640, 'steps': 13929, 'loss/train': 2.049704074859619} +07/25/2024 12:31:08 - INFO - __main__ - Step 13931: {'lr': 0.0004903749451896297, 'samples': 668688, 'steps': 13930, 'loss/train': 2.3457345962524414} +07/25/2024 12:31:08 - INFO - __main__ - Step 13932: {'lr': 0.0004903734995108922, 'samples': 668736, 'steps': 13931, 'loss/train': 1.5321263074874878} +07/25/2024 12:31:08 - INFO - __main__ - Step 13933: {'lr': 0.000490372053725724, 'samples': 668784, 'steps': 13932, 'loss/train': 1.673532485961914} +07/25/2024 12:31:09 - INFO - __main__ - Step 13934: {'lr': 0.0004903706078341259, 'samples': 668832, 'steps': 13933, 'loss/train': 1.6890109777450562} +07/25/2024 12:31:09 - INFO - __main__ - Step 13935: {'lr': 0.0004903691618360984, 'samples': 668880, 'steps': 13934, 'loss/train': 1.3509515523910522} +07/25/2024 12:31:09 - INFO - __main__ - Step 13936: {'lr': 0.0004903677157316422, 'samples': 668928, 'steps': 13935, 'loss/train': 1.9112952947616577} +07/25/2024 12:31:10 - INFO - __main__ - Step 13937: {'lr': 0.0004903662695207578, 'samples': 668976, 'steps': 13936, 'loss/train': 0.7290246486663818} +07/25/2024 12:31:10 - INFO - __main__ - Step 13938: {'lr': 0.000490364823203446, 'samples': 669024, 'steps': 13937, 'loss/train': 1.951779842376709} +07/25/2024 12:31:10 - INFO - __main__ - Step 13939: {'lr': 0.0004903633767797074, 'samples': 669072, 'steps': 13938, 'loss/train': 2.4047584533691406} +07/25/2024 12:31:10 - INFO - __main__ - Step 13940: {'lr': 0.0004903619302495427, 'samples': 669120, 'steps': 13939, 'loss/train': 2.2110049724578857} +07/25/2024 12:31:11 - INFO - __main__ - Step 13941: {'lr': 0.0004903604836129524, 'samples': 669168, 'steps': 13940, 'loss/train': 1.133800983428955} +07/25/2024 12:31:11 - INFO - __main__ - Step 13942: {'lr': 0.0004903590368699372, 'samples': 669216, 'steps': 13941, 'loss/train': 3.4725162982940674} +07/25/2024 12:31:11 - INFO - __main__ - Step 13943: {'lr': 0.0004903575900204978, 'samples': 669264, 'steps': 13942, 'loss/train': 0.16082724928855896} +07/25/2024 12:31:12 - INFO - __main__ - Step 13944: {'lr': 0.0004903561430646347, 'samples': 669312, 'steps': 13943, 'loss/train': 2.027554512023926} +07/25/2024 12:31:12 - INFO - __main__ - Step 13945: {'lr': 0.0004903546960023488, 'samples': 669360, 'steps': 13944, 'loss/train': 0.46326205134391785} +07/25/2024 12:31:12 - INFO - __main__ - Step 13946: {'lr': 0.0004903532488336404, 'samples': 669408, 'steps': 13945, 'loss/train': 2.138397693634033} +07/25/2024 12:31:12 - INFO - __main__ - Step 13947: {'lr': 0.0004903518015585104, 'samples': 669456, 'steps': 13946, 'loss/train': 1.6197723150253296} +07/25/2024 12:31:13 - INFO - __main__ - Step 13948: {'lr': 0.0004903503541769594, 'samples': 669504, 'steps': 13947, 'loss/train': 1.8970016241073608} +07/25/2024 12:31:13 - INFO - __main__ - Step 13949: {'lr': 0.0004903489066889879, 'samples': 669552, 'steps': 13948, 'loss/train': 1.3433996438980103} +07/25/2024 12:31:13 - INFO - __main__ - Step 13950: {'lr': 0.0004903474590945968, 'samples': 669600, 'steps': 13949, 'loss/train': 2.235652446746826} +07/25/2024 12:31:13 - DEBUG - datasets.packaged_modules.json.json - Batch of 11324668 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:31:14 - INFO - __main__ - Step 13951: {'lr': 0.0004903460113937864, 'samples': 669648, 'steps': 13950, 'loss/train': 1.7277852296829224} +07/25/2024 12:31:14 - DEBUG - datasets.packaged_modules.json.json - Batch of 10745074 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:31:14 - INFO - __main__ - Step 13952: {'lr': 0.0004903445635865576, 'samples': 669696, 'steps': 13951, 'loss/train': 1.791016936302185} +07/25/2024 12:31:14 - INFO - __main__ - Step 13953: {'lr': 0.000490343115672911, 'samples': 669744, 'steps': 13952, 'loss/train': 1.9823952913284302} +07/25/2024 12:31:14 - INFO - __main__ - Step 13954: {'lr': 0.0004903416676528471, 'samples': 669792, 'steps': 13953, 'loss/train': 1.7086279392242432} +07/25/2024 12:31:15 - INFO - __main__ - Step 13955: {'lr': 0.0004903402195263667, 'samples': 669840, 'steps': 13954, 'loss/train': 2.1749441623687744} +07/25/2024 12:31:15 - INFO - __main__ - Step 13956: {'lr': 0.0004903387712934704, 'samples': 669888, 'steps': 13955, 'loss/train': 1.6103540658950806} +07/25/2024 12:31:15 - INFO - __main__ - Step 13957: {'lr': 0.0004903373229541588, 'samples': 669936, 'steps': 13956, 'loss/train': 1.5819019079208374} +07/25/2024 12:31:16 - INFO - __main__ - Step 13958: {'lr': 0.0004903358745084326, 'samples': 669984, 'steps': 13957, 'loss/train': 1.7738786935806274} +07/25/2024 12:31:16 - INFO - __main__ - Step 13959: {'lr': 0.0004903344259562924, 'samples': 670032, 'steps': 13958, 'loss/train': 1.5273462533950806} +07/25/2024 12:31:16 - INFO - __main__ - Step 13960: {'lr': 0.0004903329772977388, 'samples': 670080, 'steps': 13959, 'loss/train': 1.6083998680114746} +07/25/2024 12:31:16 - INFO - __main__ - Step 13961: {'lr': 0.0004903315285327726, 'samples': 670128, 'steps': 13960, 'loss/train': 0.7423622608184814} +07/25/2024 12:31:17 - INFO - __main__ - Step 13962: {'lr': 0.0004903300796613942, 'samples': 670176, 'steps': 13961, 'loss/train': 1.8407771587371826} +07/25/2024 12:31:17 - INFO - __main__ - Step 13963: {'lr': 0.0004903286306836045, 'samples': 670224, 'steps': 13962, 'loss/train': 0.9355841875076294} +07/25/2024 12:31:17 - INFO - __main__ - Step 13964: {'lr': 0.000490327181599404, 'samples': 670272, 'steps': 13963, 'loss/train': 2.1893856525421143} +07/25/2024 12:31:17 - INFO - __main__ - Step 13965: {'lr': 0.0004903257324087932, 'samples': 670320, 'steps': 13964, 'loss/train': 1.2773151397705078} +07/25/2024 12:31:18 - INFO - __main__ - Step 13966: {'lr': 0.0004903242831117731, 'samples': 670368, 'steps': 13965, 'loss/train': 2.1227004528045654} +07/25/2024 12:31:18 - INFO - __main__ - Step 13967: {'lr': 0.000490322833708344, 'samples': 670416, 'steps': 13966, 'loss/train': 0.11044199019670486} +07/25/2024 12:31:18 - INFO - __main__ - Step 13968: {'lr': 0.0004903213841985067, 'samples': 670464, 'steps': 13967, 'loss/train': 1.5396562814712524} +07/25/2024 12:31:19 - INFO - __main__ - Step 13969: {'lr': 0.0004903199345822619, 'samples': 670512, 'steps': 13968, 'loss/train': 0.5507844090461731} +07/25/2024 12:31:19 - INFO - __main__ - Step 13970: {'lr': 0.0004903184848596101, 'samples': 670560, 'steps': 13969, 'loss/train': 1.839134693145752} +07/25/2024 12:31:19 - INFO - __main__ - Step 13971: {'lr': 0.0004903170350305521, 'samples': 670608, 'steps': 13970, 'loss/train': 2.509114980697632} +07/25/2024 12:31:19 - INFO - __main__ - Step 13972: {'lr': 0.0004903155850950883, 'samples': 670656, 'steps': 13971, 'loss/train': 2.113166570663452} +07/25/2024 12:31:20 - INFO - __main__ - Step 13973: {'lr': 0.0004903141350532196, 'samples': 670704, 'steps': 13972, 'loss/train': 1.1139750480651855} +07/25/2024 12:31:20 - INFO - __main__ - Step 13974: {'lr': 0.0004903126849049465, 'samples': 670752, 'steps': 13973, 'loss/train': 1.9030449390411377} +07/25/2024 12:31:20 - INFO - __main__ - Step 13975: {'lr': 0.0004903112346502697, 'samples': 670800, 'steps': 13974, 'loss/train': 1.9154084920883179} +07/25/2024 12:31:21 - INFO - __main__ - Step 13976: {'lr': 0.0004903097842891898, 'samples': 670848, 'steps': 13975, 'loss/train': 1.5511298179626465} +07/25/2024 12:31:21 - INFO - __main__ - Step 13977: {'lr': 0.0004903083338217075, 'samples': 670896, 'steps': 13976, 'loss/train': 2.003246545791626} +07/25/2024 12:31:21 - INFO - __main__ - Step 13978: {'lr': 0.0004903068832478234, 'samples': 670944, 'steps': 13977, 'loss/train': 1.280185341835022} +07/25/2024 12:31:21 - INFO - __main__ - Step 13979: {'lr': 0.0004903054325675381, 'samples': 670992, 'steps': 13978, 'loss/train': 1.1575665473937988} +07/25/2024 12:31:22 - INFO - __main__ - Step 13980: {'lr': 0.0004903039817808523, 'samples': 671040, 'steps': 13979, 'loss/train': 2.1094937324523926} +07/25/2024 12:31:22 - INFO - __main__ - Step 13981: {'lr': 0.0004903025308877667, 'samples': 671088, 'steps': 13980, 'loss/train': 1.5719441175460815} +07/25/2024 12:31:22 - INFO - __main__ - Step 13982: {'lr': 0.0004903010798882818, 'samples': 671136, 'steps': 13981, 'loss/train': 1.7234588861465454} +07/25/2024 12:31:23 - INFO - __main__ - Step 13983: {'lr': 0.0004902996287823984, 'samples': 671184, 'steps': 13982, 'loss/train': 1.5213589668273926} +07/25/2024 12:31:23 - INFO - __main__ - Step 13984: {'lr': 0.0004902981775701168, 'samples': 671232, 'steps': 13983, 'loss/train': 1.9187536239624023} +07/25/2024 12:31:23 - INFO - __main__ - Step 13985: {'lr': 0.0004902967262514382, 'samples': 671280, 'steps': 13984, 'loss/train': 0.7143397331237793} +07/25/2024 12:31:23 - INFO - __main__ - Step 13986: {'lr': 0.0004902952748263628, 'samples': 671328, 'steps': 13985, 'loss/train': 1.766727089881897} +07/25/2024 12:31:24 - INFO - __main__ - Step 13987: {'lr': 0.0004902938232948914, 'samples': 671376, 'steps': 13986, 'loss/train': 1.8956636190414429} +07/25/2024 12:31:24 - INFO - __main__ - Step 13988: {'lr': 0.0004902923716570246, 'samples': 671424, 'steps': 13987, 'loss/train': 1.9398995637893677} +07/25/2024 12:31:24 - INFO - __main__ - Step 13989: {'lr': 0.0004902909199127631, 'samples': 671472, 'steps': 13988, 'loss/train': 2.1456897258758545} +07/25/2024 12:31:25 - INFO - __main__ - Step 13990: {'lr': 0.0004902894680621076, 'samples': 671520, 'steps': 13989, 'loss/train': 3.6582589149475098} +07/25/2024 12:31:25 - INFO - __main__ - Step 13991: {'lr': 0.0004902880161050585, 'samples': 671568, 'steps': 13990, 'loss/train': 0.1952439844608307} +07/25/2024 12:31:25 - INFO - __main__ - Step 13992: {'lr': 0.0004902865640416167, 'samples': 671616, 'steps': 13991, 'loss/train': 2.0724196434020996} +07/25/2024 12:31:25 - INFO - __main__ - Step 13993: {'lr': 0.0004902851118717827, 'samples': 671664, 'steps': 13992, 'loss/train': 0.659279465675354} +07/25/2024 12:31:26 - INFO - __main__ - Step 13994: {'lr': 0.0004902836595955572, 'samples': 671712, 'steps': 13993, 'loss/train': 1.564858078956604} +07/25/2024 12:31:26 - INFO - __main__ - Step 13995: {'lr': 0.0004902822072129409, 'samples': 671760, 'steps': 13994, 'loss/train': 1.4178375005722046} +07/25/2024 12:31:26 - INFO - __main__ - Step 13996: {'lr': 0.0004902807547239343, 'samples': 671808, 'steps': 13995, 'loss/train': 1.7965879440307617} +07/25/2024 12:31:27 - INFO - __main__ - Step 13997: {'lr': 0.0004902793021285382, 'samples': 671856, 'steps': 13996, 'loss/train': 1.4508843421936035} +07/25/2024 12:31:27 - INFO - __main__ - Step 13998: {'lr': 0.000490277849426753, 'samples': 671904, 'steps': 13997, 'loss/train': 1.714200496673584} +07/25/2024 12:31:27 - INFO - __main__ - Step 13999: {'lr': 0.0004902763966185796, 'samples': 671952, 'steps': 13998, 'loss/train': 1.112024188041687} +07/25/2024 12:31:27 - INFO - __main__ - Step 14000: {'lr': 0.0004902749437040185, 'samples': 672000, 'steps': 13999, 'loss/train': 1.5964674949645996} +07/25/2024 12:31:28 - INFO - __main__ - Step 14001: {'lr': 0.0004902734906830704, 'samples': 672048, 'steps': 14000, 'loss/train': 2.3768677711486816} +07/25/2024 12:31:28 - DEBUG - datasets.packaged_modules.json.json - Batch of 10488714 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:31:28 - INFO - __main__ - Step 14002: {'lr': 0.0004902720375557359, 'samples': 672096, 'steps': 14001, 'loss/train': 2.479311227798462} +07/25/2024 12:31:28 - INFO - __main__ - Step 14003: {'lr': 0.0004902705843220157, 'samples': 672144, 'steps': 14002, 'loss/train': 0.26051801443099976} +07/25/2024 12:31:29 - INFO - __main__ - Step 14004: {'lr': 0.0004902691309819104, 'samples': 672192, 'steps': 14003, 'loss/train': 2.258776903152466} +07/25/2024 12:31:29 - INFO - __main__ - Step 14005: {'lr': 0.0004902676775354207, 'samples': 672240, 'steps': 14004, 'loss/train': 2.007646083831787} +07/25/2024 12:31:29 - INFO - __main__ - Step 14006: {'lr': 0.0004902662239825473, 'samples': 672288, 'steps': 14005, 'loss/train': 1.1413264274597168} +07/25/2024 12:31:29 - INFO - __main__ - Step 14007: {'lr': 0.0004902647703232906, 'samples': 672336, 'steps': 14006, 'loss/train': 2.1386449337005615} +07/25/2024 12:31:30 - INFO - __main__ - Step 14008: {'lr': 0.0004902633165576514, 'samples': 672384, 'steps': 14007, 'loss/train': 2.352726697921753} +07/25/2024 12:31:30 - INFO - __main__ - Step 14009: {'lr': 0.0004902618626856304, 'samples': 672432, 'steps': 14008, 'loss/train': 0.8033301830291748} +07/25/2024 12:31:30 - INFO - __main__ - Step 14010: {'lr': 0.0004902604087072282, 'samples': 672480, 'steps': 14009, 'loss/train': 1.662956714630127} +07/25/2024 12:31:31 - INFO - __main__ - Step 14011: {'lr': 0.0004902589546224453, 'samples': 672528, 'steps': 14010, 'loss/train': 1.835359811782837} +07/25/2024 12:31:31 - INFO - __main__ - Step 14012: {'lr': 0.0004902575004312826, 'samples': 672576, 'steps': 14011, 'loss/train': 1.9017761945724487} +07/25/2024 12:31:31 - INFO - __main__ - Step 14013: {'lr': 0.0004902560461337405, 'samples': 672624, 'steps': 14012, 'loss/train': 1.8239960670471191} +07/25/2024 12:31:31 - INFO - __main__ - Step 14014: {'lr': 0.0004902545917298197, 'samples': 672672, 'steps': 14013, 'loss/train': 1.570167064666748} +07/25/2024 12:31:32 - INFO - __main__ - Step 14015: {'lr': 0.0004902531372195212, 'samples': 672720, 'steps': 14014, 'loss/train': 0.24465018510818481} +07/25/2024 12:31:32 - INFO - __main__ - Step 14016: {'lr': 0.000490251682602845, 'samples': 672768, 'steps': 14015, 'loss/train': 1.745800495147705} +07/25/2024 12:31:32 - INFO - __main__ - Step 14017: {'lr': 0.0004902502278797923, 'samples': 672816, 'steps': 14016, 'loss/train': 0.5804218053817749} +07/25/2024 12:31:33 - INFO - __main__ - Step 14018: {'lr': 0.0004902487730503635, 'samples': 672864, 'steps': 14017, 'loss/train': 2.07492733001709} +07/25/2024 12:31:33 - INFO - __main__ - Step 14019: {'lr': 0.0004902473181145593, 'samples': 672912, 'steps': 14018, 'loss/train': 1.857669472694397} +07/25/2024 12:31:33 - INFO - __main__ - Step 14020: {'lr': 0.0004902458630723802, 'samples': 672960, 'steps': 14019, 'loss/train': 1.8385634422302246} +07/25/2024 12:31:33 - INFO - __main__ - Step 14021: {'lr': 0.0004902444079238269, 'samples': 673008, 'steps': 14020, 'loss/train': 1.7917710542678833} +07/25/2024 12:31:34 - INFO - __main__ - Step 14022: {'lr': 0.0004902429526689003, 'samples': 673056, 'steps': 14021, 'loss/train': 0.79887855052948} +07/25/2024 12:31:34 - INFO - __main__ - Step 14023: {'lr': 0.0004902414973076008, 'samples': 673104, 'steps': 14022, 'loss/train': 1.4551827907562256} +07/25/2024 12:31:34 - INFO - __main__ - Step 14024: {'lr': 0.0004902400418399291, 'samples': 673152, 'steps': 14023, 'loss/train': 1.7978601455688477} +07/25/2024 12:31:35 - INFO - __main__ - Step 14025: {'lr': 0.0004902385862658858, 'samples': 673200, 'steps': 14024, 'loss/train': 1.5871820449829102} +07/25/2024 12:31:35 - INFO - __main__ - Step 14026: {'lr': 0.0004902371305854716, 'samples': 673248, 'steps': 14025, 'loss/train': 0.9607473611831665} +07/25/2024 12:31:35 - INFO - __main__ - Step 14027: {'lr': 0.0004902356747986871, 'samples': 673296, 'steps': 14026, 'loss/train': 1.234490156173706} +07/25/2024 12:31:35 - INFO - __main__ - Step 14028: {'lr': 0.000490234218905533, 'samples': 673344, 'steps': 14027, 'loss/train': 2.027371406555176} +07/25/2024 12:31:36 - INFO - __main__ - Step 14029: {'lr': 0.00049023276290601, 'samples': 673392, 'steps': 14028, 'loss/train': 2.0395798683166504} +07/25/2024 12:31:36 - INFO - __main__ - Step 14030: {'lr': 0.0004902313068001186, 'samples': 673440, 'steps': 14029, 'loss/train': 1.991277813911438} +07/25/2024 12:31:36 - INFO - __main__ - Step 14031: {'lr': 0.0004902298505878595, 'samples': 673488, 'steps': 14030, 'loss/train': 2.399665594100952} +07/25/2024 12:31:37 - INFO - __main__ - Step 14032: {'lr': 0.0004902283942692334, 'samples': 673536, 'steps': 14031, 'loss/train': 2.410736083984375} +07/25/2024 12:31:37 - INFO - __main__ - Step 14033: {'lr': 0.0004902269378442408, 'samples': 673584, 'steps': 14032, 'loss/train': 0.7604895234107971} +07/25/2024 12:31:37 - INFO - __main__ - Step 14034: {'lr': 0.0004902254813128825, 'samples': 673632, 'steps': 14033, 'loss/train': 1.9227627515792847} +07/25/2024 12:31:37 - INFO - __main__ - Step 14035: {'lr': 0.000490224024675159, 'samples': 673680, 'steps': 14034, 'loss/train': 2.143932342529297} +07/25/2024 12:31:38 - INFO - __main__ - Step 14036: {'lr': 0.0004902225679310712, 'samples': 673728, 'steps': 14035, 'loss/train': 1.978017807006836} +07/25/2024 12:31:38 - INFO - __main__ - Step 14037: {'lr': 0.0004902211110806195, 'samples': 673776, 'steps': 14036, 'loss/train': 1.896439790725708} +07/25/2024 12:31:38 - INFO - __main__ - Step 14038: {'lr': 0.0004902196541238046, 'samples': 673824, 'steps': 14037, 'loss/train': 1.706348180770874} +07/25/2024 12:31:39 - INFO - __main__ - Step 14039: {'lr': 0.0004902181970606272, 'samples': 673872, 'steps': 14038, 'loss/train': 0.2894965410232544} +07/25/2024 12:31:39 - INFO - __main__ - Step 14040: {'lr': 0.0004902167398910879, 'samples': 673920, 'steps': 14039, 'loss/train': 1.9382988214492798} +07/25/2024 12:31:39 - INFO - __main__ - Step 14041: {'lr': 0.0004902152826151873, 'samples': 673968, 'steps': 14040, 'loss/train': 0.27350401878356934} +07/25/2024 12:31:39 - INFO - __main__ - Step 14042: {'lr': 0.0004902138252329262, 'samples': 674016, 'steps': 14041, 'loss/train': 1.9307050704956055} +07/25/2024 12:31:40 - INFO - __main__ - Step 14043: {'lr': 0.0004902123677443051, 'samples': 674064, 'steps': 14042, 'loss/train': 1.6571648120880127} +07/25/2024 12:31:40 - INFO - __main__ - Step 14044: {'lr': 0.0004902109101493248, 'samples': 674112, 'steps': 14043, 'loss/train': 2.0484509468078613} +07/25/2024 12:31:40 - INFO - __main__ - Step 14045: {'lr': 0.0004902094524479857, 'samples': 674160, 'steps': 14044, 'loss/train': 2.1331424713134766} +07/25/2024 12:31:41 - INFO - __main__ - Step 14046: {'lr': 0.0004902079946402886, 'samples': 674208, 'steps': 14045, 'loss/train': 1.1962695121765137} +07/25/2024 12:31:41 - INFO - __main__ - Step 14047: {'lr': 0.0004902065367262342, 'samples': 674256, 'steps': 14046, 'loss/train': 1.3432585000991821} +07/25/2024 12:31:41 - INFO - __main__ - Step 14048: {'lr': 0.000490205078705823, 'samples': 674304, 'steps': 14047, 'loss/train': 1.4076001644134521} +07/25/2024 12:31:41 - INFO - __main__ - Step 14049: {'lr': 0.0004902036205790557, 'samples': 674352, 'steps': 14048, 'loss/train': 1.8736777305603027} +07/25/2024 12:31:42 - INFO - __main__ - Step 14050: {'lr': 0.000490202162345933, 'samples': 674400, 'steps': 14049, 'loss/train': 0.4330558776855469} +07/25/2024 12:31:42 - INFO - __main__ - Step 14051: {'lr': 0.0004902007040064556, 'samples': 674448, 'steps': 14050, 'loss/train': 2.1190435886383057} +07/25/2024 12:31:42 - INFO - __main__ - Step 14052: {'lr': 0.000490199245560624, 'samples': 674496, 'steps': 14051, 'loss/train': 1.8019105195999146} +07/25/2024 12:31:43 - INFO - __main__ - Step 14053: {'lr': 0.0004901977870084388, 'samples': 674544, 'steps': 14052, 'loss/train': 2.136397361755371} +07/25/2024 12:31:43 - DEBUG - datasets.packaged_modules.json.json - Batch of 10487004 bytes couldn't be parsed with block_size=327680. Retrying with block_size=655360. +07/25/2024 12:31:43 - INFO - __main__ - Step 14054: {'lr': 0.0004901963283499009, 'samples': 674592, 'steps': 14053, 'loss/train': 1.401371955871582} +07/25/2024 12:31:43 - INFO - __main__ - Step 14055: {'lr': 0.0004901948695850108, 'samples': 674640, 'steps': 14054, 'loss/train': 1.9199743270874023} +07/25/2024 12:31:43 - INFO - __main__ - Step 14056: {'lr': 0.0004901934107137691, 'samples': 674688, 'steps': 14055, 'loss/train': 1.5640838146209717} +07/25/2024 12:31:43 - DEBUG - datasets.packaged_modules.json.json - Batch of 10489615 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:31:44 - INFO - __main__ - Step 14057: {'lr': 0.0004901919517361765, 'samples': 674736, 'steps': 14056, 'loss/train': 0.734663188457489} +07/25/2024 12:31:44 - INFO - __main__ - Step 14058: {'lr': 0.0004901904926522336, 'samples': 674784, 'steps': 14057, 'loss/train': 1.5294684171676636} +07/25/2024 12:31:44 - INFO - __main__ - Step 14059: {'lr': 0.000490189033461941, 'samples': 674832, 'steps': 14058, 'loss/train': 1.9266445636749268} +07/25/2024 12:31:45 - INFO - __main__ - Step 14060: {'lr': 0.0004901875741652996, 'samples': 674880, 'steps': 14059, 'loss/train': 1.8215454816818237} +07/25/2024 12:31:45 - INFO - __main__ - Step 14061: {'lr': 0.0004901861147623098, 'samples': 674928, 'steps': 14060, 'loss/train': 1.830808401107788} +07/25/2024 12:31:45 - INFO - __main__ - Step 14062: {'lr': 0.0004901846552529723, 'samples': 674976, 'steps': 14061, 'loss/train': 1.9749447107315063} +07/25/2024 12:31:45 - INFO - __main__ - Step 14063: {'lr': 0.0004901831956372878, 'samples': 675024, 'steps': 14062, 'loss/train': 0.7568787932395935} +07/25/2024 12:31:46 - INFO - __main__ - Step 14064: {'lr': 0.0004901817359152568, 'samples': 675072, 'steps': 14063, 'loss/train': 1.500494360923767} +07/25/2024 12:31:46 - INFO - __main__ - Step 14065: {'lr': 0.0004901802760868801, 'samples': 675120, 'steps': 14064, 'loss/train': 0.2715095579624176} +07/25/2024 12:31:46 - INFO - __main__ - Step 14066: {'lr': 0.0004901788161521584, 'samples': 675168, 'steps': 14065, 'loss/train': 1.454016923904419} +07/25/2024 12:31:47 - INFO - __main__ - Step 14067: {'lr': 0.0004901773561110922, 'samples': 675216, 'steps': 14066, 'loss/train': 2.7274680137634277} +07/25/2024 12:31:47 - INFO - __main__ - Step 14068: {'lr': 0.0004901758959636822, 'samples': 675264, 'steps': 14067, 'loss/train': 1.3386664390563965} +07/25/2024 12:31:47 - INFO - __main__ - Step 14069: {'lr': 0.000490174435709929, 'samples': 675312, 'steps': 14068, 'loss/train': 2.2004218101501465} +07/25/2024 12:31:47 - INFO - __main__ - Step 14070: {'lr': 0.0004901729753498333, 'samples': 675360, 'steps': 14069, 'loss/train': 1.4720454216003418} +07/25/2024 12:31:48 - INFO - __main__ - Step 14071: {'lr': 0.0004901715148833958, 'samples': 675408, 'steps': 14070, 'loss/train': 0.9319239258766174} +07/25/2024 12:31:48 - INFO - __main__ - Step 14072: {'lr': 0.000490170054310617, 'samples': 675456, 'steps': 14071, 'loss/train': 1.7955708503723145} +07/25/2024 12:31:48 - INFO - __main__ - Step 14073: {'lr': 0.0004901685936314977, 'samples': 675504, 'steps': 14072, 'loss/train': 1.6739590167999268} +07/25/2024 12:31:49 - INFO - __main__ - Step 14074: {'lr': 0.0004901671328460384, 'samples': 675552, 'steps': 14073, 'loss/train': 1.0619219541549683} +07/25/2024 12:31:49 - INFO - __main__ - Step 14075: {'lr': 0.0004901656719542398, 'samples': 675600, 'steps': 14074, 'loss/train': 1.7038403749465942} +07/25/2024 12:31:49 - INFO - __main__ - Step 14076: {'lr': 0.0004901642109561027, 'samples': 675648, 'steps': 14075, 'loss/train': 1.6582905054092407} +07/25/2024 12:31:49 - INFO - __main__ - Step 14077: {'lr': 0.0004901627498516276, 'samples': 675696, 'steps': 14076, 'loss/train': 1.700771689414978} +07/25/2024 12:31:50 - INFO - __main__ - Step 14078: {'lr': 0.0004901612886408152, 'samples': 675744, 'steps': 14077, 'loss/train': 1.7215416431427002} +07/25/2024 12:31:50 - INFO - __main__ - Step 14079: {'lr': 0.000490159827323666, 'samples': 675792, 'steps': 14078, 'loss/train': 1.7023073434829712} +07/25/2024 12:31:50 - INFO - __main__ - Step 14080: {'lr': 0.0004901583659001807, 'samples': 675840, 'steps': 14079, 'loss/train': 1.8045445680618286} +07/25/2024 12:31:50 - INFO - __main__ - Step 14081: {'lr': 0.0004901569043703601, 'samples': 675888, 'steps': 14080, 'loss/train': 0.8498140573501587} +07/25/2024 12:31:51 - INFO - __main__ - Step 14082: {'lr': 0.0004901554427342048, 'samples': 675936, 'steps': 14081, 'loss/train': 1.6996804475784302} +07/25/2024 12:31:51 - INFO - __main__ - Step 14083: {'lr': 0.0004901539809917153, 'samples': 675984, 'steps': 14082, 'loss/train': 1.1698212623596191} +07/25/2024 12:31:51 - INFO - __main__ - Step 14084: {'lr': 0.0004901525191428924, 'samples': 676032, 'steps': 14083, 'loss/train': 1.933272361755371} +07/25/2024 12:31:52 - INFO - __main__ - Step 14085: {'lr': 0.0004901510571877366, 'samples': 676080, 'steps': 14084, 'loss/train': 1.7192388772964478} +07/25/2024 12:31:52 - INFO - __main__ - Step 14086: {'lr': 0.0004901495951262488, 'samples': 676128, 'steps': 14085, 'loss/train': 1.7071107625961304} +07/25/2024 12:31:52 - INFO - __main__ - Step 14087: {'lr': 0.0004901481329584295, 'samples': 676176, 'steps': 14086, 'loss/train': 1.8124672174453735} +07/25/2024 12:31:52 - INFO - __main__ - Step 14088: {'lr': 0.0004901466706842791, 'samples': 676224, 'steps': 14087, 'loss/train': 2.176363706588745} +07/25/2024 12:31:53 - INFO - __main__ - Step 14089: {'lr': 0.0004901452083037987, 'samples': 676272, 'steps': 14088, 'loss/train': 0.2865450084209442} +07/25/2024 12:31:53 - INFO - __main__ - Step 14090: {'lr': 0.0004901437458169886, 'samples': 676320, 'steps': 14089, 'loss/train': 1.68484365940094} +07/25/2024 12:31:53 - INFO - __main__ - Step 14091: {'lr': 0.0004901422832238497, 'samples': 676368, 'steps': 14090, 'loss/train': 2.607215642929077} +07/25/2024 12:31:54 - INFO - __main__ - Step 14092: {'lr': 0.0004901408205243824, 'samples': 676416, 'steps': 14091, 'loss/train': 2.066246747970581} +07/25/2024 12:31:54 - INFO - __main__ - Step 14093: {'lr': 0.0004901393577185875, 'samples': 676464, 'steps': 14092, 'loss/train': 1.9769387245178223} +07/25/2024 12:31:54 - INFO - __main__ - Step 14094: {'lr': 0.0004901378948064657, 'samples': 676512, 'steps': 14093, 'loss/train': 1.5812649726867676} +07/25/2024 12:31:54 - INFO - __main__ - Step 14095: {'lr': 0.0004901364317880175, 'samples': 676560, 'steps': 14094, 'loss/train': 0.9835636019706726} +07/25/2024 12:31:55 - INFO - __main__ - Step 14096: {'lr': 0.0004901349686632437, 'samples': 676608, 'steps': 14095, 'loss/train': 1.6746400594711304} +07/25/2024 12:31:55 - INFO - __main__ - Step 14097: {'lr': 0.0004901335054321448, 'samples': 676656, 'steps': 14096, 'loss/train': 1.1962578296661377} +07/25/2024 12:31:55 - INFO - __main__ - Step 14098: {'lr': 0.0004901320420947215, 'samples': 676704, 'steps': 14097, 'loss/train': 1.5909472703933716} +07/25/2024 12:31:56 - INFO - __main__ - Step 14099: {'lr': 0.0004901305786509744, 'samples': 676752, 'steps': 14098, 'loss/train': 1.9003115892410278} +07/25/2024 12:31:56 - INFO - __main__ - Step 14100: {'lr': 0.0004901291151009045, 'samples': 676800, 'steps': 14099, 'loss/train': 1.0633131265640259} +07/25/2024 12:31:56 - INFO - __main__ - Step 14101: {'lr': 0.0004901276514445119, 'samples': 676848, 'steps': 14100, 'loss/train': 2.3341615200042725} +07/25/2024 12:31:56 - INFO - __main__ - Step 14102: {'lr': 0.0004901261876817975, 'samples': 676896, 'steps': 14101, 'loss/train': 1.329426646232605} +07/25/2024 12:31:57 - INFO - __main__ - Step 14103: {'lr': 0.000490124723812762, 'samples': 676944, 'steps': 14102, 'loss/train': 1.9156070947647095} +07/25/2024 12:31:57 - INFO - __main__ - Step 14104: {'lr': 0.000490123259837406, 'samples': 676992, 'steps': 14103, 'loss/train': 2.119553327560425} +07/25/2024 12:31:57 - INFO - __main__ - Step 14105: {'lr': 0.0004901217957557301, 'samples': 677040, 'steps': 14104, 'loss/train': 0.9688634276390076} +07/25/2024 12:31:58 - INFO - __main__ - Step 14106: {'lr': 0.0004901203315677351, 'samples': 677088, 'steps': 14105, 'loss/train': 1.7696819305419922} +07/25/2024 12:31:58 - INFO - __main__ - Step 14107: {'lr': 0.0004901188672734215, 'samples': 677136, 'steps': 14106, 'loss/train': 0.9083914756774902} +07/25/2024 12:31:58 - INFO - __main__ - Step 14108: {'lr': 0.0004901174028727899, 'samples': 677184, 'steps': 14107, 'loss/train': 1.3470114469528198} +07/25/2024 12:31:58 - INFO - __main__ - Step 14109: {'lr': 0.0004901159383658411, 'samples': 677232, 'steps': 14108, 'loss/train': 1.662819266319275} +07/25/2024 12:31:59 - INFO - __main__ - Step 14110: {'lr': 0.0004901144737525757, 'samples': 677280, 'steps': 14109, 'loss/train': 1.6566461324691772} +07/25/2024 12:31:59 - INFO - __main__ - Step 14111: {'lr': 0.0004901130090329944, 'samples': 677328, 'steps': 14110, 'loss/train': 1.865328073501587} +07/25/2024 12:31:59 - INFO - __main__ - Step 14112: {'lr': 0.0004901115442070978, 'samples': 677376, 'steps': 14111, 'loss/train': 1.8916131258010864} +07/25/2024 12:32:00 - INFO - __main__ - Step 14113: {'lr': 0.0004901100792748864, 'samples': 677424, 'steps': 14112, 'loss/train': 0.22712068259716034} +07/25/2024 12:32:00 - INFO - __main__ - Step 14114: {'lr': 0.0004901086142363612, 'samples': 677472, 'steps': 14113, 'loss/train': 1.868392825126648} +07/25/2024 12:32:00 - INFO - __main__ - Step 14115: {'lr': 0.0004901071490915225, 'samples': 677520, 'steps': 14114, 'loss/train': 2.01716947555542} +07/25/2024 12:32:00 - INFO - __main__ - Step 14116: {'lr': 0.0004901056838403711, 'samples': 677568, 'steps': 14115, 'loss/train': 2.26102876663208} +07/25/2024 12:32:01 - INFO - __main__ - Step 14117: {'lr': 0.0004901042184829075, 'samples': 677616, 'steps': 14116, 'loss/train': 2.016977071762085} +07/25/2024 12:32:01 - INFO - __main__ - Step 14118: {'lr': 0.0004901027530191327, 'samples': 677664, 'steps': 14117, 'loss/train': 1.8077088594436646} +07/25/2024 12:32:01 - INFO - __main__ - Step 14119: {'lr': 0.0004901012874490469, 'samples': 677712, 'steps': 14118, 'loss/train': 0.6032324433326721} +07/25/2024 12:32:02 - INFO - __main__ - Step 14120: {'lr': 0.0004900998217726511, 'samples': 677760, 'steps': 14119, 'loss/train': 1.2874925136566162} +07/25/2024 12:32:02 - INFO - __main__ - Step 14121: {'lr': 0.0004900983559899459, 'samples': 677808, 'steps': 14120, 'loss/train': 2.010164499282837} +07/25/2024 12:32:02 - INFO - __main__ - Step 14122: {'lr': 0.0004900968901009317, 'samples': 677856, 'steps': 14121, 'loss/train': 1.6247045993804932} +07/25/2024 12:32:02 - INFO - __main__ - Step 14123: {'lr': 0.0004900954241056094, 'samples': 677904, 'steps': 14122, 'loss/train': 1.5404306650161743} +07/25/2024 12:32:03 - INFO - __main__ - Step 14124: {'lr': 0.0004900939580039797, 'samples': 677952, 'steps': 14123, 'loss/train': 1.7435946464538574} +07/25/2024 12:32:03 - INFO - __main__ - Step 14125: {'lr': 0.0004900924917960429, 'samples': 678000, 'steps': 14124, 'loss/train': 1.8534080982208252} +07/25/2024 12:32:03 - INFO - __main__ - Step 14126: {'lr': 0.0004900910254818001, 'samples': 678048, 'steps': 14125, 'loss/train': 1.792319893836975} +07/25/2024 12:32:04 - INFO - __main__ - Step 14127: {'lr': 0.0004900895590612516, 'samples': 678096, 'steps': 14126, 'loss/train': 0.6852773427963257} +07/25/2024 12:32:04 - INFO - __main__ - Step 14128: {'lr': 0.0004900880925343982, 'samples': 678144, 'steps': 14127, 'loss/train': 1.855085015296936} +07/25/2024 12:32:04 - INFO - __main__ - Step 14129: {'lr': 0.0004900866259012405, 'samples': 678192, 'steps': 14128, 'loss/train': 0.7521795034408569} +07/25/2024 12:32:04 - INFO - __main__ - Step 14130: {'lr': 0.0004900851591617792, 'samples': 678240, 'steps': 14129, 'loss/train': 1.7880679368972778} +07/25/2024 12:32:05 - INFO - __main__ - Step 14131: {'lr': 0.0004900836923160148, 'samples': 678288, 'steps': 14130, 'loss/train': 1.0034682750701904} +07/25/2024 12:32:05 - INFO - __main__ - Step 14132: {'lr': 0.0004900822253639482, 'samples': 678336, 'steps': 14131, 'loss/train': 1.9870152473449707} +07/25/2024 12:32:05 - INFO - __main__ - Step 14133: {'lr': 0.0004900807583055798, 'samples': 678384, 'steps': 14132, 'loss/train': 1.933286190032959} +07/25/2024 12:32:06 - INFO - __main__ - Step 14134: {'lr': 0.0004900792911409106, 'samples': 678432, 'steps': 14133, 'loss/train': 0.5763664841651917} +07/25/2024 12:32:06 - INFO - __main__ - Step 14135: {'lr': 0.0004900778238699408, 'samples': 678480, 'steps': 14134, 'loss/train': 2.5082039833068848} +07/25/2024 12:32:06 - INFO - __main__ - Step 14136: {'lr': 0.0004900763564926713, 'samples': 678528, 'steps': 14135, 'loss/train': 2.0189261436462402} +07/25/2024 12:32:06 - INFO - __main__ - Step 14137: {'lr': 0.0004900748890091028, 'samples': 678576, 'steps': 14136, 'loss/train': 0.23599755764007568} +07/25/2024 12:32:07 - INFO - __main__ - Step 14138: {'lr': 0.0004900734214192358, 'samples': 678624, 'steps': 14137, 'loss/train': 2.739262342453003} +07/25/2024 12:32:07 - INFO - __main__ - Step 14139: {'lr': 0.000490071953723071, 'samples': 678672, 'steps': 14138, 'loss/train': 1.4435874223709106} +07/25/2024 12:32:07 - INFO - __main__ - Step 14140: {'lr': 0.0004900704859206091, 'samples': 678720, 'steps': 14139, 'loss/train': 1.8675531148910522} +07/25/2024 12:32:08 - INFO - __main__ - Step 14141: {'lr': 0.0004900690180118507, 'samples': 678768, 'steps': 14140, 'loss/train': 1.9225001335144043} +07/25/2024 12:32:08 - INFO - __main__ - Step 14142: {'lr': 0.0004900675499967964, 'samples': 678816, 'steps': 14141, 'loss/train': 1.9732005596160889} +07/25/2024 12:32:08 - INFO - __main__ - Step 14143: {'lr': 0.0004900660818754471, 'samples': 678864, 'steps': 14142, 'loss/train': 0.7811542749404907} +07/25/2024 12:32:08 - INFO - __main__ - Step 14144: {'lr': 0.000490064613647803, 'samples': 678912, 'steps': 14143, 'loss/train': 1.1204901933670044} +07/25/2024 12:32:09 - INFO - __main__ - Step 14145: {'lr': 0.0004900631453138653, 'samples': 678960, 'steps': 14144, 'loss/train': 2.1812286376953125} +07/25/2024 12:32:09 - INFO - __main__ - Step 14146: {'lr': 0.0004900616768736343, 'samples': 679008, 'steps': 14145, 'loss/train': 1.077347993850708} +07/25/2024 12:32:09 - INFO - __main__ - Step 14147: {'lr': 0.0004900602083271106, 'samples': 679056, 'steps': 14146, 'loss/train': 1.7653049230575562} +07/25/2024 12:32:10 - INFO - __main__ - Step 14148: {'lr': 0.0004900587396742949, 'samples': 679104, 'steps': 14147, 'loss/train': 1.3775378465652466} +07/25/2024 12:32:10 - INFO - __main__ - Step 14149: {'lr': 0.0004900572709151882, 'samples': 679152, 'steps': 14148, 'loss/train': 1.6849355697631836} +07/25/2024 12:32:10 - INFO - __main__ - Step 14150: {'lr': 0.0004900558020497906, 'samples': 679200, 'steps': 14149, 'loss/train': 1.507136583328247} +07/25/2024 12:32:10 - INFO - __main__ - Step 14151: {'lr': 0.0004900543330781032, 'samples': 679248, 'steps': 14150, 'loss/train': 0.7831141948699951} +07/25/2024 12:32:11 - INFO - __main__ - Step 14152: {'lr': 0.0004900528640001264, 'samples': 679296, 'steps': 14151, 'loss/train': 1.6850980520248413} +07/25/2024 12:32:11 - INFO - __main__ - Step 14153: {'lr': 0.0004900513948158608, 'samples': 679344, 'steps': 14152, 'loss/train': 0.802989661693573} +07/25/2024 12:32:11 - INFO - __main__ - Step 14154: {'lr': 0.0004900499255253072, 'samples': 679392, 'steps': 14153, 'loss/train': 2.1104736328125} +07/25/2024 12:32:12 - INFO - __main__ - Step 14155: {'lr': 0.0004900484561284664, 'samples': 679440, 'steps': 14154, 'loss/train': 1.2628657817840576} +07/25/2024 12:32:12 - INFO - __main__ - Step 14156: {'lr': 0.0004900469866253388, 'samples': 679488, 'steps': 14155, 'loss/train': 1.8892308473587036} +07/25/2024 12:32:12 - INFO - __main__ - Step 14157: {'lr': 0.000490045517015925, 'samples': 679536, 'steps': 14156, 'loss/train': 0.6803908348083496} +07/25/2024 12:32:12 - INFO - __main__ - Step 14158: {'lr': 0.0004900440473002259, 'samples': 679584, 'steps': 14157, 'loss/train': 1.6079044342041016} +07/25/2024 12:32:13 - INFO - __main__ - Step 14159: {'lr': 0.000490042577478242, 'samples': 679632, 'steps': 14158, 'loss/train': 2.46112322807312} +07/25/2024 12:32:13 - INFO - __main__ - Step 14160: {'lr': 0.0004900411075499739, 'samples': 679680, 'steps': 14159, 'loss/train': 1.8051300048828125} +07/25/2024 12:32:13 - INFO - __main__ - Step 14161: {'lr': 0.0004900396375154223, 'samples': 679728, 'steps': 14160, 'loss/train': 0.21503303945064545} +07/25/2024 12:32:13 - INFO - __main__ - Step 14162: {'lr': 0.000490038167374588, 'samples': 679776, 'steps': 14161, 'loss/train': 1.99709153175354} +07/25/2024 12:32:14 - INFO - __main__ - Step 14163: {'lr': 0.0004900366971274714, 'samples': 679824, 'steps': 14162, 'loss/train': 1.2007176876068115} +07/25/2024 12:32:14 - INFO - __main__ - Step 14164: {'lr': 0.0004900352267740735, 'samples': 679872, 'steps': 14163, 'loss/train': 1.4100615978240967} +07/25/2024 12:32:14 - INFO - __main__ - Step 14165: {'lr': 0.0004900337563143945, 'samples': 679920, 'steps': 14164, 'loss/train': 1.2995867729187012} +07/25/2024 12:32:15 - INFO - __main__ - Step 14166: {'lr': 0.0004900322857484354, 'samples': 679968, 'steps': 14165, 'loss/train': 2.3115711212158203} +07/25/2024 12:32:15 - INFO - __main__ - Step 14167: {'lr': 0.0004900308150761967, 'samples': 680016, 'steps': 14166, 'loss/train': 1.1759426593780518} +07/25/2024 12:32:15 - INFO - __main__ - Step 14168: {'lr': 0.000490029344297679, 'samples': 680064, 'steps': 14167, 'loss/train': 2.3996658325195312} +07/25/2024 12:32:15 - INFO - __main__ - Step 14169: {'lr': 0.0004900278734128831, 'samples': 680112, 'steps': 14168, 'loss/train': 2.0984890460968018} +07/25/2024 12:32:16 - INFO - __main__ - Step 14170: {'lr': 0.0004900264024218095, 'samples': 680160, 'steps': 14169, 'loss/train': 1.6736754179000854} +07/25/2024 12:32:16 - INFO - __main__ - Step 14171: {'lr': 0.000490024931324459, 'samples': 680208, 'steps': 14170, 'loss/train': 0.3171278238296509} +07/25/2024 12:32:16 - INFO - __main__ - Step 14172: {'lr': 0.0004900234601208322, 'samples': 680256, 'steps': 14171, 'loss/train': 1.8242677450180054} +07/25/2024 12:32:17 - INFO - __main__ - Step 14173: {'lr': 0.0004900219888109297, 'samples': 680304, 'steps': 14172, 'loss/train': 1.4536402225494385} +07/25/2024 12:32:17 - INFO - __main__ - Step 14174: {'lr': 0.0004900205173947522, 'samples': 680352, 'steps': 14173, 'loss/train': 1.644002079963684} +07/25/2024 12:32:17 - INFO - __main__ - Step 14175: {'lr': 0.0004900190458723005, 'samples': 680400, 'steps': 14174, 'loss/train': 1.0725526809692383} +07/25/2024 12:32:17 - INFO - __main__ - Step 14176: {'lr': 0.0004900175742435749, 'samples': 680448, 'steps': 14175, 'loss/train': 1.2875713109970093} +07/25/2024 12:32:18 - INFO - __main__ - Step 14177: {'lr': 0.0004900161025085763, 'samples': 680496, 'steps': 14176, 'loss/train': 0.7649598121643066} +07/25/2024 12:32:18 - INFO - __main__ - Step 14178: {'lr': 0.0004900146306673054, 'samples': 680544, 'steps': 14177, 'loss/train': 1.4189177751541138} +07/25/2024 12:32:18 - INFO - __main__ - Step 14179: {'lr': 0.0004900131587197626, 'samples': 680592, 'steps': 14178, 'loss/train': 0.9760828614234924} +07/25/2024 12:32:19 - INFO - __main__ - Step 14180: {'lr': 0.0004900116866659487, 'samples': 680640, 'steps': 14179, 'loss/train': 1.9142836332321167} +07/25/2024 12:32:19 - INFO - __main__ - Step 14181: {'lr': 0.0004900102145058644, 'samples': 680688, 'steps': 14180, 'loss/train': 1.523174524307251} +07/25/2024 12:32:19 - INFO - __main__ - Step 14182: {'lr': 0.0004900087422395102, 'samples': 680736, 'steps': 14181, 'loss/train': 1.6914228200912476} +07/25/2024 12:32:19 - INFO - __main__ - Step 14183: {'lr': 0.0004900072698668871, 'samples': 680784, 'steps': 14182, 'loss/train': 1.6363850831985474} +07/25/2024 12:32:20 - INFO - __main__ - Step 14184: {'lr': 0.0004900057973879952, 'samples': 680832, 'steps': 14183, 'loss/train': 1.8825932741165161} +07/25/2024 12:32:20 - INFO - __main__ - Step 14185: {'lr': 0.0004900043248028356, 'samples': 680880, 'steps': 14184, 'loss/train': 0.15540586411952972} +07/25/2024 12:32:20 - INFO - __main__ - Step 14186: {'lr': 0.0004900028521114089, 'samples': 680928, 'steps': 14185, 'loss/train': 1.9623407125473022} +07/25/2024 12:32:21 - INFO - __main__ - Step 14187: {'lr': 0.0004900013793137155, 'samples': 680976, 'steps': 14186, 'loss/train': 1.9346412420272827} +07/25/2024 12:32:21 - INFO - __main__ - Step 14188: {'lr': 0.0004899999064097564, 'samples': 681024, 'steps': 14187, 'loss/train': 1.552310585975647} +07/25/2024 12:32:21 - INFO - __main__ - Step 14189: {'lr': 0.0004899984333995319, 'samples': 681072, 'steps': 14188, 'loss/train': 1.717971682548523} +07/25/2024 12:32:21 - INFO - __main__ - Step 14190: {'lr': 0.0004899969602830429, 'samples': 681120, 'steps': 14189, 'loss/train': 2.6733994483947754} +07/25/2024 12:32:22 - INFO - __main__ - Step 14191: {'lr': 0.00048999548706029, 'samples': 681168, 'steps': 14190, 'loss/train': 0.8648079633712769} +07/25/2024 12:32:22 - INFO - __main__ - Step 14192: {'lr': 0.0004899940137312737, 'samples': 681216, 'steps': 14191, 'loss/train': 2.3495612144470215} +07/25/2024 12:32:22 - INFO - __main__ - Step 14193: {'lr': 0.0004899925402959949, 'samples': 681264, 'steps': 14192, 'loss/train': 1.7313930988311768} +07/25/2024 12:32:23 - INFO - __main__ - Step 14194: {'lr': 0.000489991066754454, 'samples': 681312, 'steps': 14193, 'loss/train': 1.8155087232589722} +07/25/2024 12:32:23 - INFO - __main__ - Step 14195: {'lr': 0.0004899895931066518, 'samples': 681360, 'steps': 14194, 'loss/train': 0.1177433431148529} +07/25/2024 12:32:23 - INFO - __main__ - Step 14196: {'lr': 0.000489988119352589, 'samples': 681408, 'steps': 14195, 'loss/train': 1.9399077892303467} +07/25/2024 12:32:23 - INFO - __main__ - Step 14197: {'lr': 0.0004899866454922663, 'samples': 681456, 'steps': 14196, 'loss/train': 1.8696922063827515} +07/25/2024 12:32:24 - INFO - __main__ - Step 14198: {'lr': 0.000489985171525684, 'samples': 681504, 'steps': 14197, 'loss/train': 1.899850606918335} +07/25/2024 12:32:24 - INFO - __main__ - Step 14199: {'lr': 0.0004899836974528431, 'samples': 681552, 'steps': 14198, 'loss/train': 1.252732753753662} +07/25/2024 12:32:24 - INFO - __main__ - Step 14200: {'lr': 0.0004899822232737442, 'samples': 681600, 'steps': 14199, 'loss/train': 2.132788896560669} +07/25/2024 12:32:25 - INFO - __main__ - Step 14201: {'lr': 0.0004899807489883878, 'samples': 681648, 'steps': 14200, 'loss/train': 0.6928120851516724} +07/25/2024 12:32:25 - INFO - __main__ - Step 14202: {'lr': 0.0004899792745967748, 'samples': 681696, 'steps': 14201, 'loss/train': 1.8631517887115479} +07/25/2024 12:32:25 - INFO - __main__ - Step 14203: {'lr': 0.0004899778000989055, 'samples': 681744, 'steps': 14202, 'loss/train': 0.9627642035484314} +07/25/2024 12:32:25 - INFO - __main__ - Step 14204: {'lr': 0.0004899763254947808, 'samples': 681792, 'steps': 14203, 'loss/train': 2.330838680267334} +07/25/2024 12:32:26 - INFO - __main__ - Step 14205: {'lr': 0.0004899748507844014, 'samples': 681840, 'steps': 14204, 'loss/train': 1.9623061418533325} +07/25/2024 12:32:26 - INFO - __main__ - Step 14206: {'lr': 0.0004899733759677678, 'samples': 681888, 'steps': 14205, 'loss/train': 0.8614903092384338} +07/25/2024 12:32:26 - INFO - __main__ - Step 14207: {'lr': 0.0004899719010448807, 'samples': 681936, 'steps': 14206, 'loss/train': 1.569187045097351} +07/25/2024 12:32:27 - INFO - __main__ - Step 14208: {'lr': 0.0004899704260157409, 'samples': 681984, 'steps': 14207, 'loss/train': 1.9155571460723877} +07/25/2024 12:32:27 - INFO - __main__ - Step 14209: {'lr': 0.0004899689508803487, 'samples': 682032, 'steps': 14208, 'loss/train': 0.23126867413520813} +07/25/2024 12:32:27 - INFO - __main__ - Step 14210: {'lr': 0.0004899674756387051, 'samples': 682080, 'steps': 14209, 'loss/train': 1.9332444667816162} +07/25/2024 12:32:27 - INFO - __main__ - Step 14211: {'lr': 0.0004899660002908107, 'samples': 682128, 'steps': 14210, 'loss/train': 1.6671910285949707} +07/25/2024 12:32:28 - INFO - __main__ - Step 14212: {'lr': 0.0004899645248366658, 'samples': 682176, 'steps': 14211, 'loss/train': 2.2815582752227783} +07/25/2024 12:32:28 - INFO - __main__ - Step 14213: {'lr': 0.0004899630492762716, 'samples': 682224, 'steps': 14212, 'loss/train': 2.038682460784912} +07/25/2024 12:32:28 - INFO - __main__ - Step 14214: {'lr': 0.0004899615736096283, 'samples': 682272, 'steps': 14213, 'loss/train': 2.7496416568756104} +07/25/2024 12:32:29 - INFO - __main__ - Step 14215: {'lr': 0.0004899600978367369, 'samples': 682320, 'steps': 14214, 'loss/train': 0.6119776368141174} +07/25/2024 12:32:29 - INFO - __main__ - Step 14216: {'lr': 0.0004899586219575979, 'samples': 682368, 'steps': 14215, 'loss/train': 1.9675147533416748} +07/25/2024 12:32:29 - INFO - __main__ - Step 14217: {'lr': 0.0004899571459722118, 'samples': 682416, 'steps': 14216, 'loss/train': 2.0415635108947754} +07/25/2024 12:32:29 - INFO - __main__ - Step 14218: {'lr': 0.0004899556698805795, 'samples': 682464, 'steps': 14217, 'loss/train': 1.896063208580017} +07/25/2024 12:32:30 - INFO - __main__ - Step 14219: {'lr': 0.0004899541936827015, 'samples': 682512, 'steps': 14218, 'loss/train': 0.1545640230178833} +07/25/2024 12:32:30 - INFO - __main__ - Step 14220: {'lr': 0.0004899527173785786, 'samples': 682560, 'steps': 14219, 'loss/train': 1.6145174503326416} +07/25/2024 12:32:30 - INFO - __main__ - Step 14221: {'lr': 0.0004899512409682113, 'samples': 682608, 'steps': 14220, 'loss/train': 1.7808915376663208} +07/25/2024 12:32:31 - INFO - __main__ - Step 14222: {'lr': 0.0004899497644516004, 'samples': 682656, 'steps': 14221, 'loss/train': 2.295703887939453} +07/25/2024 12:32:31 - INFO - __main__ - Step 14223: {'lr': 0.0004899482878287463, 'samples': 682704, 'steps': 14222, 'loss/train': 1.0182514190673828} +07/25/2024 12:32:31 - INFO - __main__ - Step 14224: {'lr': 0.0004899468110996499, 'samples': 682752, 'steps': 14223, 'loss/train': 2.186788558959961} +07/25/2024 12:32:31 - INFO - __main__ - Step 14225: {'lr': 0.0004899453342643117, 'samples': 682800, 'steps': 14224, 'loss/train': 0.7154544591903687} +07/25/2024 12:32:32 - INFO - __main__ - Step 14226: {'lr': 0.0004899438573227325, 'samples': 682848, 'steps': 14225, 'loss/train': 1.9309645891189575} +07/25/2024 12:32:32 - INFO - __main__ - Step 14227: {'lr': 0.000489942380274913, 'samples': 682896, 'steps': 14226, 'loss/train': 1.0008758306503296} +07/25/2024 12:32:32 - INFO - __main__ - Step 14228: {'lr': 0.0004899409031208536, 'samples': 682944, 'steps': 14227, 'loss/train': 1.9093865156173706} +07/25/2024 12:32:33 - INFO - __main__ - Step 14229: {'lr': 0.000489939425860555, 'samples': 682992, 'steps': 14228, 'loss/train': 2.0399532318115234} +07/25/2024 12:32:33 - INFO - __main__ - Step 14230: {'lr': 0.0004899379484940182, 'samples': 683040, 'steps': 14229, 'loss/train': 1.6523076295852661} +07/25/2024 12:32:33 - INFO - __main__ - Step 14231: {'lr': 0.0004899364710212433, 'samples': 683088, 'steps': 14230, 'loss/train': 0.9160648584365845} +07/25/2024 12:32:33 - INFO - __main__ - Step 14232: {'lr': 0.0004899349934422315, 'samples': 683136, 'steps': 14231, 'loss/train': 2.1400623321533203} +07/25/2024 12:32:34 - INFO - __main__ - Step 14233: {'lr': 0.0004899335157569831, 'samples': 683184, 'steps': 14232, 'loss/train': 0.2244376242160797} +07/25/2024 12:32:34 - INFO - __main__ - Step 14234: {'lr': 0.0004899320379654989, 'samples': 683232, 'steps': 14233, 'loss/train': 2.126192331314087} +07/25/2024 12:32:34 - INFO - __main__ - Step 14235: {'lr': 0.0004899305600677796, 'samples': 683280, 'steps': 14234, 'loss/train': 1.8911951780319214} +07/25/2024 12:32:35 - INFO - __main__ - Step 14236: {'lr': 0.0004899290820638256, 'samples': 683328, 'steps': 14235, 'loss/train': 2.0283737182617188} +07/25/2024 12:32:35 - INFO - __main__ - Step 14237: {'lr': 0.0004899276039536378, 'samples': 683376, 'steps': 14236, 'loss/train': 2.0855112075805664} +07/25/2024 12:32:35 - INFO - __main__ - Step 14238: {'lr': 0.0004899261257372168, 'samples': 683424, 'steps': 14237, 'loss/train': 2.1014585494995117} +07/25/2024 12:32:35 - INFO - __main__ - Step 14239: {'lr': 0.0004899246474145632, 'samples': 683472, 'steps': 14238, 'loss/train': 0.7739297747612} +07/25/2024 12:32:36 - INFO - __main__ - Step 14240: {'lr': 0.0004899231689856776, 'samples': 683520, 'steps': 14239, 'loss/train': 1.9374028444290161} +07/25/2024 12:32:36 - INFO - __main__ - Step 14241: {'lr': 0.0004899216904505609, 'samples': 683568, 'steps': 14240, 'loss/train': 2.6438405513763428} +07/25/2024 12:32:36 - DEBUG - datasets.packaged_modules.json.json - Batch of 10572592 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:32:36 - INFO - __main__ - Step 14242: {'lr': 0.0004899202118092135, 'samples': 683616, 'steps': 14241, 'loss/train': 1.5578539371490479} +07/25/2024 12:32:37 - INFO - __main__ - Step 14243: {'lr': 0.0004899187330616362, 'samples': 683664, 'steps': 14242, 'loss/train': 0.17412309348583221} +07/25/2024 12:32:37 - INFO - __main__ - Step 14244: {'lr': 0.0004899172542078297, 'samples': 683712, 'steps': 14243, 'loss/train': 2.3362791538238525} +07/25/2024 12:32:37 - INFO - __main__ - Step 14245: {'lr': 0.0004899157752477944, 'samples': 683760, 'steps': 14244, 'loss/train': 1.69489324092865} +07/25/2024 12:32:37 - INFO - __main__ - Step 14246: {'lr': 0.0004899142961815312, 'samples': 683808, 'steps': 14245, 'loss/train': 1.5131374597549438} +07/25/2024 12:32:38 - INFO - __main__ - Step 14247: {'lr': 0.0004899128170090406, 'samples': 683856, 'steps': 14246, 'loss/train': 1.626941204071045} +07/25/2024 12:32:38 - INFO - __main__ - Step 14248: {'lr': 0.0004899113377303234, 'samples': 683904, 'steps': 14247, 'loss/train': 1.740498661994934} +07/25/2024 12:32:38 - INFO - __main__ - Step 14249: {'lr': 0.0004899098583453802, 'samples': 683952, 'steps': 14248, 'loss/train': 0.8444044589996338} +07/25/2024 12:32:38 - INFO - __main__ - Step 14250: {'lr': 0.0004899083788542116, 'samples': 684000, 'steps': 14249, 'loss/train': 1.5308791399002075} +07/25/2024 12:32:39 - INFO - __main__ - Step 14251: {'lr': 0.0004899068992568183, 'samples': 684048, 'steps': 14250, 'loss/train': 0.8991661667823792} +07/25/2024 12:32:39 - INFO - __main__ - Step 14252: {'lr': 0.0004899054195532009, 'samples': 684096, 'steps': 14251, 'loss/train': 2.1258506774902344} +07/25/2024 12:32:39 - INFO - __main__ - Step 14253: {'lr': 0.0004899039397433601, 'samples': 684144, 'steps': 14252, 'loss/train': 1.4740612506866455} +07/25/2024 12:32:40 - INFO - __main__ - Step 14254: {'lr': 0.0004899024598272966, 'samples': 684192, 'steps': 14253, 'loss/train': 1.547959804534912} +07/25/2024 12:32:40 - INFO - __main__ - Step 14255: {'lr': 0.000489900979805011, 'samples': 684240, 'steps': 14254, 'loss/train': 1.0422362089157104} +07/25/2024 12:32:40 - INFO - __main__ - Step 14256: {'lr': 0.0004898994996765041, 'samples': 684288, 'steps': 14255, 'loss/train': 1.2566941976547241} +07/25/2024 12:32:40 - INFO - __main__ - Step 14257: {'lr': 0.0004898980194417762, 'samples': 684336, 'steps': 14256, 'loss/train': 0.41699448227882385} +07/25/2024 12:32:41 - INFO - __main__ - Step 14258: {'lr': 0.0004898965391008283, 'samples': 684384, 'steps': 14257, 'loss/train': 1.861204981803894} +07/25/2024 12:32:41 - INFO - __main__ - Step 14259: {'lr': 0.0004898950586536609, 'samples': 684432, 'steps': 14258, 'loss/train': 2.032932758331299} +07/25/2024 12:32:41 - INFO - __main__ - Step 14260: {'lr': 0.0004898935781002747, 'samples': 684480, 'steps': 14259, 'loss/train': 2.151301860809326} +07/25/2024 12:32:42 - INFO - __main__ - Step 14261: {'lr': 0.0004898920974406705, 'samples': 684528, 'steps': 14260, 'loss/train': 2.393040657043457} +07/25/2024 12:32:42 - INFO - __main__ - Step 14262: {'lr': 0.0004898906166748485, 'samples': 684576, 'steps': 14261, 'loss/train': 1.9292956590652466} +07/25/2024 12:32:42 - INFO - __main__ - Step 14263: {'lr': 0.0004898891358028098, 'samples': 684624, 'steps': 14262, 'loss/train': 0.6808069348335266} +07/25/2024 12:32:42 - INFO - __main__ - Step 14264: {'lr': 0.000489887654824555, 'samples': 684672, 'steps': 14263, 'loss/train': 1.6265345811843872} +07/25/2024 12:32:43 - INFO - __main__ - Step 14265: {'lr': 0.0004898861737400846, 'samples': 684720, 'steps': 14264, 'loss/train': 2.0170228481292725} +07/25/2024 12:32:43 - INFO - __main__ - Step 14266: {'lr': 0.0004898846925493992, 'samples': 684768, 'steps': 14265, 'loss/train': 1.930497169494629} +07/25/2024 12:32:43 - INFO - __main__ - Step 14267: {'lr': 0.0004898832112524997, 'samples': 684816, 'steps': 14266, 'loss/train': 0.14619645476341248} +07/25/2024 12:32:44 - INFO - __main__ - Step 14268: {'lr': 0.0004898817298493867, 'samples': 684864, 'steps': 14267, 'loss/train': 1.7768940925598145} +07/25/2024 12:32:44 - INFO - __main__ - Step 14269: {'lr': 0.0004898802483400607, 'samples': 684912, 'steps': 14268, 'loss/train': 2.3068654537200928} +07/25/2024 12:32:44 - INFO - __main__ - Step 14270: {'lr': 0.0004898787667245224, 'samples': 684960, 'steps': 14269, 'loss/train': 2.2245655059814453} +07/25/2024 12:32:44 - INFO - __main__ - Step 14271: {'lr': 0.0004898772850027726, 'samples': 685008, 'steps': 14270, 'loss/train': 2.172574043273926} +07/25/2024 12:32:45 - INFO - __main__ - Step 14272: {'lr': 0.0004898758031748119, 'samples': 685056, 'steps': 14271, 'loss/train': 1.6227408647537231} +07/25/2024 12:32:45 - INFO - __main__ - Step 14273: {'lr': 0.0004898743212406408, 'samples': 685104, 'steps': 14272, 'loss/train': 0.7867647409439087} +07/25/2024 12:32:45 - INFO - __main__ - Step 14274: {'lr': 0.0004898728392002601, 'samples': 685152, 'steps': 14273, 'loss/train': 1.8216679096221924} +07/25/2024 12:32:46 - INFO - __main__ - Step 14275: {'lr': 0.0004898713570536705, 'samples': 685200, 'steps': 14274, 'loss/train': 0.9280842542648315} +07/25/2024 12:32:46 - INFO - __main__ - Step 14276: {'lr': 0.0004898698748008725, 'samples': 685248, 'steps': 14275, 'loss/train': 1.785351276397705} +07/25/2024 12:32:46 - INFO - __main__ - Step 14277: {'lr': 0.0004898683924418668, 'samples': 685296, 'steps': 14276, 'loss/train': 1.5304027795791626} +07/25/2024 12:32:46 - INFO - __main__ - Step 14278: {'lr': 0.0004898669099766542, 'samples': 685344, 'steps': 14277, 'loss/train': 2.132934093475342} +07/25/2024 12:32:47 - INFO - __main__ - Step 14279: {'lr': 0.0004898654274052353, 'samples': 685392, 'steps': 14278, 'loss/train': 2.159926176071167} +07/25/2024 12:32:47 - INFO - __main__ - Step 14280: {'lr': 0.0004898639447276105, 'samples': 685440, 'steps': 14279, 'loss/train': 1.7576847076416016} +07/25/2024 12:32:47 - INFO - __main__ - Step 14281: {'lr': 0.0004898624619437808, 'samples': 685488, 'steps': 14280, 'loss/train': 1.1534682512283325} +07/25/2024 12:32:48 - INFO - __main__ - Step 14282: {'lr': 0.0004898609790537468, 'samples': 685536, 'steps': 14281, 'loss/train': 1.8456257581710815} +07/25/2024 12:32:48 - INFO - __main__ - Step 14283: {'lr': 0.0004898594960575089, 'samples': 685584, 'steps': 14282, 'loss/train': 1.9177727699279785} +07/25/2024 12:32:48 - INFO - __main__ - Step 14284: {'lr': 0.0004898580129550681, 'samples': 685632, 'steps': 14283, 'loss/train': 1.714345932006836} +07/25/2024 12:32:48 - INFO - __main__ - Step 14285: {'lr': 0.0004898565297464248, 'samples': 685680, 'steps': 14284, 'loss/train': 1.5481245517730713} +07/25/2024 12:32:49 - INFO - __main__ - Step 14286: {'lr': 0.0004898550464315798, 'samples': 685728, 'steps': 14285, 'loss/train': 1.6317542791366577} +07/25/2024 12:32:49 - INFO - __main__ - Step 14287: {'lr': 0.0004898535630105336, 'samples': 685776, 'steps': 14286, 'loss/train': 0.5113502740859985} +07/25/2024 12:32:49 - INFO - __main__ - Step 14288: {'lr': 0.0004898520794832872, 'samples': 685824, 'steps': 14287, 'loss/train': 1.7123496532440186} +07/25/2024 12:32:50 - INFO - __main__ - Step 14289: {'lr': 0.0004898505958498407, 'samples': 685872, 'steps': 14288, 'loss/train': 0.5392670035362244} +07/25/2024 12:32:50 - INFO - __main__ - Step 14290: {'lr': 0.0004898491121101954, 'samples': 685920, 'steps': 14289, 'loss/train': 1.507386565208435} +07/25/2024 12:32:50 - INFO - __main__ - Step 14291: {'lr': 0.0004898476282643513, 'samples': 685968, 'steps': 14290, 'loss/train': 0.1749221384525299} +07/25/2024 12:32:50 - INFO - __main__ - Step 14292: {'lr': 0.0004898461443123097, 'samples': 686016, 'steps': 14291, 'loss/train': 2.0131518840789795} +07/25/2024 12:32:51 - INFO - __main__ - Step 14293: {'lr': 0.0004898446602540707, 'samples': 686064, 'steps': 14292, 'loss/train': 1.7802455425262451} +07/25/2024 12:32:51 - INFO - __main__ - Step 14294: {'lr': 0.0004898431760896354, 'samples': 686112, 'steps': 14293, 'loss/train': 1.5819759368896484} +07/25/2024 12:32:51 - INFO - __main__ - Step 14295: {'lr': 0.0004898416918190042, 'samples': 686160, 'steps': 14294, 'loss/train': 1.93607497215271} +07/25/2024 12:32:52 - INFO - __main__ - Step 14296: {'lr': 0.0004898402074421778, 'samples': 686208, 'steps': 14295, 'loss/train': 1.9996483325958252} +07/25/2024 12:32:52 - INFO - __main__ - Step 14297: {'lr': 0.0004898387229591569, 'samples': 686256, 'steps': 14296, 'loss/train': 0.7420142889022827} +07/25/2024 12:32:52 - INFO - __main__ - Step 14298: {'lr': 0.0004898372383699421, 'samples': 686304, 'steps': 14297, 'loss/train': 1.9745818376541138} +07/25/2024 12:32:52 - INFO - __main__ - Step 14299: {'lr': 0.000489835753674534, 'samples': 686352, 'steps': 14298, 'loss/train': 0.9033025503158569} +07/25/2024 12:32:53 - INFO - __main__ - Step 14300: {'lr': 0.0004898342688729334, 'samples': 686400, 'steps': 14299, 'loss/train': 2.3077149391174316} +07/25/2024 12:32:53 - INFO - __main__ - Step 14301: {'lr': 0.000489832783965141, 'samples': 686448, 'steps': 14300, 'loss/train': 2.2390222549438477} +07/25/2024 12:32:53 - INFO - __main__ - Step 14302: {'lr': 0.0004898312989511573, 'samples': 686496, 'steps': 14301, 'loss/train': 2.048783779144287} +07/25/2024 12:32:54 - INFO - __main__ - Step 14303: {'lr': 0.0004898298138309831, 'samples': 686544, 'steps': 14302, 'loss/train': 2.530980110168457} +07/25/2024 12:32:54 - INFO - __main__ - Step 14304: {'lr': 0.0004898283286046189, 'samples': 686592, 'steps': 14303, 'loss/train': 1.7529677152633667} +07/25/2024 12:32:54 - INFO - __main__ - Step 14305: {'lr': 0.0004898268432720654, 'samples': 686640, 'steps': 14304, 'loss/train': 1.0772699117660522} +07/25/2024 12:32:54 - INFO - __main__ - Step 14306: {'lr': 0.0004898253578333233, 'samples': 686688, 'steps': 14305, 'loss/train': 1.955263376235962} +07/25/2024 12:32:55 - INFO - __main__ - Step 14307: {'lr': 0.0004898238722883933, 'samples': 686736, 'steps': 14306, 'loss/train': 1.9443919658660889} +07/25/2024 12:32:55 - INFO - __main__ - Step 14308: {'lr': 0.0004898223866372761, 'samples': 686784, 'steps': 14307, 'loss/train': 1.8136627674102783} +07/25/2024 12:32:55 - INFO - __main__ - Step 14309: {'lr': 0.0004898209008799722, 'samples': 686832, 'steps': 14308, 'loss/train': 2.153872013092041} +07/25/2024 12:32:56 - INFO - __main__ - Step 14310: {'lr': 0.0004898194150164823, 'samples': 686880, 'steps': 14309, 'loss/train': 1.6587032079696655} +07/25/2024 12:32:56 - INFO - __main__ - Step 14311: {'lr': 0.000489817929046807, 'samples': 686928, 'steps': 14310, 'loss/train': 0.7437333464622498} +07/25/2024 12:32:56 - INFO - __main__ - Step 14312: {'lr': 0.0004898164429709472, 'samples': 686976, 'steps': 14311, 'loss/train': 1.8632129430770874} +07/25/2024 12:32:56 - INFO - __main__ - Step 14313: {'lr': 0.0004898149567889033, 'samples': 687024, 'steps': 14312, 'loss/train': 0.5434642434120178} +07/25/2024 12:32:57 - INFO - __main__ - Step 14314: {'lr': 0.000489813470500676, 'samples': 687072, 'steps': 14313, 'loss/train': 1.5197237730026245} +07/25/2024 12:32:57 - INFO - __main__ - Step 14315: {'lr': 0.0004898119841062661, 'samples': 687120, 'steps': 14314, 'loss/train': 0.18229445815086365} +07/25/2024 12:32:57 - INFO - __main__ - Step 14316: {'lr': 0.0004898104976056743, 'samples': 687168, 'steps': 14315, 'loss/train': 1.88733971118927} +07/25/2024 12:32:57 - INFO - __main__ - Step 14317: {'lr': 0.0004898090109989008, 'samples': 687216, 'steps': 14316, 'loss/train': 2.0685322284698486} +07/25/2024 12:32:58 - INFO - __main__ - Step 14318: {'lr': 0.0004898075242859469, 'samples': 687264, 'steps': 14317, 'loss/train': 1.8542397022247314} +07/25/2024 12:32:58 - INFO - __main__ - Step 14319: {'lr': 0.0004898060374668128, 'samples': 687312, 'steps': 14318, 'loss/train': 1.8708484172821045} +07/25/2024 12:32:58 - INFO - __main__ - Step 14320: {'lr': 0.0004898045505414993, 'samples': 687360, 'steps': 14319, 'loss/train': 1.5564846992492676} +07/25/2024 12:32:59 - INFO - __main__ - Step 14321: {'lr': 0.0004898030635100072, 'samples': 687408, 'steps': 14320, 'loss/train': 0.7300568222999573} +07/25/2024 12:32:59 - INFO - __main__ - Step 14322: {'lr': 0.0004898015763723369, 'samples': 687456, 'steps': 14321, 'loss/train': 1.4850140810012817} +07/25/2024 12:32:59 - INFO - __main__ - Step 14323: {'lr': 0.0004898000891284892, 'samples': 687504, 'steps': 14322, 'loss/train': 0.9319223165512085} +07/25/2024 12:32:59 - INFO - __main__ - Step 14324: {'lr': 0.0004897986017784647, 'samples': 687552, 'steps': 14323, 'loss/train': 1.4697879552841187} +07/25/2024 12:33:00 - INFO - __main__ - Step 14325: {'lr': 0.0004897971143222642, 'samples': 687600, 'steps': 14324, 'loss/train': 1.9437838792800903} +07/25/2024 12:33:00 - INFO - __main__ - Step 14326: {'lr': 0.0004897956267598881, 'samples': 687648, 'steps': 14325, 'loss/train': 2.3194501399993896} +07/25/2024 12:33:00 - INFO - __main__ - Step 14327: {'lr': 0.0004897941390913374, 'samples': 687696, 'steps': 14326, 'loss/train': 1.7482773065567017} +07/25/2024 12:33:01 - INFO - __main__ - Step 14328: {'lr': 0.0004897926513166125, 'samples': 687744, 'steps': 14327, 'loss/train': 2.485689878463745} +07/25/2024 12:33:01 - INFO - __main__ - Step 14329: {'lr': 0.000489791163435714, 'samples': 687792, 'steps': 14328, 'loss/train': 1.4586806297302246} +07/25/2024 12:33:01 - INFO - __main__ - Step 14330: {'lr': 0.0004897896754486429, 'samples': 687840, 'steps': 14329, 'loss/train': 1.8500429391860962} +07/25/2024 12:33:01 - INFO - __main__ - Step 14331: {'lr': 0.0004897881873553995, 'samples': 687888, 'steps': 14330, 'loss/train': 1.7719943523406982} +07/25/2024 12:33:02 - INFO - __main__ - Step 14332: {'lr': 0.0004897866991559847, 'samples': 687936, 'steps': 14331, 'loss/train': 1.7667381763458252} +07/25/2024 12:33:02 - INFO - __main__ - Step 14333: {'lr': 0.0004897852108503991, 'samples': 687984, 'steps': 14332, 'loss/train': 1.8963117599487305} +07/25/2024 12:33:02 - INFO - __main__ - Step 14334: {'lr': 0.0004897837224386432, 'samples': 688032, 'steps': 14333, 'loss/train': 1.4812414646148682} +07/25/2024 12:33:03 - INFO - __main__ - Step 14335: {'lr': 0.0004897822339207179, 'samples': 688080, 'steps': 14334, 'loss/train': 0.6262151002883911} +07/25/2024 12:33:03 - INFO - __main__ - Step 14336: {'lr': 0.0004897807452966237, 'samples': 688128, 'steps': 14335, 'loss/train': 1.446424126625061} +07/25/2024 12:33:03 - INFO - __main__ - Step 14337: {'lr': 0.0004897792565663613, 'samples': 688176, 'steps': 14336, 'loss/train': 1.6761893033981323} +07/25/2024 12:33:03 - INFO - __main__ - Step 14338: {'lr': 0.0004897777677299313, 'samples': 688224, 'steps': 14337, 'loss/train': 0.5063796639442444} +07/25/2024 12:33:04 - INFO - __main__ - Step 14339: {'lr': 0.0004897762787873345, 'samples': 688272, 'steps': 14338, 'loss/train': 0.17525075376033783} +07/25/2024 12:33:04 - INFO - __main__ - Step 14340: {'lr': 0.0004897747897385715, 'samples': 688320, 'steps': 14339, 'loss/train': 1.912131667137146} +07/25/2024 12:33:04 - INFO - __main__ - Step 14341: {'lr': 0.0004897733005836429, 'samples': 688368, 'steps': 14340, 'loss/train': 2.228736162185669} +07/25/2024 12:33:05 - INFO - __main__ - Step 14342: {'lr': 0.0004897718113225495, 'samples': 688416, 'steps': 14341, 'loss/train': 1.8536553382873535} +07/25/2024 12:33:05 - INFO - __main__ - Step 14343: {'lr': 0.0004897703219552917, 'samples': 688464, 'steps': 14342, 'loss/train': 1.5797600746154785} +07/25/2024 12:33:05 - INFO - __main__ - Step 14344: {'lr': 0.0004897688324818705, 'samples': 688512, 'steps': 14343, 'loss/train': 1.5127612352371216} +07/25/2024 12:33:05 - INFO - __main__ - Step 14345: {'lr': 0.0004897673429022862, 'samples': 688560, 'steps': 14344, 'loss/train': 0.6856855154037476} +07/25/2024 12:33:06 - INFO - __main__ - Step 14346: {'lr': 0.0004897658532165397, 'samples': 688608, 'steps': 14345, 'loss/train': 1.8747761249542236} +07/25/2024 12:33:06 - INFO - __main__ - Step 14347: {'lr': 0.0004897643634246316, 'samples': 688656, 'steps': 14346, 'loss/train': 0.8008133172988892} +07/25/2024 12:33:06 - INFO - __main__ - Step 14348: {'lr': 0.0004897628735265625, 'samples': 688704, 'steps': 14347, 'loss/train': 1.6521291732788086} +07/25/2024 12:33:07 - INFO - __main__ - Step 14349: {'lr': 0.0004897613835223333, 'samples': 688752, 'steps': 14348, 'loss/train': 0.5306215286254883} +07/25/2024 12:33:07 - INFO - __main__ - Step 14350: {'lr': 0.0004897598934119443, 'samples': 688800, 'steps': 14349, 'loss/train': 1.75392746925354} +07/25/2024 12:33:07 - INFO - __main__ - Step 14351: {'lr': 0.0004897584031953964, 'samples': 688848, 'steps': 14350, 'loss/train': 1.426695466041565} +07/25/2024 12:33:07 - INFO - __main__ - Step 14352: {'lr': 0.0004897569128726902, 'samples': 688896, 'steps': 14351, 'loss/train': 2.8489363193511963} +07/25/2024 12:33:08 - INFO - __main__ - Step 14353: {'lr': 0.0004897554224438265, 'samples': 688944, 'steps': 14352, 'loss/train': 1.2651339769363403} +07/25/2024 12:33:08 - INFO - __main__ - Step 14354: {'lr': 0.0004897539319088055, 'samples': 688992, 'steps': 14353, 'loss/train': 1.738686442375183} +07/25/2024 12:33:08 - INFO - __main__ - Step 14355: {'lr': 0.0004897524412676284, 'samples': 689040, 'steps': 14354, 'loss/train': 1.843855381011963} +07/25/2024 12:33:09 - INFO - __main__ - Step 14356: {'lr': 0.0004897509505202956, 'samples': 689088, 'steps': 14355, 'loss/train': 1.622258186340332} +07/25/2024 12:33:09 - INFO - __main__ - Step 14357: {'lr': 0.0004897494596668078, 'samples': 689136, 'steps': 14356, 'loss/train': 1.7073594331741333} +07/25/2024 12:33:09 - INFO - __main__ - Step 14358: {'lr': 0.0004897479687071656, 'samples': 689184, 'steps': 14357, 'loss/train': 2.1406636238098145} +07/25/2024 12:33:09 - INFO - __main__ - Step 14359: {'lr': 0.0004897464776413698, 'samples': 689232, 'steps': 14358, 'loss/train': 0.8163815140724182} +07/25/2024 12:33:10 - INFO - __main__ - Step 14360: {'lr': 0.000489744986469421, 'samples': 689280, 'steps': 14359, 'loss/train': 2.567620277404785} +07/25/2024 12:33:10 - INFO - __main__ - Step 14361: {'lr': 0.0004897434951913198, 'samples': 689328, 'steps': 14360, 'loss/train': 1.774897575378418} +07/25/2024 12:33:10 - INFO - __main__ - Step 14362: {'lr': 0.0004897420038070668, 'samples': 689376, 'steps': 14361, 'loss/train': 1.1749122142791748} +07/25/2024 12:33:11 - INFO - __main__ - Step 14363: {'lr': 0.0004897405123166628, 'samples': 689424, 'steps': 14362, 'loss/train': 0.291465699672699} +07/25/2024 12:33:11 - INFO - __main__ - Step 14364: {'lr': 0.0004897390207201085, 'samples': 689472, 'steps': 14363, 'loss/train': 1.7194156646728516} +07/25/2024 12:33:11 - INFO - __main__ - Step 14365: {'lr': 0.0004897375290174044, 'samples': 689520, 'steps': 14364, 'loss/train': 2.4221267700195312} +07/25/2024 12:33:11 - INFO - __main__ - Step 14366: {'lr': 0.0004897360372085513, 'samples': 689568, 'steps': 14365, 'loss/train': 1.880120038986206} +07/25/2024 12:33:12 - INFO - __main__ - Step 14367: {'lr': 0.0004897345452935497, 'samples': 689616, 'steps': 14366, 'loss/train': 1.8016678094863892} +07/25/2024 12:33:12 - INFO - __main__ - Step 14368: {'lr': 0.0004897330532724005, 'samples': 689664, 'steps': 14367, 'loss/train': 1.732203722000122} +07/25/2024 12:33:12 - INFO - __main__ - Step 14369: {'lr': 0.0004897315611451041, 'samples': 689712, 'steps': 14368, 'loss/train': 0.6641019582748413} +07/25/2024 12:33:13 - INFO - __main__ - Step 14370: {'lr': 0.0004897300689116613, 'samples': 689760, 'steps': 14369, 'loss/train': 1.8614466190338135} +07/25/2024 12:33:13 - INFO - __main__ - Step 14371: {'lr': 0.0004897285765720728, 'samples': 689808, 'steps': 14370, 'loss/train': 0.7625656723976135} +07/25/2024 12:33:13 - INFO - __main__ - Step 14372: {'lr': 0.0004897270841263392, 'samples': 689856, 'steps': 14371, 'loss/train': 1.948724389076233} +07/25/2024 12:33:13 - INFO - __main__ - Step 14373: {'lr': 0.0004897255915744611, 'samples': 689904, 'steps': 14372, 'loss/train': 0.7939683794975281} +07/25/2024 12:33:14 - INFO - __main__ - Step 14374: {'lr': 0.0004897240989164393, 'samples': 689952, 'steps': 14373, 'loss/train': 1.5774389505386353} +07/25/2024 12:33:14 - INFO - __main__ - Step 14375: {'lr': 0.0004897226061522744, 'samples': 690000, 'steps': 14374, 'loss/train': 2.0116817951202393} +07/25/2024 12:33:14 - INFO - __main__ - Step 14376: {'lr': 0.0004897211132819669, 'samples': 690048, 'steps': 14375, 'loss/train': 3.7770068645477295} +07/25/2024 12:33:15 - INFO - __main__ - Step 14377: {'lr': 0.0004897196203055178, 'samples': 690096, 'steps': 14376, 'loss/train': 2.2743804454803467} +07/25/2024 12:33:15 - INFO - __main__ - Step 14378: {'lr': 0.0004897181272229274, 'samples': 690144, 'steps': 14377, 'loss/train': 2.0696847438812256} +07/25/2024 12:33:15 - INFO - __main__ - Step 14379: {'lr': 0.0004897166340341966, 'samples': 690192, 'steps': 14378, 'loss/train': 1.6389163732528687} +07/25/2024 12:33:15 - INFO - __main__ - Step 14380: {'lr': 0.000489715140739326, 'samples': 690240, 'steps': 14379, 'loss/train': 0.9107683300971985} +07/25/2024 12:33:16 - INFO - __main__ - Step 14381: {'lr': 0.0004897136473383162, 'samples': 690288, 'steps': 14380, 'loss/train': 1.8488715887069702} +07/25/2024 12:33:16 - INFO - __main__ - Step 14382: {'lr': 0.000489712153831168, 'samples': 690336, 'steps': 14381, 'loss/train': 1.838127851486206} +07/25/2024 12:33:16 - INFO - __main__ - Step 14383: {'lr': 0.000489710660217882, 'samples': 690384, 'steps': 14382, 'loss/train': 1.7917606830596924} +07/25/2024 12:33:17 - INFO - __main__ - Step 14384: {'lr': 0.0004897091664984587, 'samples': 690432, 'steps': 14383, 'loss/train': 2.79972767829895} +07/25/2024 12:33:17 - INFO - __main__ - Step 14385: {'lr': 0.000489707672672899, 'samples': 690480, 'steps': 14384, 'loss/train': 2.2988479137420654} +07/25/2024 12:33:17 - INFO - __main__ - Step 14386: {'lr': 0.0004897061787412034, 'samples': 690528, 'steps': 14385, 'loss/train': 1.6836788654327393} +07/25/2024 12:33:17 - INFO - __main__ - Step 14387: {'lr': 0.0004897046847033727, 'samples': 690576, 'steps': 14386, 'loss/train': 2.1380980014801025} +07/25/2024 12:33:18 - INFO - __main__ - Step 14388: {'lr': 0.0004897031905594074, 'samples': 690624, 'steps': 14387, 'loss/train': 1.9729390144348145} +07/25/2024 12:33:18 - INFO - __main__ - Step 14389: {'lr': 0.0004897016963093083, 'samples': 690672, 'steps': 14388, 'loss/train': 2.4552102088928223} +07/25/2024 12:33:18 - INFO - __main__ - Step 14390: {'lr': 0.0004897002019530759, 'samples': 690720, 'steps': 14389, 'loss/train': 2.051863431930542} +07/25/2024 12:33:19 - INFO - __main__ - Step 14391: {'lr': 0.0004896987074907111, 'samples': 690768, 'steps': 14390, 'loss/train': 1.7850863933563232} +07/25/2024 12:33:19 - INFO - __main__ - Step 14392: {'lr': 0.0004896972129222143, 'samples': 690816, 'steps': 14391, 'loss/train': 1.5336259603500366} +07/25/2024 12:33:19 - INFO - __main__ - Step 14393: {'lr': 0.0004896957182475865, 'samples': 690864, 'steps': 14392, 'loss/train': 0.664706289768219} +07/25/2024 12:33:19 - INFO - __main__ - Step 14394: {'lr': 0.000489694223466828, 'samples': 690912, 'steps': 14393, 'loss/train': 0.8684497475624084} +07/25/2024 12:33:20 - INFO - __main__ - Step 14395: {'lr': 0.0004896927285799397, 'samples': 690960, 'steps': 14394, 'loss/train': 0.5836400985717773} +07/25/2024 12:33:20 - INFO - __main__ - Step 14396: {'lr': 0.000489691233586922, 'samples': 691008, 'steps': 14395, 'loss/train': 2.162696361541748} +07/25/2024 12:33:20 - INFO - __main__ - Step 14397: {'lr': 0.0004896897384877759, 'samples': 691056, 'steps': 14396, 'loss/train': 0.8643290996551514} +07/25/2024 12:33:20 - INFO - __main__ - Step 14398: {'lr': 0.0004896882432825018, 'samples': 691104, 'steps': 14397, 'loss/train': 1.7214680910110474} +07/25/2024 12:33:21 - INFO - __main__ - Step 14399: {'lr': 0.0004896867479711006, 'samples': 691152, 'steps': 14398, 'loss/train': 1.703936219215393} +07/25/2024 12:33:21 - INFO - __main__ - Step 14400: {'lr': 0.0004896852525535727, 'samples': 691200, 'steps': 14399, 'loss/train': 2.2923593521118164} +07/25/2024 12:33:21 - INFO - __main__ - Step 14401: {'lr': 0.0004896837570299189, 'samples': 691248, 'steps': 14400, 'loss/train': 1.9665580987930298} +07/25/2024 12:33:22 - INFO - __main__ - Step 14402: {'lr': 0.0004896822614001398, 'samples': 691296, 'steps': 14401, 'loss/train': 1.3946330547332764} +07/25/2024 12:33:22 - INFO - __main__ - Step 14403: {'lr': 0.0004896807656642363, 'samples': 691344, 'steps': 14402, 'loss/train': 0.860273540019989} +07/25/2024 12:33:22 - INFO - __main__ - Step 14404: {'lr': 0.0004896792698222087, 'samples': 691392, 'steps': 14403, 'loss/train': 1.1790882349014282} +07/25/2024 12:33:22 - INFO - __main__ - Step 14405: {'lr': 0.0004896777738740579, 'samples': 691440, 'steps': 14404, 'loss/train': 1.9629400968551636} +07/25/2024 12:33:23 - INFO - __main__ - Step 14406: {'lr': 0.0004896762778197844, 'samples': 691488, 'steps': 14405, 'loss/train': 1.5447258949279785} +07/25/2024 12:33:23 - INFO - __main__ - Step 14407: {'lr': 0.0004896747816593891, 'samples': 691536, 'steps': 14406, 'loss/train': 1.8767311573028564} +07/25/2024 12:33:23 - INFO - __main__ - Step 14408: {'lr': 0.0004896732853928725, 'samples': 691584, 'steps': 14407, 'loss/train': 2.077083110809326} +07/25/2024 12:33:24 - INFO - __main__ - Step 14409: {'lr': 0.0004896717890202352, 'samples': 691632, 'steps': 14408, 'loss/train': 2.230414867401123} +07/25/2024 12:33:24 - INFO - __main__ - Step 14410: {'lr': 0.000489670292541478, 'samples': 691680, 'steps': 14409, 'loss/train': 1.8914138078689575} +07/25/2024 12:33:24 - INFO - __main__ - Step 14411: {'lr': 0.0004896687959566015, 'samples': 691728, 'steps': 14410, 'loss/train': 2.1525352001190186} +07/25/2024 12:33:24 - INFO - __main__ - Step 14412: {'lr': 0.0004896672992656064, 'samples': 691776, 'steps': 14411, 'loss/train': 1.972996711730957} +07/25/2024 12:33:25 - INFO - __main__ - Step 14413: {'lr': 0.0004896658024684933, 'samples': 691824, 'steps': 14412, 'loss/train': 2.0985639095306396} +07/25/2024 12:33:25 - INFO - __main__ - Step 14414: {'lr': 0.0004896643055652629, 'samples': 691872, 'steps': 14413, 'loss/train': 1.5325860977172852} +07/25/2024 12:33:25 - INFO - __main__ - Step 14415: {'lr': 0.0004896628085559159, 'samples': 691920, 'steps': 14414, 'loss/train': 1.6572436094284058} +07/25/2024 12:33:26 - INFO - __main__ - Step 14416: {'lr': 0.0004896613114404529, 'samples': 691968, 'steps': 14415, 'loss/train': 2.00665283203125} +07/25/2024 12:33:26 - INFO - __main__ - Step 14417: {'lr': 0.0004896598142188745, 'samples': 692016, 'steps': 14416, 'loss/train': 0.6074827909469604} +07/25/2024 12:33:26 - INFO - __main__ - Step 14418: {'lr': 0.0004896583168911816, 'samples': 692064, 'steps': 14417, 'loss/train': 1.8288099765777588} +07/25/2024 12:33:26 - INFO - __main__ - Step 14419: {'lr': 0.0004896568194573747, 'samples': 692112, 'steps': 14418, 'loss/train': 0.6884140968322754} +07/25/2024 12:33:27 - INFO - __main__ - Step 14420: {'lr': 0.0004896553219174544, 'samples': 692160, 'steps': 14419, 'loss/train': 1.3538469076156616} +07/25/2024 12:33:27 - INFO - __main__ - Step 14421: {'lr': 0.0004896538242714215, 'samples': 692208, 'steps': 14420, 'loss/train': 0.4842419922351837} +07/25/2024 12:33:27 - INFO - __main__ - Step 14422: {'lr': 0.0004896523265192765, 'samples': 692256, 'steps': 14421, 'loss/train': 1.865239143371582} +07/25/2024 12:33:28 - INFO - __main__ - Step 14423: {'lr': 0.0004896508286610201, 'samples': 692304, 'steps': 14422, 'loss/train': 1.6948599815368652} +07/25/2024 12:33:28 - INFO - __main__ - Step 14424: {'lr': 0.0004896493306966533, 'samples': 692352, 'steps': 14423, 'loss/train': 1.6581226587295532} +07/25/2024 12:33:28 - INFO - __main__ - Step 14425: {'lr': 0.0004896478326261763, 'samples': 692400, 'steps': 14424, 'loss/train': 2.0511252880096436} +07/25/2024 12:33:28 - INFO - __main__ - Step 14426: {'lr': 0.00048964633444959, 'samples': 692448, 'steps': 14425, 'loss/train': 1.386459231376648} +07/25/2024 12:33:29 - INFO - __main__ - Step 14427: {'lr': 0.0004896448361668951, 'samples': 692496, 'steps': 14426, 'loss/train': 0.9813017249107361} +07/25/2024 12:33:29 - INFO - __main__ - Step 14428: {'lr': 0.000489643337778092, 'samples': 692544, 'steps': 14427, 'loss/train': 1.8789080381393433} +07/25/2024 12:33:29 - INFO - __main__ - Step 14429: {'lr': 0.0004896418392831817, 'samples': 692592, 'steps': 14428, 'loss/train': 2.175083875656128} +07/25/2024 12:33:30 - INFO - __main__ - Step 14430: {'lr': 0.0004896403406821646, 'samples': 692640, 'steps': 14429, 'loss/train': 1.83785879611969} +07/25/2024 12:33:30 - INFO - __main__ - Step 14431: {'lr': 0.0004896388419750416, 'samples': 692688, 'steps': 14430, 'loss/train': 1.8385578393936157} +07/25/2024 12:33:30 - INFO - __main__ - Step 14432: {'lr': 0.0004896373431618131, 'samples': 692736, 'steps': 14431, 'loss/train': 1.828347086906433} +07/25/2024 12:33:30 - INFO - __main__ - Step 14433: {'lr': 0.0004896358442424799, 'samples': 692784, 'steps': 14432, 'loss/train': 2.164644718170166} +07/25/2024 12:33:31 - INFO - __main__ - Step 14434: {'lr': 0.0004896343452170427, 'samples': 692832, 'steps': 14433, 'loss/train': 1.7823878526687622} +07/25/2024 12:33:31 - INFO - __main__ - Step 14435: {'lr': 0.0004896328460855023, 'samples': 692880, 'steps': 14434, 'loss/train': 1.4818999767303467} +07/25/2024 12:33:31 - INFO - __main__ - Step 14436: {'lr': 0.000489631346847859, 'samples': 692928, 'steps': 14435, 'loss/train': 1.6913607120513916} +07/25/2024 12:33:32 - INFO - __main__ - Step 14437: {'lr': 0.0004896298475041137, 'samples': 692976, 'steps': 14436, 'loss/train': 2.0131969451904297} +07/25/2024 12:33:32 - INFO - __main__ - Step 14438: {'lr': 0.0004896283480542669, 'samples': 693024, 'steps': 14437, 'loss/train': 1.8439792394638062} +07/25/2024 12:33:32 - INFO - __main__ - Step 14439: {'lr': 0.0004896268484983195, 'samples': 693072, 'steps': 14438, 'loss/train': 0.965295672416687} +07/25/2024 12:33:32 - INFO - __main__ - Step 14440: {'lr': 0.0004896253488362721, 'samples': 693120, 'steps': 14439, 'loss/train': 1.9771108627319336} +07/25/2024 12:33:33 - INFO - __main__ - Step 14441: {'lr': 0.0004896238490681253, 'samples': 693168, 'steps': 14440, 'loss/train': 0.6794227361679077} +07/25/2024 12:33:33 - INFO - __main__ - Step 14442: {'lr': 0.0004896223491938796, 'samples': 693216, 'steps': 14441, 'loss/train': 2.032060384750366} +07/25/2024 12:33:33 - INFO - __main__ - Step 14443: {'lr': 0.000489620849213536, 'samples': 693264, 'steps': 14442, 'loss/train': 0.7285451889038086} +07/25/2024 12:33:34 - INFO - __main__ - Step 14444: {'lr': 0.0004896193491270949, 'samples': 693312, 'steps': 14443, 'loss/train': 1.5126789808273315} +07/25/2024 12:33:34 - INFO - __main__ - Step 14445: {'lr': 0.0004896178489345572, 'samples': 693360, 'steps': 14444, 'loss/train': 0.3604236841201782} +07/25/2024 12:33:34 - INFO - __main__ - Step 14446: {'lr': 0.0004896163486359233, 'samples': 693408, 'steps': 14445, 'loss/train': 5.117636680603027} +07/25/2024 12:33:34 - INFO - __main__ - Step 14447: {'lr': 0.0004896148482311941, 'samples': 693456, 'steps': 14446, 'loss/train': 1.4652187824249268} +07/25/2024 12:33:35 - INFO - __main__ - Step 14448: {'lr': 0.0004896133477203701, 'samples': 693504, 'steps': 14447, 'loss/train': 2.16109561920166} +07/25/2024 12:33:35 - INFO - __main__ - Step 14449: {'lr': 0.0004896118471034521, 'samples': 693552, 'steps': 14448, 'loss/train': 1.6209847927093506} +07/25/2024 12:33:35 - INFO - __main__ - Step 14450: {'lr': 0.0004896103463804405, 'samples': 693600, 'steps': 14449, 'loss/train': 1.6052420139312744} +07/25/2024 12:33:36 - INFO - __main__ - Step 14451: {'lr': 0.0004896088455513363, 'samples': 693648, 'steps': 14450, 'loss/train': 2.018820285797119} +07/25/2024 12:33:36 - INFO - __main__ - Step 14452: {'lr': 0.0004896073446161401, 'samples': 693696, 'steps': 14451, 'loss/train': 1.3933449983596802} +07/25/2024 12:33:36 - INFO - __main__ - Step 14453: {'lr': 0.0004896058435748523, 'samples': 693744, 'steps': 14452, 'loss/train': 2.0691821575164795} +07/25/2024 12:33:36 - INFO - __main__ - Step 14454: {'lr': 0.0004896043424274739, 'samples': 693792, 'steps': 14453, 'loss/train': 1.6701096296310425} +07/25/2024 12:33:37 - INFO - __main__ - Step 14455: {'lr': 0.0004896028411740052, 'samples': 693840, 'steps': 14454, 'loss/train': 1.9947426319122314} +07/25/2024 12:33:37 - INFO - __main__ - Step 14456: {'lr': 0.0004896013398144473, 'samples': 693888, 'steps': 14455, 'loss/train': 1.841048002243042} +07/25/2024 12:33:37 - INFO - __main__ - Step 14457: {'lr': 0.0004895998383488005, 'samples': 693936, 'steps': 14456, 'loss/train': 2.2667524814605713} +07/25/2024 12:33:37 - INFO - __main__ - Step 14458: {'lr': 0.0004895983367770657, 'samples': 693984, 'steps': 14457, 'loss/train': 1.7487213611602783} +07/25/2024 12:33:38 - INFO - __main__ - Step 14459: {'lr': 0.0004895968350992434, 'samples': 694032, 'steps': 14458, 'loss/train': 1.7791390419006348} +07/25/2024 12:33:38 - INFO - __main__ - Step 14460: {'lr': 0.0004895953333153343, 'samples': 694080, 'steps': 14459, 'loss/train': 2.018296480178833} +07/25/2024 12:33:38 - INFO - __main__ - Step 14461: {'lr': 0.0004895938314253392, 'samples': 694128, 'steps': 14460, 'loss/train': 1.3279955387115479} +07/25/2024 12:33:39 - INFO - __main__ - Step 14462: {'lr': 0.0004895923294292587, 'samples': 694176, 'steps': 14461, 'loss/train': 1.8334766626358032} +07/25/2024 12:33:39 - INFO - __main__ - Step 14463: {'lr': 0.0004895908273270933, 'samples': 694224, 'steps': 14462, 'loss/train': 1.5920323133468628} +07/25/2024 12:33:39 - INFO - __main__ - Step 14464: {'lr': 0.0004895893251188438, 'samples': 694272, 'steps': 14463, 'loss/train': 2.1116526126861572} +07/25/2024 12:33:39 - INFO - __main__ - Step 14465: {'lr': 0.0004895878228045109, 'samples': 694320, 'steps': 14464, 'loss/train': 0.6737218499183655} +07/25/2024 12:33:40 - INFO - __main__ - Step 14466: {'lr': 0.0004895863203840952, 'samples': 694368, 'steps': 14465, 'loss/train': 2.377110719680786} +07/25/2024 12:33:40 - INFO - __main__ - Step 14467: {'lr': 0.0004895848178575974, 'samples': 694416, 'steps': 14466, 'loss/train': 0.5549656748771667} +07/25/2024 12:33:40 - INFO - __main__ - Step 14468: {'lr': 0.0004895833152250182, 'samples': 694464, 'steps': 14467, 'loss/train': 1.495099425315857} +07/25/2024 12:33:41 - INFO - __main__ - Step 14469: {'lr': 0.0004895818124863582, 'samples': 694512, 'steps': 14468, 'loss/train': 0.4048757553100586} +07/25/2024 12:33:41 - INFO - __main__ - Step 14470: {'lr': 0.0004895803096416181, 'samples': 694560, 'steps': 14469, 'loss/train': 5.4768757820129395} +07/25/2024 12:33:41 - INFO - __main__ - Step 14471: {'lr': 0.0004895788066907985, 'samples': 694608, 'steps': 14470, 'loss/train': 1.8097718954086304} +07/25/2024 12:33:41 - INFO - __main__ - Step 14472: {'lr': 0.0004895773036339002, 'samples': 694656, 'steps': 14471, 'loss/train': 1.6881343126296997} +07/25/2024 12:33:42 - INFO - __main__ - Step 14473: {'lr': 0.0004895758004709237, 'samples': 694704, 'steps': 14472, 'loss/train': 1.497053623199463} +07/25/2024 12:33:42 - INFO - __main__ - Step 14474: {'lr': 0.0004895742972018699, 'samples': 694752, 'steps': 14473, 'loss/train': 1.9454947710037231} +07/25/2024 12:33:42 - INFO - __main__ - Step 14475: {'lr': 0.0004895727938267391, 'samples': 694800, 'steps': 14474, 'loss/train': 1.902250051498413} +07/25/2024 12:33:43 - INFO - __main__ - Step 14476: {'lr': 0.0004895712903455323, 'samples': 694848, 'steps': 14475, 'loss/train': 1.2520250082015991} +07/25/2024 12:33:43 - INFO - __main__ - Step 14477: {'lr': 0.00048956978675825, 'samples': 694896, 'steps': 14476, 'loss/train': 1.9839935302734375} +07/25/2024 12:33:43 - INFO - __main__ - Step 14478: {'lr': 0.0004895682830648929, 'samples': 694944, 'steps': 14477, 'loss/train': 1.5704115629196167} +07/25/2024 12:33:43 - INFO - __main__ - Step 14479: {'lr': 0.0004895667792654618, 'samples': 694992, 'steps': 14478, 'loss/train': 1.958508849143982} +07/25/2024 12:33:44 - INFO - __main__ - Step 14480: {'lr': 0.0004895652753599571, 'samples': 695040, 'steps': 14479, 'loss/train': 1.7411936521530151} +07/25/2024 12:33:44 - INFO - __main__ - Step 14481: {'lr': 0.0004895637713483797, 'samples': 695088, 'steps': 14480, 'loss/train': 2.0553243160247803} +07/25/2024 12:33:44 - INFO - __main__ - Step 14482: {'lr': 0.0004895622672307302, 'samples': 695136, 'steps': 14481, 'loss/train': 1.6687955856323242} +07/25/2024 12:33:45 - INFO - __main__ - Step 14483: {'lr': 0.0004895607630070091, 'samples': 695184, 'steps': 14482, 'loss/train': 1.7220178842544556} +07/25/2024 12:33:45 - INFO - __main__ - Step 14484: {'lr': 0.0004895592586772173, 'samples': 695232, 'steps': 14483, 'loss/train': 1.6698380708694458} +07/25/2024 12:33:45 - INFO - __main__ - Step 14485: {'lr': 0.0004895577542413554, 'samples': 695280, 'steps': 14484, 'loss/train': 1.8193559646606445} +07/25/2024 12:33:45 - INFO - __main__ - Step 14486: {'lr': 0.0004895562496994239, 'samples': 695328, 'steps': 14485, 'loss/train': 1.8504903316497803} +07/25/2024 12:33:46 - INFO - __main__ - Step 14487: {'lr': 0.0004895547450514237, 'samples': 695376, 'steps': 14486, 'loss/train': 1.726981520652771} +07/25/2024 12:33:46 - INFO - __main__ - Step 14488: {'lr': 0.0004895532402973555, 'samples': 695424, 'steps': 14487, 'loss/train': 1.0227599143981934} +07/25/2024 12:33:46 - INFO - __main__ - Step 14489: {'lr': 0.0004895517354372197, 'samples': 695472, 'steps': 14488, 'loss/train': 0.6272615194320679} +07/25/2024 12:33:47 - INFO - __main__ - Step 14490: {'lr': 0.000489550230471017, 'samples': 695520, 'steps': 14489, 'loss/train': 1.7961722612380981} +07/25/2024 12:33:47 - INFO - __main__ - Step 14491: {'lr': 0.0004895487253987484, 'samples': 695568, 'steps': 14490, 'loss/train': 1.0571638345718384} +07/25/2024 12:33:47 - INFO - __main__ - Step 14492: {'lr': 0.0004895472202204142, 'samples': 695616, 'steps': 14491, 'loss/train': 1.659633755683899} +07/25/2024 12:33:47 - INFO - __main__ - Step 14493: {'lr': 0.0004895457149360152, 'samples': 695664, 'steps': 14492, 'loss/train': 0.35192179679870605} +07/25/2024 12:33:48 - INFO - __main__ - Step 14494: {'lr': 0.0004895442095455521, 'samples': 695712, 'steps': 14493, 'loss/train': 6.617551326751709} +07/25/2024 12:33:48 - INFO - __main__ - Step 14495: {'lr': 0.0004895427040490255, 'samples': 695760, 'steps': 14494, 'loss/train': 2.4399726390838623} +07/25/2024 12:33:48 - INFO - __main__ - Step 14496: {'lr': 0.0004895411984464362, 'samples': 695808, 'steps': 14495, 'loss/train': 1.1350429058074951} +07/25/2024 12:33:49 - INFO - __main__ - Step 14497: {'lr': 0.0004895396927377847, 'samples': 695856, 'steps': 14496, 'loss/train': 1.9129225015640259} +07/25/2024 12:33:49 - INFO - __main__ - Step 14498: {'lr': 0.0004895381869230716, 'samples': 695904, 'steps': 14497, 'loss/train': 1.1805660724639893} +07/25/2024 12:33:49 - INFO - __main__ - Step 14499: {'lr': 0.0004895366810022979, 'samples': 695952, 'steps': 14498, 'loss/train': 1.8974443674087524} +07/25/2024 12:33:49 - INFO - __main__ - Step 14500: {'lr': 0.000489535174975464, 'samples': 696000, 'steps': 14499, 'loss/train': 2.070359468460083} +07/25/2024 12:33:50 - INFO - __main__ - Step 14501: {'lr': 0.0004895336688425706, 'samples': 696048, 'steps': 14500, 'loss/train': 1.9740924835205078} +07/25/2024 12:33:50 - INFO - __main__ - Step 14502: {'lr': 0.0004895321626036185, 'samples': 696096, 'steps': 14501, 'loss/train': 1.562525749206543} +07/25/2024 12:33:50 - INFO - __main__ - Step 14503: {'lr': 0.0004895306562586082, 'samples': 696144, 'steps': 14502, 'loss/train': 1.632839560508728} +07/25/2024 12:33:51 - INFO - __main__ - Step 14504: {'lr': 0.0004895291498075404, 'samples': 696192, 'steps': 14503, 'loss/train': 1.63553786277771} +07/25/2024 12:33:51 - INFO - __main__ - Step 14505: {'lr': 0.0004895276432504158, 'samples': 696240, 'steps': 14504, 'loss/train': 1.7550581693649292} +07/25/2024 12:33:51 - INFO - __main__ - Step 14506: {'lr': 0.0004895261365872352, 'samples': 696288, 'steps': 14505, 'loss/train': 1.6681504249572754} +07/25/2024 12:33:51 - INFO - __main__ - Step 14507: {'lr': 0.0004895246298179992, 'samples': 696336, 'steps': 14506, 'loss/train': 1.8409055471420288} +07/25/2024 12:33:52 - INFO - __main__ - Step 14508: {'lr': 0.0004895231229427082, 'samples': 696384, 'steps': 14507, 'loss/train': 2.1697375774383545} +07/25/2024 12:33:52 - INFO - __main__ - Step 14509: {'lr': 0.0004895216159613631, 'samples': 696432, 'steps': 14508, 'loss/train': 1.6735225915908813} +07/25/2024 12:33:52 - INFO - __main__ - Step 14510: {'lr': 0.0004895201088739645, 'samples': 696480, 'steps': 14509, 'loss/train': 1.5852774381637573} +07/25/2024 12:33:53 - INFO - __main__ - Step 14511: {'lr': 0.0004895186016805133, 'samples': 696528, 'steps': 14510, 'loss/train': 1.8377275466918945} +07/25/2024 12:33:53 - INFO - __main__ - Step 14512: {'lr': 0.0004895170943810099, 'samples': 696576, 'steps': 14511, 'loss/train': 0.26129594445228577} +07/25/2024 12:33:53 - INFO - __main__ - Step 14513: {'lr': 0.0004895155869754549, 'samples': 696624, 'steps': 14512, 'loss/train': 0.5507838129997253} +07/25/2024 12:33:53 - INFO - __main__ - Step 14514: {'lr': 0.0004895140794638493, 'samples': 696672, 'steps': 14513, 'loss/train': 1.6372931003570557} +07/25/2024 12:33:54 - INFO - __main__ - Step 14515: {'lr': 0.0004895125718461934, 'samples': 696720, 'steps': 14514, 'loss/train': 1.1870585680007935} +07/25/2024 12:33:54 - INFO - __main__ - Step 14516: {'lr': 0.0004895110641224882, 'samples': 696768, 'steps': 14515, 'loss/train': 1.6201415061950684} +07/25/2024 12:33:54 - INFO - __main__ - Step 14517: {'lr': 0.0004895095562927342, 'samples': 696816, 'steps': 14516, 'loss/train': 0.3788124620914459} +07/25/2024 12:33:55 - INFO - __main__ - Step 14518: {'lr': 0.000489508048356932, 'samples': 696864, 'steps': 14517, 'loss/train': 5.959254741668701} +07/25/2024 12:33:55 - INFO - __main__ - Step 14519: {'lr': 0.0004895065403150824, 'samples': 696912, 'steps': 14518, 'loss/train': 1.4324711561203003} +07/25/2024 12:33:55 - INFO - __main__ - Step 14520: {'lr': 0.000489505032167186, 'samples': 696960, 'steps': 14519, 'loss/train': 1.65883207321167} +07/25/2024 12:33:55 - INFO - __main__ - Step 14521: {'lr': 0.0004895035239132435, 'samples': 697008, 'steps': 14520, 'loss/train': 1.8618496656417847} +07/25/2024 12:33:56 - DEBUG - datasets.packaged_modules.json.json - Batch of 10493677 bytes couldn't be parsed with block_size=655360. Retrying with block_size=1310720. +07/25/2024 12:33:56 - INFO - __main__ - Step 14522: {'lr': 0.0004895020155532555, 'samples': 697056, 'steps': 14521, 'loss/train': 1.999266266822815} +07/25/2024 12:33:56 - INFO - __main__ - Step 14523: {'lr': 0.0004895005070872227, 'samples': 697104, 'steps': 14522, 'loss/train': 1.6251137256622314} +07/25/2024 12:33:56 - INFO - __main__ - Step 14524: {'lr': 0.0004894989985151459, 'samples': 697152, 'steps': 14523, 'loss/train': 2.309180974960327} +07/25/2024 12:33:56 - INFO - __main__ - Step 14525: {'lr': 0.0004894974898370255, 'samples': 697200, 'steps': 14524, 'loss/train': 2.122180461883545} +07/25/2024 12:33:57 - INFO - __main__ - Step 14526: {'lr': 0.0004894959810528624, 'samples': 697248, 'steps': 14525, 'loss/train': 1.2132585048675537} +07/25/2024 12:33:57 - INFO - __main__ - Step 14527: {'lr': 0.0004894944721626572, 'samples': 697296, 'steps': 14526, 'loss/train': 1.3721634149551392} +07/25/2024 12:33:57 - INFO - __main__ - Step 14528: {'lr': 0.0004894929631664106, 'samples': 697344, 'steps': 14527, 'loss/train': 1.6029129028320312} +07/25/2024 12:33:58 - INFO - __main__ - Step 14529: {'lr': 0.0004894914540641232, 'samples': 697392, 'steps': 14528, 'loss/train': 2.031360626220703} +07/25/2024 12:33:58 - INFO - __main__ - Step 14530: {'lr': 0.0004894899448557957, 'samples': 697440, 'steps': 14529, 'loss/train': 1.981006145477295} +07/25/2024 12:33:58 - INFO - __main__ - Step 14531: {'lr': 0.0004894884355414286, 'samples': 697488, 'steps': 14530, 'loss/train': 2.237910270690918} +07/25/2024 12:33:58 - INFO - __main__ - Step 14532: {'lr': 0.0004894869261210229, 'samples': 697536, 'steps': 14531, 'loss/train': 2.126164436340332} +07/25/2024 12:33:59 - INFO - __main__ - Step 14533: {'lr': 0.0004894854165945792, 'samples': 697584, 'steps': 14532, 'loss/train': 2.0854907035827637} +07/25/2024 12:33:59 - INFO - __main__ - Step 14534: {'lr': 0.0004894839069620978, 'samples': 697632, 'steps': 14533, 'loss/train': 1.6156214475631714} +07/25/2024 12:33:59 - INFO - __main__ - Step 14535: {'lr': 0.0004894823972235798, 'samples': 697680, 'steps': 14534, 'loss/train': 1.317335844039917} +07/25/2024 12:34:00 - INFO - __main__ - Step 14536: {'lr': 0.0004894808873790257, 'samples': 697728, 'steps': 14535, 'loss/train': 1.3350499868392944} +07/25/2024 12:34:00 - INFO - __main__ - Step 14537: {'lr': 0.0004894793774284361, 'samples': 697776, 'steps': 14536, 'loss/train': 1.1598409414291382} +07/25/2024 12:34:00 - INFO - __main__ - Step 14538: {'lr': 0.0004894778673718118, 'samples': 697824, 'steps': 14537, 'loss/train': 1.4153339862823486} +07/25/2024 12:34:00 - INFO - __main__ - Step 14539: {'lr': 0.0004894763572091534, 'samples': 697872, 'steps': 14538, 'loss/train': 1.2614399194717407} +07/25/2024 12:34:01 - INFO - __main__ - Step 14540: {'lr': 0.0004894748469404616, 'samples': 697920, 'steps': 14539, 'loss/train': 1.6851617097854614} +07/25/2024 12:34:01 - INFO - __main__ - Step 14541: {'lr': 0.000489473336565737, 'samples': 697968, 'steps': 14540, 'loss/train': 0.9000803232192993} +07/25/2024 12:34:01 - INFO - __main__ - Step 14542: {'lr': 0.0004894718260849804, 'samples': 698016, 'steps': 14541, 'loss/train': 7.0097880363464355} +07/25/2024 12:34:02 - INFO - __main__ - Step 14543: {'lr': 0.0004894703154981923, 'samples': 698064, 'steps': 14542, 'loss/train': 0.28413906693458557} +07/25/2024 12:34:02 - INFO - __main__ - Step 14544: {'lr': 0.0004894688048053735, 'samples': 698112, 'steps': 14543, 'loss/train': 2.042051076889038} +07/25/2024 12:34:02 - INFO - __main__ - Step 14545: {'lr': 0.0004894672940065246, 'samples': 698160, 'steps': 14544, 'loss/train': 2.201470136642456} +07/25/2024 12:34:02 - INFO - __main__ - Step 14546: {'lr': 0.0004894657831016463, 'samples': 698208, 'steps': 14545, 'loss/train': 2.6468801498413086} +07/25/2024 12:34:03 - INFO - __main__ - Step 14547: {'lr': 0.0004894642720907393, 'samples': 698256, 'steps': 14546, 'loss/train': 1.739590048789978} +07/25/2024 12:34:03 - INFO - __main__ - Step 14548: {'lr': 0.0004894627609738041, 'samples': 698304, 'steps': 14547, 'loss/train': 2.5491883754730225} +07/25/2024 12:34:03 - INFO - __main__ - Step 14549: {'lr': 0.0004894612497508416, 'samples': 698352, 'steps': 14548, 'loss/train': 1.4903521537780762} +07/25/2024 12:34:04 - INFO - __main__ - Step 14550: {'lr': 0.0004894597384218523, 'samples': 698400, 'steps': 14549, 'loss/train': 2.0868940353393555} +07/25/2024 12:34:04 - INFO - __main__ - Step 14551: {'lr': 0.0004894582269868371, 'samples': 698448, 'steps': 14550, 'loss/train': 1.7618657350540161} +07/25/2024 12:34:04 - INFO - __main__ - Step 14552: {'lr': 0.0004894567154457964, 'samples': 698496, 'steps': 14551, 'loss/train': 1.3284482955932617} +07/25/2024 12:34:04 - INFO - __main__ - Step 14553: {'lr': 0.000489455203798731, 'samples': 698544, 'steps': 14552, 'loss/train': 2.025306224822998} +07/25/2024 12:34:05 - INFO - __main__ - Step 14554: {'lr': 0.0004894536920456415, 'samples': 698592, 'steps': 14553, 'loss/train': 1.911612868309021} +07/25/2024 12:34:05 - INFO - __main__ - Step 14555: {'lr': 0.0004894521801865286, 'samples': 698640, 'steps': 14554, 'loss/train': 2.1163878440856934} +07/25/2024 12:34:05 - INFO - __main__ - Step 14556: {'lr': 0.000489450668221393, 'samples': 698688, 'steps': 14555, 'loss/train': 2.1124484539031982} +07/25/2024 12:34:06 - INFO - __main__ - Step 14557: {'lr': 0.0004894491561502354, 'samples': 698736, 'steps': 14556, 'loss/train': 2.7998111248016357} +07/25/2024 12:34:06 - INFO - __main__ - Step 14558: {'lr': 0.0004894476439730564, 'samples': 698784, 'steps': 14557, 'loss/train': 1.929358959197998} +07/25/2024 12:34:06 - INFO - __main__ - Step 14559: {'lr': 0.0004894461316898568, 'samples': 698832, 'steps': 14558, 'loss/train': 0.6577134132385254} +07/25/2024 12:34:06 - INFO - __main__ - Step 14560: {'lr': 0.000489444619300637, 'samples': 698880, 'steps': 14559, 'loss/train': 1.7763700485229492} +07/25/2024 12:34:07 - INFO - __main__ - Step 14561: {'lr': 0.0004894431068053979, 'samples': 698928, 'steps': 14560, 'loss/train': 1.9643772840499878} +07/25/2024 12:34:07 - INFO - __main__ - Step 14562: {'lr': 0.0004894415942041401, 'samples': 698976, 'steps': 14561, 'loss/train': 1.7068278789520264} +07/25/2024 12:34:07 - INFO - __main__ - Step 14563: {'lr': 0.0004894400814968641, 'samples': 699024, 'steps': 14562, 'loss/train': 1.8318467140197754} +07/25/2024 12:34:08 - INFO - __main__ - Step 14564: {'lr': 0.000489438568683571, 'samples': 699072, 'steps': 14563, 'loss/train': 1.7371991872787476} +07/25/2024 12:34:08 - INFO - __main__ - Step 14565: {'lr': 0.0004894370557642612, 'samples': 699120, 'steps': 14564, 'loss/train': 0.37899577617645264} +07/25/2024 12:34:08 - INFO - __main__ - Step 14566: {'lr': 0.0004894355427389352, 'samples': 699168, 'steps': 14565, 'loss/train': 4.9930267333984375} +07/25/2024 12:34:08 - INFO - __main__ - Step 14567: {'lr': 0.0004894340296075939, 'samples': 699216, 'steps': 14566, 'loss/train': 0.1108439639210701} +07/25/2024 12:34:09 - INFO - __main__ - Step 14568: {'lr': 0.000489432516370238, 'samples': 699264, 'steps': 14567, 'loss/train': 2.141723394393921} +07/25/2024 12:34:09 - INFO - __main__ - Step 14569: {'lr': 0.0004894310030268681, 'samples': 699312, 'steps': 14568, 'loss/train': 2.02994441986084} +07/25/2024 12:34:09 - INFO - __main__ - Step 14570: {'lr': 0.0004894294895774848, 'samples': 699360, 'steps': 14569, 'loss/train': 2.7782440185546875} +07/25/2024 12:34:10 - INFO - __main__ - Step 14571: {'lr': 0.0004894279760220887, 'samples': 699408, 'steps': 14570, 'loss/train': 1.8068922758102417} +07/25/2024 12:34:10 - INFO - __main__ - Step 14572: {'lr': 0.0004894264623606808, 'samples': 699456, 'steps': 14571, 'loss/train': 2.569261074066162} +07/25/2024 12:34:10 - INFO - __main__ - Step 14573: {'lr': 0.0004894249485932616, 'samples': 699504, 'steps': 14572, 'loss/train': 2.6596808433532715} +07/25/2024 12:34:10 - INFO - __main__ - Step 14574: {'lr': 0.0004894234347198315, 'samples': 699552, 'steps': 14573, 'loss/train': 1.8694093227386475} +07/25/2024 12:34:11 - INFO - __main__ - Step 14575: {'lr': 0.0004894219207403917, 'samples': 699600, 'steps': 14574, 'loss/train': 1.9581305980682373} +07/25/2024 12:34:11 - INFO - __main__ - Step 14576: {'lr': 0.0004894204066549424, 'samples': 699648, 'steps': 14575, 'loss/train': 2.084155797958374} +07/25/2024 12:34:11 - INFO - __main__ - Step 14577: {'lr': 0.0004894188924634844, 'samples': 699696, 'steps': 14576, 'loss/train': 2.219775915145874} +07/25/2024 12:34:12 - INFO - __main__ - Step 14578: {'lr': 0.0004894173781660186, 'samples': 699744, 'steps': 14577, 'loss/train': 1.7003817558288574} +07/25/2024 12:34:12 - INFO - __main__ - Step 14579: {'lr': 0.0004894158637625454, 'samples': 699792, 'steps': 14578, 'loss/train': 1.8026763200759888} +07/25/2024 12:34:12 - INFO - __main__ - Step 14580: {'lr': 0.0004894143492530656, 'samples': 699840, 'steps': 14579, 'loss/train': 2.293149948120117} +07/25/2024 12:34:12 - INFO - __main__ - Step 14581: {'lr': 0.0004894128346375798, 'samples': 699888, 'steps': 14580, 'loss/train': 2.360393762588501} +07/25/2024 12:34:13 - INFO - __main__ - Step 14582: {'lr': 0.0004894113199160888, 'samples': 699936, 'steps': 14581, 'loss/train': 1.6830189228057861} +07/25/2024 12:34:13 - INFO - __main__ - Step 14583: {'lr': 0.000489409805088593, 'samples': 699984, 'steps': 14582, 'loss/train': 1.880470871925354} +07/25/2024 12:34:13 - INFO - __main__ - Step 14584: {'lr': 0.0004894082901550934, 'samples': 700032, 'steps': 14583, 'loss/train': 2.0255463123321533} +07/25/2024 12:34:14 - INFO - __main__ - Step 14585: {'lr': 0.0004894067751155905, 'samples': 700080, 'steps': 14584, 'loss/train': 2.017850875854492} +07/25/2024 12:34:14 - INFO - __main__ - Step 14586: {'lr': 0.0004894052599700849, 'samples': 700128, 'steps': 14585, 'loss/train': 2.121256113052368} +07/25/2024 12:34:14 - INFO - __main__ - Step 14587: {'lr': 0.0004894037447185774, 'samples': 700176, 'steps': 14586, 'loss/train': 2.3240272998809814} +07/25/2024 12:34:14 - INFO - __main__ - Step 14588: {'lr': 0.0004894022293610687, 'samples': 700224, 'steps': 14587, 'loss/train': 2.0436289310455322} +07/25/2024 12:34:15 - INFO - __main__ - Step 14589: {'lr': 0.0004894007138975593, 'samples': 700272, 'steps': 14588, 'loss/train': 1.6765624284744263} +07/25/2024 12:34:15 - INFO - __main__ - Step 14590: {'lr': 0.00048939919832805, 'samples': 700320, 'steps': 14589, 'loss/train': 2.347421169281006} +07/25/2024 12:34:15 - INFO - __main__ - Step 14591: {'lr': 0.0004893976826525415, 'samples': 700368, 'steps': 14590, 'loss/train': 0.14500747621059418} +07/25/2024 12:34:15 - INFO - __main__ - Step 14592: {'lr': 0.0004893961668710344, 'samples': 700416, 'steps': 14591, 'loss/train': 1.7950439453125} +07/25/2024 12:34:16 - INFO - __main__ - Step 14593: {'lr': 0.0004893946509835293, 'samples': 700464, 'steps': 14592, 'loss/train': 2.164720058441162} +07/25/2024 12:34:16 - INFO - __main__ - Step 14594: {'lr': 0.000489393134990027, 'samples': 700512, 'steps': 14593, 'loss/train': 2.017467975616455} +07/25/2024 12:34:16 - INFO - __main__ - Step 14595: {'lr': 0.0004893916188905282, 'samples': 700560, 'steps': 14594, 'loss/train': 1.6710996627807617} +07/25/2024 12:34:17 - INFO - __main__ - Step 14596: {'lr': 0.0004893901026850334, 'samples': 700608, 'steps': 14595, 'loss/train': 2.0465517044067383} +07/25/2024 12:34:17 - INFO - __main__ - Step 14597: {'lr': 0.0004893885863735434, 'samples': 700656, 'steps': 14596, 'loss/train': 2.6669833660125732} +07/25/2024 12:34:17 - INFO - __main__ - Step 14598: {'lr': 0.0004893870699560588, 'samples': 700704, 'steps': 14597, 'loss/train': 1.9812517166137695} +07/25/2024 12:34:17 - INFO - __main__ - Step 14599: {'lr': 0.0004893855534325804, 'samples': 700752, 'steps': 14598, 'loss/train': 2.0544614791870117} +07/25/2024 12:34:18 - INFO - __main__ - Step 14600: {'lr': 0.0004893840368031087, 'samples': 700800, 'steps': 14599, 'loss/train': 2.57647967338562} +07/25/2024 12:34:18 - INFO - __main__ - Step 14601: {'lr': 0.0004893825200676445, 'samples': 700848, 'steps': 14600, 'loss/train': 1.347291111946106} +07/25/2024 12:34:18 - INFO - __main__ - Step 14602: {'lr': 0.0004893810032261884, 'samples': 700896, 'steps': 14601, 'loss/train': 0.9572461843490601} +07/25/2024 12:34:19 - INFO - __main__ - Step 14603: {'lr': 0.0004893794862787412, 'samples': 700944, 'steps': 14602, 'loss/train': 1.9919993877410889} +07/25/2024 12:34:19 - INFO - __main__ - Step 14604: {'lr': 0.0004893779692253034, 'samples': 700992, 'steps': 14603, 'loss/train': 1.0571589469909668} +07/25/2024 12:34:19 - INFO - __main__ - Step 14605: {'lr': 0.0004893764520658756, 'samples': 701040, 'steps': 14604, 'loss/train': 2.5567548274993896} +07/25/2024 12:34:19 - INFO - __main__ - Step 14606: {'lr': 0.0004893749348004588, 'samples': 701088, 'steps': 14605, 'loss/train': 1.3183386325836182} +07/25/2024 12:34:20 - INFO - __main__ - Step 14607: {'lr': 0.0004893734174290535, 'samples': 701136, 'steps': 14606, 'loss/train': 1.7349882125854492} +07/25/2024 12:34:20 - INFO - __main__ - Step 14608: {'lr': 0.0004893718999516603, 'samples': 701184, 'steps': 14607, 'loss/train': 1.7092386484146118} +07/25/2024 12:34:20 - INFO - __main__ - Step 14609: {'lr': 0.0004893703823682799, 'samples': 701232, 'steps': 14608, 'loss/train': 2.1182827949523926} +07/25/2024 12:34:21 - INFO - __main__ - Step 14610: {'lr': 0.0004893688646789131, 'samples': 701280, 'steps': 14609, 'loss/train': 1.4879554510116577} +07/25/2024 12:34:21 - INFO - __main__ - Step 14611: {'lr': 0.0004893673468835603, 'samples': 701328, 'steps': 14610, 'loss/train': 2.087500810623169} +07/25/2024 12:34:21 - INFO - __main__ - Step 14612: {'lr': 0.0004893658289822225, 'samples': 701376, 'steps': 14611, 'loss/train': 1.3328109979629517} +07/25/2024 12:34:21 - INFO - __main__ - Step 14613: {'lr': 0.0004893643109749002, 'samples': 701424, 'steps': 14612, 'loss/train': 1.6105960607528687} +07/25/2024 12:34:22 - INFO - __main__ - Step 14614: {'lr': 0.000489362792861594, 'samples': 701472, 'steps': 14613, 'loss/train': 1.8842668533325195} +07/25/2024 12:34:22 - INFO - __main__ - Step 14615: {'lr': 0.0004893612746423048, 'samples': 701520, 'steps': 14614, 'loss/train': 0.14811861515045166} +07/25/2024 12:34:22 - INFO - __main__ - Step 14616: {'lr': 0.000489359756317033, 'samples': 701568, 'steps': 14615, 'loss/train': 1.4750292301177979} +07/25/2024 12:34:23 - INFO - __main__ - Step 14617: {'lr': 0.0004893582378857795, 'samples': 701616, 'steps': 14616, 'loss/train': 2.1270017623901367} +07/25/2024 12:34:23 - INFO - __main__ - Step 14618: {'lr': 0.0004893567193485447, 'samples': 701664, 'steps': 14617, 'loss/train': 1.144962191581726} +07/25/2024 12:34:23 - INFO - __main__ - Step 14619: {'lr': 0.0004893552007053297, 'samples': 701712, 'steps': 14618, 'loss/train': 1.7974193096160889} +07/25/2024 12:34:23 - INFO - __main__ - Step 14620: {'lr': 0.0004893536819561348, 'samples': 701760, 'steps': 14619, 'loss/train': 1.2288706302642822} +07/25/2024 12:34:24 - INFO - __main__ - Step 14621: {'lr': 0.0004893521631009608, 'samples': 701808, 'steps': 14620, 'loss/train': 2.2162065505981445} +07/25/2024 12:34:24 - INFO - __main__ - Step 14622: {'lr': 0.0004893506441398084, 'samples': 701856, 'steps': 14621, 'loss/train': 2.025062322616577} +07/25/2024 12:34:24 - INFO - __main__ - Step 14623: {'lr': 0.0004893491250726782, 'samples': 701904, 'steps': 14622, 'loss/train': 1.7022494077682495} +07/25/2024 12:34:25 - INFO - __main__ - Step 14624: {'lr': 0.000489347605899571, 'samples': 701952, 'steps': 14623, 'loss/train': 1.7629673480987549} +07/25/2024 12:34:25 - INFO - __main__ - Step 14625: {'lr': 0.0004893460866204872, 'samples': 702000, 'steps': 14624, 'loss/train': 1.69694185256958} +07/25/2024 12:34:25 - INFO - __main__ - Step 14626: {'lr': 0.0004893445672354279, 'samples': 702048, 'steps': 14625, 'loss/train': 1.2906687259674072} +07/25/2024 12:34:25 - INFO - __main__ - Step 14627: {'lr': 0.0004893430477443933, 'samples': 702096, 'steps': 14626, 'loss/train': 1.5264112949371338} +07/25/2024 12:34:26 - INFO - __main__ - Step 14628: {'lr': 0.0004893415281473845, 'samples': 702144, 'steps': 14627, 'loss/train': 1.253121256828308} +07/25/2024 12:34:26 - INFO - __main__ - Step 14629: {'lr': 0.0004893400084444019, 'samples': 702192, 'steps': 14628, 'loss/train': 1.9273074865341187} +07/25/2024 12:34:26 - INFO - __main__ - Step 14630: {'lr': 0.0004893384886354462, 'samples': 702240, 'steps': 14629, 'loss/train': 1.5979505777359009} +07/25/2024 12:34:27 - INFO - __main__ - Step 14631: {'lr': 0.0004893369687205182, 'samples': 702288, 'steps': 14630, 'loss/train': 2.409816026687622} +07/25/2024 12:34:27 - INFO - __main__ - Step 14632: {'lr': 0.0004893354486996184, 'samples': 702336, 'steps': 14631, 'loss/train': 1.8697782754898071} +07/25/2024 12:34:27 - INFO - __main__ - Step 14633: {'lr': 0.0004893339285727477, 'samples': 702384, 'steps': 14632, 'loss/train': 1.9917588233947754} +07/25/2024 12:34:27 - INFO - __main__ - Step 14634: {'lr': 0.0004893324083399066, 'samples': 702432, 'steps': 14633, 'loss/train': 1.6000548601150513} +07/25/2024 12:34:28 - INFO - __main__ - Step 14635: {'lr': 0.0004893308880010957, 'samples': 702480, 'steps': 14634, 'loss/train': 1.6258009672164917} +07/25/2024 12:34:28 - INFO - __main__ - Step 14636: {'lr': 0.0004893293675563159, 'samples': 702528, 'steps': 14635, 'loss/train': 2.0667340755462646} +07/25/2024 12:34:28 - INFO - __main__ - Step 14637: {'lr': 0.0004893278470055678, 'samples': 702576, 'steps': 14636, 'loss/train': 1.9045251607894897} +07/25/2024 12:34:29 - INFO - __main__ - Step 14638: {'lr': 0.0004893263263488519, 'samples': 702624, 'steps': 14637, 'loss/train': 2.2694156169891357} +07/25/2024 12:34:29 - INFO - __main__ - Step 14639: {'lr': 0.0004893248055861691, 'samples': 702672, 'steps': 14638, 'loss/train': 0.12721100449562073} +07/25/2024 12:34:29 - INFO - __main__ - Step 14640: {'lr': 0.00048932328471752, 'samples': 702720, 'steps': 14639, 'loss/train': 1.2631313800811768} +07/25/2024 12:34:29 - INFO - __main__ - Step 14641: {'lr': 0.0004893217637429053, 'samples': 702768, 'steps': 14640, 'loss/train': 1.6278544664382935} +07/25/2024 12:34:30 - INFO - __main__ - Step 14642: {'lr': 0.0004893202426623255, 'samples': 702816, 'steps': 14641, 'loss/train': 1.6189899444580078} +07/25/2024 12:34:30 - INFO - __main__ - Step 14643: {'lr': 0.0004893187214757814, 'samples': 702864, 'steps': 14642, 'loss/train': 1.9959852695465088} +07/25/2024 12:34:30 - INFO - __main__ - Step 14644: {'lr': 0.0004893172001832738, 'samples': 702912, 'steps': 14643, 'loss/train': 1.4773406982421875} +07/25/2024 12:34:31 - INFO - __main__ - Step 14645: {'lr': 0.0004893156787848031, 'samples': 702960, 'steps': 14644, 'loss/train': 1.8825201988220215} +07/25/2024 12:34:31 - INFO - __main__ - Step 14646: {'lr': 0.0004893141572803702, 'samples': 703008, 'steps': 14645, 'loss/train': 1.7360458374023438} +07/25/2024 12:34:31 - INFO - __main__ - Step 14647: {'lr': 0.0004893126356699757, 'samples': 703056, 'steps': 14646, 'loss/train': 2.063603639602661} +07/25/2024 12:34:31 - INFO - __main__ - Step 14648: {'lr': 0.0004893111139536203, 'samples': 703104, 'steps': 14647, 'loss/train': 1.3045637607574463} +07/25/2024 12:34:32 - INFO - __main__ - Step 14649: {'lr': 0.0004893095921313047, 'samples': 703152, 'steps': 14648, 'loss/train': 2.1678223609924316} +07/25/2024 12:34:32 - INFO - __main__ - Step 14650: {'lr': 0.0004893080702030294, 'samples': 703200, 'steps': 14649, 'loss/train': 1.765495777130127} +07/25/2024 12:34:32 - INFO - __main__ - Step 14651: {'lr': 0.0004893065481687952, 'samples': 703248, 'steps': 14650, 'loss/train': 1.6067166328430176} +07/25/2024 12:34:32 - INFO - __main__ - Step 14652: {'lr': 0.0004893050260286028, 'samples': 703296, 'steps': 14651, 'loss/train': 1.6743273735046387} +07/25/2024 12:34:33 - INFO - __main__ - Step 14653: {'lr': 0.0004893035037824528, 'samples': 703344, 'steps': 14652, 'loss/train': 2.4475483894348145} +07/25/2024 12:34:33 - INFO - __main__ - Step 14654: {'lr': 0.000489301981430346, 'samples': 703392, 'steps': 14653, 'loss/train': 1.90167236328125} +07/25/2024 12:34:33 - INFO - __main__ - Step 14655: {'lr': 0.0004893004589722828, 'samples': 703440, 'steps': 14654, 'loss/train': 1.407475233078003} +07/25/2024 12:34:34 - INFO - __main__ - Step 14656: {'lr': 0.0004892989364082643, 'samples': 703488, 'steps': 14655, 'loss/train': 1.9449262619018555} +07/25/2024 12:34:34 - INFO - __main__ - Step 14657: {'lr': 0.0004892974137382908, 'samples': 703536, 'steps': 14656, 'loss/train': 1.5635474920272827} +07/25/2024 12:34:34 - INFO - __main__ - Step 14658: {'lr': 0.0004892958909623632, 'samples': 703584, 'steps': 14657, 'loss/train': 1.9616953134536743} +07/25/2024 12:34:34 - INFO - __main__ - Step 14659: {'lr': 0.000489294368080482, 'samples': 703632, 'steps': 14658, 'loss/train': 1.7888137102127075} +07/25/2024 12:34:35 - INFO - __main__ - Step 14660: {'lr': 0.0004892928450926479, 'samples': 703680, 'steps': 14659, 'loss/train': 1.7969664335250854} +07/25/2024 12:34:35 - INFO - __main__ - Step 14661: {'lr': 0.0004892913219988617, 'samples': 703728, 'steps': 14660, 'loss/train': 1.8701170682907104} +07/25/2024 12:34:35 - INFO - __main__ - Step 14662: {'lr': 0.000489289798799124, 'samples': 703776, 'steps': 14661, 'loss/train': 1.6851327419281006} +07/25/2024 12:34:36 - INFO - __main__ - Step 14663: {'lr': 0.0004892882754934356, 'samples': 703824, 'steps': 14662, 'loss/train': 0.09418191015720367} +07/25/2024 12:34:36 - INFO - __main__ - Step 14664: {'lr': 0.0004892867520817969, 'samples': 703872, 'steps': 14663, 'loss/train': 1.4512131214141846} +07/25/2024 12:34:36 - INFO - __main__ - Step 14665: {'lr': 0.0004892852285642089, 'samples': 703920, 'steps': 14664, 'loss/train': 1.0511165857315063} +07/25/2024 12:34:36 - INFO - __main__ - Step 14666: {'lr': 0.0004892837049406719, 'samples': 703968, 'steps': 14665, 'loss/train': 1.6064642667770386} +07/25/2024 12:34:37 - INFO - __main__ - Step 14667: {'lr': 0.000489282181211187, 'samples': 704016, 'steps': 14666, 'loss/train': 2.1586568355560303} +07/25/2024 12:34:37 - INFO - __main__ - Step 14668: {'lr': 0.0004892806573757544, 'samples': 704064, 'steps': 14667, 'loss/train': 1.107539415359497} +07/25/2024 12:34:37 - INFO - __main__ - Step 14669: {'lr': 0.0004892791334343752, 'samples': 704112, 'steps': 14668, 'loss/train': 1.88993239402771} +07/25/2024 12:34:38 - INFO - __main__ - Step 14670: {'lr': 0.0004892776093870499, 'samples': 704160, 'steps': 14669, 'loss/train': 2.328228712081909} +07/25/2024 12:34:38 - INFO - __main__ - Step 14671: {'lr': 0.0004892760852337791, 'samples': 704208, 'steps': 14670, 'loss/train': 1.5407664775848389} +07/25/2024 12:34:38 - INFO - __main__ - Step 14672: {'lr': 0.0004892745609745636, 'samples': 704256, 'steps': 14671, 'loss/train': 1.9858944416046143} +07/25/2024 12:34:38 - INFO - __main__ - Step 14673: {'lr': 0.000489273036609404, 'samples': 704304, 'steps': 14672, 'loss/train': 2.95900559425354} +07/25/2024 12:34:39 - INFO - __main__ - Step 14674: {'lr': 0.0004892715121383011, 'samples': 704352, 'steps': 14673, 'loss/train': 0.5018801093101501} +07/25/2024 12:34:39 - INFO - __main__ - Step 14675: {'lr': 0.0004892699875612554, 'samples': 704400, 'steps': 14674, 'loss/train': 1.5972734689712524} +07/25/2024 12:34:39 - INFO - __main__ - Step 14676: {'lr': 0.0004892684628782677, 'samples': 704448, 'steps': 14675, 'loss/train': 2.0066568851470947} +07/25/2024 12:34:40 - INFO - __main__ - Step 14677: {'lr': 0.0004892669380893386, 'samples': 704496, 'steps': 14676, 'loss/train': 1.7989498376846313} +07/25/2024 12:34:40 - INFO - __main__ - Step 14678: {'lr': 0.0004892654131944687, 'samples': 704544, 'steps': 14677, 'loss/train': 1.712637186050415} +07/25/2024 12:34:40 - INFO - __main__ - Step 14679: {'lr': 0.0004892638881936589, 'samples': 704592, 'steps': 14678, 'loss/train': 2.029301404953003} +07/25/2024 12:34:40 - INFO - __main__ - Step 14680: {'lr': 0.0004892623630869098, 'samples': 704640, 'steps': 14679, 'loss/train': 1.3754701614379883} +07/25/2024 12:34:41 - INFO - __main__ - Step 14681: {'lr': 0.000489260837874222, 'samples': 704688, 'steps': 14680, 'loss/train': 2.845684766769409} +07/25/2024 12:34:41 - INFO - __main__ - Step 14682: {'lr': 0.0004892593125555961, 'samples': 704736, 'steps': 14681, 'loss/train': 1.8692142963409424} +07/25/2024 12:34:41 - INFO - __main__ - Step 14683: {'lr': 0.0004892577871310329, 'samples': 704784, 'steps': 14682, 'loss/train': 1.9983255863189697} +07/25/2024 12:34:42 - INFO - __main__ - Step 14684: {'lr': 0.0004892562616005331, 'samples': 704832, 'steps': 14683, 'loss/train': 1.781341314315796} +07/25/2024 12:34:42 - INFO - __main__ - Step 14685: {'lr': 0.0004892547359640973, 'samples': 704880, 'steps': 14684, 'loss/train': 1.172932744026184} +07/25/2024 12:34:42 - INFO - __main__ - Step 14686: {'lr': 0.0004892532102217264, 'samples': 704928, 'steps': 14685, 'loss/train': 1.862949013710022} +07/25/2024 12:34:42 - INFO - __main__ - Step 14687: {'lr': 0.0004892516843734206, 'samples': 704976, 'steps': 14686, 'loss/train': 0.12388252466917038} +07/25/2024 12:34:43 - INFO - __main__ - Step 14688: {'lr': 0.000489250158419181, 'samples': 705024, 'steps': 14687, 'loss/train': 1.704949140548706} +07/25/2024 12:34:43 - INFO - __main__ - Step 14689: {'lr': 0.0004892486323590081, 'samples': 705072, 'steps': 14688, 'loss/train': 1.5156978368759155} +07/25/2024 12:34:43 - INFO - __main__ - Step 14690: {'lr': 0.0004892471061929026, 'samples': 705120, 'steps': 14689, 'loss/train': 1.7074843645095825} +07/25/2024 12:34:44 - INFO - __main__ - Step 14691: {'lr': 0.0004892455799208653, 'samples': 705168, 'steps': 14690, 'loss/train': 1.8656988143920898} +07/25/2024 12:34:44 - INFO - __main__ - Step 14692: {'lr': 0.0004892440535428966, 'samples': 705216, 'steps': 14691, 'loss/train': 1.5920330286026} +07/25/2024 12:34:44 - INFO - __main__ - Step 14693: {'lr': 0.0004892425270589974, 'samples': 705264, 'steps': 14692, 'loss/train': 1.8514164686203003} +07/25/2024 12:34:44 - INFO - __main__ - Step 14694: {'lr': 0.0004892410004691683, 'samples': 705312, 'steps': 14693, 'loss/train': 2.2643096446990967} +07/25/2024 12:34:45 - INFO - __main__ - Step 14695: {'lr': 0.0004892394737734101, 'samples': 705360, 'steps': 14694, 'loss/train': 1.5299808979034424} +07/25/2024 12:34:45 - INFO - __main__ - Step 14696: {'lr': 0.0004892379469717232, 'samples': 705408, 'steps': 14695, 'loss/train': 2.117487668991089} +07/25/2024 12:34:45 - INFO - __main__ - Step 14697: {'lr': 0.0004892364200641085, 'samples': 705456, 'steps': 14696, 'loss/train': 0.31657132506370544} +07/25/2024 12:34:46 - INFO - __main__ - Step 14698: {'lr': 0.0004892348930505666, 'samples': 705504, 'steps': 14697, 'loss/train': 1.525400996208191} +07/25/2024 12:34:46 - INFO - __main__ - Step 14699: {'lr': 0.0004892333659310982, 'samples': 705552, 'steps': 14698, 'loss/train': 2.101130962371826} +07/25/2024 12:34:46 - INFO - __main__ - Step 14700: {'lr': 0.0004892318387057041, 'samples': 705600, 'steps': 14699, 'loss/train': 1.6674801111221313} +07/25/2024 12:34:46 - INFO - __main__ - Step 14701: {'lr': 0.0004892303113743847, 'samples': 705648, 'steps': 14700, 'loss/train': 1.900591254234314} +07/25/2024 12:34:47 - INFO - __main__ - Step 14702: {'lr': 0.0004892287839371409, 'samples': 705696, 'steps': 14701, 'loss/train': 1.2745319604873657} +07/25/2024 12:34:47 - INFO - __main__ - Step 14703: {'lr': 0.0004892272563939732, 'samples': 705744, 'steps': 14702, 'loss/train': 1.293054223060608} +07/25/2024 12:34:47 - INFO - __main__ - Step 14704: {'lr': 0.0004892257287448824, 'samples': 705792, 'steps': 14703, 'loss/train': 1.4510160684585571} +07/25/2024 12:34:48 - INFO - __main__ - Step 14705: {'lr': 0.0004892242009898692, 'samples': 705840, 'steps': 14704, 'loss/train': 1.6755610704421997} +07/25/2024 12:34:48 - INFO - __main__ - Step 14706: {'lr': 0.0004892226731289342, 'samples': 705888, 'steps': 14705, 'loss/train': 1.5820802450180054} +07/25/2024 12:34:48 - INFO - __main__ - Step 14707: {'lr': 0.0004892211451620781, 'samples': 705936, 'steps': 14706, 'loss/train': 2.1385104656219482} +07/25/2024 12:34:48 - INFO - __main__ - Step 14708: {'lr': 0.0004892196170893016, 'samples': 705984, 'steps': 14707, 'loss/train': 1.8958395719528198} +07/25/2024 12:34:49 - INFO - __main__ - Step 14709: {'lr': 0.0004892180889106054, 'samples': 706032, 'steps': 14708, 'loss/train': 1.2618118524551392} +07/25/2024 12:34:49 - INFO - __main__ - Step 14710: {'lr': 0.00048921656062599, 'samples': 706080, 'steps': 14709, 'loss/train': 2.0542330741882324} +07/25/2024 12:34:49 - INFO - __main__ - Step 14711: {'lr': 0.0004892150322354563, 'samples': 706128, 'steps': 14710, 'loss/train': 0.4294157922267914} +07/25/2024 12:34:49 - INFO - __main__ - Step 14712: {'lr': 0.0004892135037390049, 'samples': 706176, 'steps': 14711, 'loss/train': 1.0357766151428223} +07/25/2024 12:34:50 - INFO - __main__ - Step 14713: {'lr': 0.0004892119751366364, 'samples': 706224, 'steps': 14712, 'loss/train': 1.4707311391830444} +07/25/2024 12:34:50 - INFO - __main__ - Step 14714: {'lr': 0.0004892104464283515, 'samples': 706272, 'steps': 14713, 'loss/train': 1.8182506561279297} +07/25/2024 12:34:50 - INFO - __main__ - Step 14715: {'lr': 0.000489208917614151, 'samples': 706320, 'steps': 14714, 'loss/train': 2.2344346046447754} +07/25/2024 12:34:51 - INFO - __main__ - Step 14716: {'lr': 0.0004892073886940355, 'samples': 706368, 'steps': 14715, 'loss/train': 1.5466818809509277} +07/25/2024 12:34:51 - INFO - __main__ - Step 14717: {'lr': 0.0004892058596680057, 'samples': 706416, 'steps': 14716, 'loss/train': 1.8847779035568237} +07/25/2024 12:34:51 - INFO - __main__ - Step 14718: {'lr': 0.0004892043305360622, 'samples': 706464, 'steps': 14717, 'loss/train': 1.8018100261688232} +07/25/2024 12:34:51 - INFO - __main__ - Step 14719: {'lr': 0.0004892028012982057, 'samples': 706512, 'steps': 14718, 'loss/train': 1.3781135082244873} +07/25/2024 12:34:52 - INFO - __main__ - Step 14720: {'lr': 0.0004892012719544369, 'samples': 706560, 'steps': 14719, 'loss/train': 1.7661104202270508} +07/25/2024 12:34:52 - INFO - __main__ - Step 14721: {'lr': 0.0004891997425047565, 'samples': 706608, 'steps': 14720, 'loss/train': 1.9719016551971436} +07/25/2024 12:34:52 - INFO - __main__ - Step 14722: {'lr': 0.0004891982129491652, 'samples': 706656, 'steps': 14721, 'loss/train': 1.5908265113830566} +07/25/2024 12:34:53 - INFO - __main__ - Step 14723: {'lr': 0.0004891966832876637, 'samples': 706704, 'steps': 14722, 'loss/train': 1.2864012718200684} +07/25/2024 12:34:53 - INFO - __main__ - Step 14724: {'lr': 0.0004891951535202525, 'samples': 706752, 'steps': 14723, 'loss/train': 1.8244131803512573} +07/25/2024 12:34:53 - INFO - __main__ - Step 14725: {'lr': 0.0004891936236469323, 'samples': 706800, 'steps': 14724, 'loss/train': 1.9144556522369385} +07/25/2024 12:34:53 - INFO - __main__ - Step 14726: {'lr': 0.000489192093667704, 'samples': 706848, 'steps': 14725, 'loss/train': 1.8029098510742188} +07/25/2024 12:34:54 - INFO - __main__ - Step 14727: {'lr': 0.0004891905635825681, 'samples': 706896, 'steps': 14726, 'loss/train': 2.280585289001465} +07/25/2024 12:34:54 - INFO - __main__ - Step 14728: {'lr': 0.0004891890333915253, 'samples': 706944, 'steps': 14727, 'loss/train': 1.7911642789840698} +07/25/2024 12:34:54 - INFO - __main__ - Step 14729: {'lr': 0.0004891875030945764, 'samples': 706992, 'steps': 14728, 'loss/train': 1.3450456857681274} +07/25/2024 12:34:55 - INFO - __main__ - Step 14730: {'lr': 0.0004891859726917218, 'samples': 707040, 'steps': 14729, 'loss/train': 2.548888683319092} +07/25/2024 12:34:55 - INFO - __main__ - Step 14731: {'lr': 0.0004891844421829626, 'samples': 707088, 'steps': 14730, 'loss/train': 2.0711870193481445} +07/25/2024 12:34:55 - INFO - __main__ - Step 14732: {'lr': 0.0004891829115682989, 'samples': 707136, 'steps': 14731, 'loss/train': 1.9210463762283325} +07/25/2024 12:34:55 - INFO - __main__ - Step 14733: {'lr': 0.000489181380847732, 'samples': 707184, 'steps': 14732, 'loss/train': 1.998226523399353} +07/25/2024 12:34:56 - INFO - __main__ - Step 14734: {'lr': 0.0004891798500212622, 'samples': 707232, 'steps': 14733, 'loss/train': 1.191900610923767} +07/25/2024 12:34:56 - INFO - __main__ - Step 14735: {'lr': 0.0004891783190888902, 'samples': 707280, 'steps': 14734, 'loss/train': 1.7498183250427246} +07/25/2024 12:34:56 - INFO - __main__ - Step 14736: {'lr': 0.0004891767880506169, 'samples': 707328, 'steps': 14735, 'loss/train': 1.9509047269821167} +07/25/2024 12:34:57 - INFO - __main__ - Step 14737: {'lr': 0.0004891752569064426, 'samples': 707376, 'steps': 14736, 'loss/train': 1.7265212535858154} +07/25/2024 12:34:57 - INFO - __main__ - Step 14738: {'lr': 0.0004891737256563683, 'samples': 707424, 'steps': 14737, 'loss/train': 1.434486985206604} +07/25/2024 12:34:57 - INFO - __main__ - Step 14739: {'lr': 0.0004891721943003946, 'samples': 707472, 'steps': 14738, 'loss/train': 1.9258383512496948} +07/25/2024 12:34:57 - INFO - __main__ - Step 14740: {'lr': 0.0004891706628385222, 'samples': 707520, 'steps': 14739, 'loss/train': 1.4516324996948242} +07/25/2024 12:34:58 - INFO - __main__ - Step 14741: {'lr': 0.0004891691312707516, 'samples': 707568, 'steps': 14740, 'loss/train': 1.3954925537109375} +07/25/2024 12:34:58 - INFO - __main__ - Step 14742: {'lr': 0.0004891675995970837, 'samples': 707616, 'steps': 14741, 'loss/train': 1.9505300521850586} +07/25/2024 12:34:58 - INFO - __main__ - Step 14743: {'lr': 0.000489166067817519, 'samples': 707664, 'steps': 14742, 'loss/train': 1.604373812675476} +07/25/2024 12:34:59 - INFO - __main__ - Step 14744: {'lr': 0.0004891645359320583, 'samples': 707712, 'steps': 14743, 'loss/train': 0.7943459153175354} +07/25/2024 12:34:59 - INFO - __main__ - Step 14745: {'lr': 0.0004891630039407024, 'samples': 707760, 'steps': 14744, 'loss/train': 1.8017916679382324} +07/25/2024 12:34:59 - INFO - __main__ - Step 14746: {'lr': 0.0004891614718434516, 'samples': 707808, 'steps': 14745, 'loss/train': 1.8192384243011475} +07/25/2024 12:34:59 - INFO - __main__ - Step 14747: {'lr': 0.0004891599396403069, 'samples': 707856, 'steps': 14746, 'loss/train': 1.9485008716583252} +07/25/2024 12:35:00 - INFO - __main__ - Step 14748: {'lr': 0.0004891584073312689, 'samples': 707904, 'steps': 14747, 'loss/train': 1.9390004873275757} +07/25/2024 12:35:00 - INFO - __main__ - Step 14749: {'lr': 0.0004891568749163383, 'samples': 707952, 'steps': 14748, 'loss/train': 2.087679147720337} +07/25/2024 12:35:00 - INFO - __main__ - Step 14750: {'lr': 0.0004891553423955156, 'samples': 708000, 'steps': 14749, 'loss/train': 1.9501844644546509} +07/25/2024 12:35:01 - INFO - __main__ - Step 14751: {'lr': 0.0004891538097688018, 'samples': 708048, 'steps': 14750, 'loss/train': 2.0771682262420654} +07/25/2024 12:35:01 - INFO - __main__ - Step 14752: {'lr': 0.0004891522770361972, 'samples': 708096, 'steps': 14751, 'loss/train': 1.7907655239105225} +07/25/2024 12:35:01 - INFO - __main__ - Step 14753: {'lr': 0.0004891507441977027, 'samples': 708144, 'steps': 14752, 'loss/train': 1.176316499710083} +07/25/2024 12:35:01 - INFO - __main__ - Step 14754: {'lr': 0.0004891492112533191, 'samples': 708192, 'steps': 14753, 'loss/train': 2.38720440864563} +07/25/2024 12:35:02 - INFO - __main__ - Step 14755: {'lr': 0.0004891476782030469, 'samples': 708240, 'steps': 14754, 'loss/train': 1.9053760766983032} +07/25/2024 12:35:02 - INFO - __main__ - Step 14756: {'lr': 0.0004891461450468867, 'samples': 708288, 'steps': 14755, 'loss/train': 1.4336580038070679} +07/25/2024 12:35:02 - INFO - __main__ - Step 14757: {'lr': 0.0004891446117848393, 'samples': 708336, 'steps': 14756, 'loss/train': 1.8991739749908447} +07/25/2024 12:35:03 - INFO - __main__ - Step 14758: {'lr': 0.0004891430784169054, 'samples': 708384, 'steps': 14757, 'loss/train': 1.254319667816162} +07/25/2024 12:35:03 - INFO - __main__ - Step 14759: {'lr': 0.0004891415449430857, 'samples': 708432, 'steps': 14758, 'loss/train': 1.8857669830322266} +07/25/2024 12:35:03 - INFO - __main__ - Step 14760: {'lr': 0.0004891400113633807, 'samples': 708480, 'steps': 14759, 'loss/train': 2.143306255340576} +07/25/2024 12:35:03 - INFO - __main__ - Step 14761: {'lr': 0.0004891384776777913, 'samples': 708528, 'steps': 14760, 'loss/train': 0.9994295239448547} +07/25/2024 12:35:04 - INFO - __main__ - Step 14762: {'lr': 0.0004891369438863181, 'samples': 708576, 'steps': 14761, 'loss/train': 1.309114933013916} +07/25/2024 12:35:04 - INFO - __main__ - Step 14763: {'lr': 0.0004891354099889616, 'samples': 708624, 'steps': 14762, 'loss/train': 1.539743185043335} +07/25/2024 12:35:04 - INFO - __main__ - Step 14764: {'lr': 0.0004891338759857229, 'samples': 708672, 'steps': 14763, 'loss/train': 2.265641927719116} +07/25/2024 12:35:05 - INFO - __main__ - Step 14765: {'lr': 0.0004891323418766022, 'samples': 708720, 'steps': 14764, 'loss/train': 1.6975854635238647} +07/25/2024 12:35:05 - INFO - __main__ - Step 14766: {'lr': 0.0004891308076616005, 'samples': 708768, 'steps': 14765, 'loss/train': 1.1505223512649536} +07/25/2024 12:35:05 - INFO - __main__ - Step 14767: {'lr': 0.0004891292733407184, 'samples': 708816, 'steps': 14766, 'loss/train': 2.2357430458068848} +07/25/2024 12:35:05 - INFO - __main__ - Step 14768: {'lr': 0.0004891277389139565, 'samples': 708864, 'steps': 14767, 'loss/train': 1.6111551523208618} +07/25/2024 12:35:06 - INFO - __main__ - Step 14769: {'lr': 0.0004891262043813156, 'samples': 708912, 'steps': 14768, 'loss/train': 1.4689987897872925} +07/25/2024 12:35:06 - INFO - __main__ - Step 14770: {'lr': 0.0004891246697427962, 'samples': 708960, 'steps': 14769, 'loss/train': 1.5174777507781982} +07/25/2024 12:35:06 - INFO - __main__ - Step 14771: {'lr': 0.0004891231349983991, 'samples': 709008, 'steps': 14770, 'loss/train': 1.4042494297027588} +07/25/2024 12:35:07 - INFO - __main__ - Step 14772: {'lr': 0.0004891216001481251, 'samples': 709056, 'steps': 14771, 'loss/train': 1.7135251760482788} +07/25/2024 12:35:07 - INFO - __main__ - Step 14773: {'lr': 0.0004891200651919748, 'samples': 709104, 'steps': 14772, 'loss/train': 2.0490341186523438} +07/25/2024 12:35:07 - INFO - __main__ - Step 14774: {'lr': 0.0004891185301299487, 'samples': 709152, 'steps': 14773, 'loss/train': 2.1250438690185547} +07/25/2024 12:35:07 - INFO - __main__ - Step 14775: {'lr': 0.0004891169949620477, 'samples': 709200, 'steps': 14774, 'loss/train': 1.9171881675720215} +07/25/2024 12:35:08 - INFO - __main__ - Step 14776: {'lr': 0.0004891154596882723, 'samples': 709248, 'steps': 14775, 'loss/train': 1.9103782176971436} +07/25/2024 12:35:08 - INFO - __main__ - Step 14777: {'lr': 0.0004891139243086232, 'samples': 709296, 'steps': 14776, 'loss/train': 1.373047947883606} +07/25/2024 12:35:08 - INFO - __main__ - Step 14778: {'lr': 0.0004891123888231014, 'samples': 709344, 'steps': 14777, 'loss/train': 1.6713049411773682} +07/25/2024 12:35:09 - INFO - __main__ - Step 14779: {'lr': 0.0004891108532317071, 'samples': 709392, 'steps': 14778, 'loss/train': 1.2698091268539429} +07/25/2024 12:35:09 - INFO - __main__ - Step 14780: {'lr': 0.0004891093175344414, 'samples': 709440, 'steps': 14779, 'loss/train': 1.4078716039657593} +07/25/2024 12:35:09 - INFO - __main__ - Step 14781: {'lr': 0.0004891077817313047, 'samples': 709488, 'steps': 14780, 'loss/train': 1.3509949445724487} +07/25/2024 12:35:09 - INFO - __main__ - Step 14782: {'lr': 0.0004891062458222977, 'samples': 709536, 'steps': 14781, 'loss/train': 1.9046530723571777} +07/25/2024 12:35:10 - INFO - __main__ - Step 14783: {'lr': 0.0004891047098074214, 'samples': 709584, 'steps': 14782, 'loss/train': 0.9905339479446411} +07/25/2024 12:35:10 - INFO - __main__ - Step 14784: {'lr': 0.000489103173686676, 'samples': 709632, 'steps': 14783, 'loss/train': 1.9663275480270386} +07/25/2024 12:35:10 - INFO - __main__ - Step 14785: {'lr': 0.0004891016374600625, 'samples': 709680, 'steps': 14784, 'loss/train': 1.4768468141555786} +07/25/2024 12:35:10 - INFO - __main__ - Step 14786: {'lr': 0.0004891001011275815, 'samples': 709728, 'steps': 14785, 'loss/train': 1.068462610244751} +07/25/2024 12:35:11 - INFO - __main__ - Step 14787: {'lr': 0.0004890985646892336, 'samples': 709776, 'steps': 14786, 'loss/train': 1.5958632230758667} +07/25/2024 12:35:11 - INFO - __main__ - Step 14788: {'lr': 0.0004890970281450197, 'samples': 709824, 'steps': 14787, 'loss/train': 1.7779489755630493} +07/25/2024 12:35:11 - INFO - __main__ - Step 14789: {'lr': 0.0004890954914949402, 'samples': 709872, 'steps': 14788, 'loss/train': 1.9414889812469482} +07/25/2024 12:35:12 - INFO - __main__ - Step 14790: {'lr': 0.000489093954738996, 'samples': 709920, 'steps': 14789, 'loss/train': 1.3832181692123413} +07/25/2024 12:35:12 - INFO - __main__ - Step 14791: {'lr': 0.0004890924178771876, 'samples': 709968, 'steps': 14790, 'loss/train': 1.589796543121338} +07/25/2024 12:35:12 - INFO - __main__ - Step 14792: {'lr': 0.0004890908809095159, 'samples': 710016, 'steps': 14791, 'loss/train': 1.8704525232315063} +07/25/2024 12:35:12 - INFO - __main__ - Step 14793: {'lr': 0.0004890893438359813, 'samples': 710064, 'steps': 14792, 'loss/train': 1.7088016271591187} +07/25/2024 12:35:13 - INFO - __main__ - Step 14794: {'lr': 0.0004890878066565848, 'samples': 710112, 'steps': 14793, 'loss/train': 1.9712227582931519} +07/25/2024 12:35:13 - INFO - __main__ - Step 14795: {'lr': 0.0004890862693713268, 'samples': 710160, 'steps': 14794, 'loss/train': 0.953974187374115} +07/25/2024 12:35:13 - INFO - __main__ - Step 14796: {'lr': 0.0004890847319802082, 'samples': 710208, 'steps': 14795, 'loss/train': 2.164005756378174} +07/25/2024 12:35:14 - INFO - __main__ - Step 14797: {'lr': 0.0004890831944832296, 'samples': 710256, 'steps': 14796, 'loss/train': 1.642238974571228} +07/25/2024 12:35:14 - INFO - __main__ - Step 14798: {'lr': 0.0004890816568803915, 'samples': 710304, 'steps': 14797, 'loss/train': 1.5323002338409424} +07/25/2024 12:35:14 - INFO - __main__ - Step 14799: {'lr': 0.0004890801191716949, 'samples': 710352, 'steps': 14798, 'loss/train': 1.8004275560379028} +07/25/2024 12:35:14 - INFO - __main__ - Step 14800: {'lr': 0.0004890785813571403, 'samples': 710400, 'steps': 14799, 'loss/train': 2.1590495109558105} +07/25/2024 12:35:15 - INFO - __main__ - Step 14801: {'lr': 0.0004890770434367284, 'samples': 710448, 'steps': 14800, 'loss/train': 2.2762198448181152} +07/25/2024 12:35:15 - INFO - __main__ - Step 14802: {'lr': 0.0004890755054104597, 'samples': 710496, 'steps': 14801, 'loss/train': 1.140763521194458} +07/25/2024 12:35:15 - INFO - __main__ - Step 14803: {'lr': 0.0004890739672783353, 'samples': 710544, 'steps': 14802, 'loss/train': 1.5657880306243896} +07/25/2024 12:35:16 - INFO - __main__ - Step 14804: {'lr': 0.0004890724290403556, 'samples': 710592, 'steps': 14803, 'loss/train': 1.1249032020568848} +07/25/2024 12:35:16 - INFO - __main__ - Step 14805: {'lr': 0.0004890708906965212, 'samples': 710640, 'steps': 14804, 'loss/train': 1.6594529151916504} +07/25/2024 12:35:16 - INFO - __main__ - Step 14806: {'lr': 0.000489069352246833, 'samples': 710688, 'steps': 14805, 'loss/train': 1.858839750289917} +07/25/2024 12:35:16 - INFO - __main__ - Step 14807: {'lr': 0.0004890678136912916, 'samples': 710736, 'steps': 14806, 'loss/train': 1.623533844947815} +07/25/2024 12:35:17 - INFO - __main__ - Step 14808: {'lr': 0.0004890662750298976, 'samples': 710784, 'steps': 14807, 'loss/train': 1.6125072240829468} +07/25/2024 12:35:17 - INFO - __main__ - Step 14809: {'lr': 0.0004890647362626517, 'samples': 710832, 'steps': 14808, 'loss/train': 1.8931500911712646} +07/25/2024 12:35:17 - INFO - __main__ - Step 14810: {'lr': 0.0004890631973895549, 'samples': 710880, 'steps': 14809, 'loss/train': 1.7788560390472412} +07/25/2024 12:35:18 - INFO - __main__ - Step 14811: {'lr': 0.0004890616584106073, 'samples': 710928, 'steps': 14810, 'loss/train': 1.594839096069336} +07/25/2024 12:35:18 - INFO - __main__ - Step 14812: {'lr': 0.0004890601193258101, 'samples': 710976, 'steps': 14811, 'loss/train': 2.184091329574585} +07/25/2024 12:35:18 - INFO - __main__ - Step 14813: {'lr': 0.0004890585801351638, 'samples': 711024, 'steps': 14812, 'loss/train': 1.6683313846588135} +07/25/2024 12:35:18 - INFO - __main__ - Step 14814: {'lr': 0.000489057040838669, 'samples': 711072, 'steps': 14813, 'loss/train': 2.002750873565674} +07/25/2024 12:35:19 - INFO - __main__ - Step 14815: {'lr': 0.0004890555014363263, 'samples': 711120, 'steps': 14814, 'loss/train': 1.0283063650131226} +07/25/2024 12:35:19 - INFO - __main__ - Step 14816: {'lr': 0.0004890539619281367, 'samples': 711168, 'steps': 14815, 'loss/train': 1.7792692184448242} +07/25/2024 12:35:19 - INFO - __main__ - Step 14817: {'lr': 0.0004890524223141006, 'samples': 711216, 'steps': 14816, 'loss/train': 1.8742684125900269} +07/25/2024 12:35:20 - INFO - __main__ - Step 14818: {'lr': 0.0004890508825942189, 'samples': 711264, 'steps': 14817, 'loss/train': 1.6159085035324097} +07/25/2024 12:35:20 - INFO - __main__ - Step 14819: {'lr': 0.0004890493427684922, 'samples': 711312, 'steps': 14818, 'loss/train': 1.5595991611480713} +07/25/2024 12:35:20 - INFO - __main__ - Step 14820: {'lr': 0.000489047802836921, 'samples': 711360, 'steps': 14819, 'loss/train': 1.7417068481445312} +07/25/2024 12:35:20 - INFO - __main__ - Step 14821: {'lr': 0.0004890462627995061, 'samples': 711408, 'steps': 14820, 'loss/train': 1.732027292251587} +07/25/2024 12:35:21 - INFO - __main__ - Step 14822: {'lr': 0.0004890447226562483, 'samples': 711456, 'steps': 14821, 'loss/train': 2.0963926315307617} +07/25/2024 12:35:21 - INFO - __main__ - Step 14823: {'lr': 0.0004890431824071482, 'samples': 711504, 'steps': 14822, 'loss/train': 2.133488178253174} +07/25/2024 12:35:21 - INFO - __main__ - Step 14824: {'lr': 0.0004890416420522065, 'samples': 711552, 'steps': 14823, 'loss/train': 2.1782989501953125} +07/25/2024 12:35:22 - INFO - __main__ - Step 14825: {'lr': 0.0004890401015914238, 'samples': 711600, 'steps': 14824, 'loss/train': 1.5636813640594482} +07/25/2024 12:35:22 - INFO - __main__ - Step 14826: {'lr': 0.0004890385610248009, 'samples': 711648, 'steps': 14825, 'loss/train': 1.851670265197754} +07/25/2024 12:35:22 - INFO - __main__ - Step 14827: {'lr': 0.0004890370203523383, 'samples': 711696, 'steps': 14826, 'loss/train': 2.3864035606384277} +07/25/2024 12:35:22 - INFO - __main__ - Step 14828: {'lr': 0.0004890354795740369, 'samples': 711744, 'steps': 14827, 'loss/train': 2.049708366394043} +07/25/2024 12:35:23 - INFO - __main__ - Step 14829: {'lr': 0.0004890339386898973, 'samples': 711792, 'steps': 14828, 'loss/train': 1.7968108654022217} +07/25/2024 12:35:23 - INFO - __main__ - Step 14830: {'lr': 0.0004890323976999201, 'samples': 711840, 'steps': 14829, 'loss/train': 2.0737545490264893} +07/25/2024 12:35:23 - INFO - __main__ - Step 14831: {'lr': 0.0004890308566041062, 'samples': 711888, 'steps': 14830, 'loss/train': 1.2274765968322754} +07/25/2024 12:35:24 - INFO - __main__ - Step 14832: {'lr': 0.000489029315402456, 'samples': 711936, 'steps': 14831, 'loss/train': 1.676855206489563} +07/25/2024 12:35:24 - INFO - __main__ - Step 14833: {'lr': 0.0004890277740949704, 'samples': 711984, 'steps': 14832, 'loss/train': 1.5354068279266357} +07/25/2024 12:35:24 - INFO - __main__ - Step 14834: {'lr': 0.0004890262326816499, 'samples': 712032, 'steps': 14833, 'loss/train': 1.893997311592102} +07/25/2024 12:35:24 - INFO - __main__ - Step 14835: {'lr': 0.0004890246911624954, 'samples': 712080, 'steps': 14834, 'loss/train': 1.394131064414978} +07/25/2024 12:35:25 - INFO - __main__ - Step 14836: {'lr': 0.0004890231495375074, 'samples': 712128, 'steps': 14835, 'loss/train': 2.026970863342285} +07/25/2024 12:35:25 - INFO - __main__ - Step 14837: {'lr': 0.0004890216078066866, 'samples': 712176, 'steps': 14836, 'loss/train': 1.762139081954956} +07/25/2024 12:35:25 - INFO - __main__ - Step 14838: {'lr': 0.0004890200659700337, 'samples': 712224, 'steps': 14837, 'loss/train': 1.7326605319976807} +07/25/2024 12:35:26 - INFO - __main__ - Step 14839: {'lr': 0.0004890185240275496, 'samples': 712272, 'steps': 14838, 'loss/train': 1.7516796588897705} +07/25/2024 12:35:26 - INFO - __main__ - Step 14840: {'lr': 0.0004890169819792346, 'samples': 712320, 'steps': 14839, 'loss/train': 1.885952353477478} +07/25/2024 12:35:26 - INFO - __main__ - Step 14841: {'lr': 0.0004890154398250896, 'samples': 712368, 'steps': 14840, 'loss/train': 2.2445240020751953} +07/25/2024 12:35:26 - INFO - __main__ - Step 14842: {'lr': 0.0004890138975651154, 'samples': 712416, 'steps': 14841, 'loss/train': 1.4899649620056152} +07/25/2024 12:35:27 - INFO - __main__ - Step 14843: {'lr': 0.0004890123551993125, 'samples': 712464, 'steps': 14842, 'loss/train': 1.4795172214508057} +07/25/2024 12:35:27 - INFO - __main__ - Step 14844: {'lr': 0.0004890108127276816, 'samples': 712512, 'steps': 14843, 'loss/train': 2.101886034011841} +07/25/2024 12:35:27 - INFO - __main__ - Step 14845: {'lr': 0.0004890092701502234, 'samples': 712560, 'steps': 14844, 'loss/train': 1.7746403217315674} +07/25/2024 12:35:28 - INFO - __main__ - Step 14846: {'lr': 0.0004890077274669385, 'samples': 712608, 'steps': 14845, 'loss/train': 1.2518115043640137} +07/25/2024 12:35:28 - INFO - __main__ - Step 14847: {'lr': 0.0004890061846778278, 'samples': 712656, 'steps': 14846, 'loss/train': 1.7964998483657837} +07/25/2024 12:35:28 - INFO - __main__ - Step 14848: {'lr': 0.0004890046417828918, 'samples': 712704, 'steps': 14847, 'loss/train': 2.2104499340057373} +07/25/2024 12:35:28 - INFO - __main__ - Step 14849: {'lr': 0.0004890030987821314, 'samples': 712752, 'steps': 14848, 'loss/train': 1.7344948053359985} +07/25/2024 12:35:29 - INFO - __main__ - Step 14850: {'lr': 0.0004890015556755469, 'samples': 712800, 'steps': 14849, 'loss/train': 1.3369882106781006} +07/25/2024 12:35:29 - INFO - __main__ - Step 14851: {'lr': 0.0004890000124631393, 'samples': 712848, 'steps': 14850, 'loss/train': 1.7164201736450195} +07/25/2024 12:35:29 - INFO - __main__ - Step 14852: {'lr': 0.0004889984691449092, 'samples': 712896, 'steps': 14851, 'loss/train': 1.9499455690383911} +07/25/2024 12:35:29 - INFO - __main__ - Step 14853: {'lr': 0.0004889969257208573, 'samples': 712944, 'steps': 14852, 'loss/train': 1.9797602891921997} +07/25/2024 12:35:30 - INFO - __main__ - Step 14854: {'lr': 0.0004889953821909841, 'samples': 712992, 'steps': 14853, 'loss/train': 0.250986784696579} +07/25/2024 12:35:30 - INFO - __main__ - Step 14855: {'lr': 0.0004889938385552907, 'samples': 713040, 'steps': 14854, 'loss/train': 0.9366347789764404} +07/25/2024 12:35:30 - INFO - __main__ - Step 14856: {'lr': 0.0004889922948137773, 'samples': 713088, 'steps': 14855, 'loss/train': 2.2298622131347656} +07/25/2024 12:35:31 - INFO - __main__ - Step 14857: {'lr': 0.0004889907509664449, 'samples': 713136, 'steps': 14856, 'loss/train': 2.015997886657715} +07/25/2024 12:35:31 - INFO - __main__ - Step 14858: {'lr': 0.0004889892070132941, 'samples': 713184, 'steps': 14857, 'loss/train': 1.8934614658355713} +07/25/2024 12:35:31 - INFO - __main__ - Step 14859: {'lr': 0.0004889876629543255, 'samples': 713232, 'steps': 14858, 'loss/train': 1.3568841218948364} +07/25/2024 12:35:31 - INFO - __main__ - Step 14860: {'lr': 0.00048898611878954, 'samples': 713280, 'steps': 14859, 'loss/train': 1.8179748058319092} +07/25/2024 12:35:32 - INFO - __main__ - Step 14861: {'lr': 0.0004889845745189381, 'samples': 713328, 'steps': 14860, 'loss/train': 1.726959466934204} +07/25/2024 12:35:32 - INFO - __main__ - Step 14862: {'lr': 0.0004889830301425204, 'samples': 713376, 'steps': 14861, 'loss/train': 2.139791250228882} +07/25/2024 12:35:32 - INFO - __main__ - Step 14863: {'lr': 0.0004889814856602879, 'samples': 713424, 'steps': 14862, 'loss/train': 0.7529365420341492} +07/25/2024 12:35:33 - INFO - __main__ - Step 14864: {'lr': 0.0004889799410722409, 'samples': 713472, 'steps': 14863, 'loss/train': 2.2362289428710938} +07/25/2024 12:35:33 - INFO - __main__ - Step 14865: {'lr': 0.0004889783963783805, 'samples': 713520, 'steps': 14864, 'loss/train': 1.99794602394104} +07/25/2024 12:35:33 - INFO - __main__ - Step 14866: {'lr': 0.000488976851578707, 'samples': 713568, 'steps': 14865, 'loss/train': 1.8339853286743164} +07/25/2024 12:35:33 - INFO - __main__ - Step 14867: {'lr': 0.0004889753066732212, 'samples': 713616, 'steps': 14866, 'loss/train': 2.074653148651123} +07/25/2024 12:35:34 - INFO - __main__ - Step 14868: {'lr': 0.0004889737616619239, 'samples': 713664, 'steps': 14867, 'loss/train': 1.7566025257110596} +07/25/2024 12:35:34 - INFO - __main__ - Step 14869: {'lr': 0.0004889722165448158, 'samples': 713712, 'steps': 14868, 'loss/train': 2.286140203475952} +07/25/2024 12:35:34 - INFO - __main__ - Step 14870: {'lr': 0.0004889706713218974, 'samples': 713760, 'steps': 14869, 'loss/train': 1.5975531339645386} +07/25/2024 12:35:35 - INFO - __main__ - Step 14871: {'lr': 0.0004889691259931695, 'samples': 713808, 'steps': 14870, 'loss/train': 1.1463699340820312} +07/25/2024 12:35:35 - INFO - __main__ - Step 14872: {'lr': 0.0004889675805586327, 'samples': 713856, 'steps': 14871, 'loss/train': 1.6827328205108643} +07/25/2024 12:35:35 - INFO - __main__ - Step 14873: {'lr': 0.0004889660350182879, 'samples': 713904, 'steps': 14872, 'loss/train': 2.0519626140594482} +07/25/2024 12:35:35 - INFO - __main__ - Step 14874: {'lr': 0.0004889644893721355, 'samples': 713952, 'steps': 14873, 'loss/train': 1.5563373565673828} +07/25/2024 12:35:36 - INFO - __main__ - Step 14875: {'lr': 0.0004889629436201765, 'samples': 714000, 'steps': 14874, 'loss/train': 1.9751914739608765} +07/25/2024 12:35:36 - INFO - __main__ - Step 14876: {'lr': 0.0004889613977624112, 'samples': 714048, 'steps': 14875, 'loss/train': 1.335326910018921} +07/25/2024 12:35:36 - INFO - __main__ - Step 14877: {'lr': 0.0004889598517988406, 'samples': 714096, 'steps': 14876, 'loss/train': 1.3654848337173462} +07/25/2024 12:35:37 - INFO - __main__ - Step 14878: {'lr': 0.0004889583057294652, 'samples': 714144, 'steps': 14877, 'loss/train': 0.1522931009531021} +07/25/2024 12:35:37 - INFO - __main__ - Step 14879: {'lr': 0.0004889567595542859, 'samples': 714192, 'steps': 14878, 'loss/train': 1.7728623151779175} +07/25/2024 12:35:37 - INFO - __main__ - Step 14880: {'lr': 0.0004889552132733031, 'samples': 714240, 'steps': 14879, 'loss/train': 1.9780609607696533} +07/25/2024 12:35:37 - INFO - __main__ - Step 14881: {'lr': 0.0004889536668865177, 'samples': 714288, 'steps': 14880, 'loss/train': 1.4423861503601074} +07/25/2024 12:35:38 - INFO - __main__ - Step 14882: {'lr': 0.0004889521203939302, 'samples': 714336, 'steps': 14881, 'loss/train': 2.1137535572052} +07/25/2024 12:35:38 - INFO - __main__ - Step 14883: {'lr': 0.0004889505737955415, 'samples': 714384, 'steps': 14882, 'loss/train': 2.194173574447632} +07/25/2024 12:35:38 - INFO - __main__ - Step 14884: {'lr': 0.0004889490270913523, 'samples': 714432, 'steps': 14883, 'loss/train': 1.1926935911178589} +07/25/2024 12:35:39 - INFO - __main__ - Step 14885: {'lr': 0.000488947480281363, 'samples': 714480, 'steps': 14884, 'loss/train': 1.5938611030578613} +07/25/2024 12:35:39 - INFO - __main__ - Step 14886: {'lr': 0.0004889459333655745, 'samples': 714528, 'steps': 14885, 'loss/train': 1.7074307203292847} +07/25/2024 12:35:39 - INFO - __main__ - Step 14887: {'lr': 0.0004889443863439873, 'samples': 714576, 'steps': 14886, 'loss/train': 1.733371376991272} +07/25/2024 12:35:39 - INFO - __main__ - Step 14888: {'lr': 0.0004889428392166025, 'samples': 714624, 'steps': 14887, 'loss/train': 2.417008399963379} +07/25/2024 12:35:40 - INFO - __main__ - Step 14889: {'lr': 0.0004889412919834202, 'samples': 714672, 'steps': 14888, 'loss/train': 1.818513035774231} +07/25/2024 12:35:40 - INFO - __main__ - Step 14890: {'lr': 0.0004889397446444416, 'samples': 714720, 'steps': 14889, 'loss/train': 1.8637259006500244} +07/25/2024 12:35:40 - INFO - __main__ - Step 14891: {'lr': 0.000488938197199667, 'samples': 714768, 'steps': 14890, 'loss/train': 1.9400326013565063} +07/25/2024 12:35:41 - INFO - __main__ - Step 14892: {'lr': 0.0004889366496490975, 'samples': 714816, 'steps': 14891, 'loss/train': 2.697848320007324} +07/25/2024 12:35:41 - INFO - __main__ - Step 14893: {'lr': 0.0004889351019927334, 'samples': 714864, 'steps': 14892, 'loss/train': 1.4876545667648315} +07/25/2024 12:35:41 - INFO - __main__ - Step 14894: {'lr': 0.0004889335542305755, 'samples': 714912, 'steps': 14893, 'loss/train': 2.097869396209717} +07/25/2024 12:35:41 - INFO - __main__ - Step 14895: {'lr': 0.0004889320063626246, 'samples': 714960, 'steps': 14894, 'loss/train': 1.911810278892517} +07/25/2024 12:35:42 - INFO - __main__ - Step 14896: {'lr': 0.0004889304583888813, 'samples': 715008, 'steps': 14895, 'loss/train': 2.008897304534912} +07/25/2024 12:35:42 - INFO - __main__ - Step 14897: {'lr': 0.0004889289103093463, 'samples': 715056, 'steps': 14896, 'loss/train': 1.5544817447662354} +07/25/2024 12:35:42 - INFO - __main__ - Step 14898: {'lr': 0.0004889273621240201, 'samples': 715104, 'steps': 14897, 'loss/train': 1.9736651182174683} +07/25/2024 12:35:43 - INFO - __main__ - Step 14899: {'lr': 0.0004889258138329037, 'samples': 715152, 'steps': 14898, 'loss/train': 1.2550143003463745} +07/25/2024 12:35:43 - INFO - __main__ - Step 14900: {'lr': 0.0004889242654359977, 'samples': 715200, 'steps': 14899, 'loss/train': 1.593371033668518} +07/25/2024 12:35:43 - INFO - __main__ - Step 14901: {'lr': 0.0004889227169333027, 'samples': 715248, 'steps': 14900, 'loss/train': 1.8387055397033691} +07/25/2024 12:35:43 - INFO - __main__ - Step 14902: {'lr': 0.0004889211683248193, 'samples': 715296, 'steps': 14901, 'loss/train': 0.14294958114624023} +07/25/2024 12:35:44 - INFO - __main__ - Step 14903: {'lr': 0.0004889196196105484, 'samples': 715344, 'steps': 14902, 'loss/train': 1.3207039833068848} +07/25/2024 12:35:44 - INFO - __main__ - Step 14904: {'lr': 0.0004889180707904905, 'samples': 715392, 'steps': 14903, 'loss/train': 1.9810197353363037} +07/25/2024 12:35:44 - INFO - __main__ - Step 14905: {'lr': 0.0004889165218646465, 'samples': 715440, 'steps': 14904, 'loss/train': 1.6117984056472778} +07/25/2024 12:35:45 - INFO - __main__ - Step 14906: {'lr': 0.0004889149728330168, 'samples': 715488, 'steps': 14905, 'loss/train': 1.9044034481048584} +07/25/2024 12:35:45 - INFO - __main__ - Step 14907: {'lr': 0.0004889134236956025, 'samples': 715536, 'steps': 14906, 'loss/train': 1.9229756593704224} +07/25/2024 12:35:45 - INFO - __main__ - Step 14908: {'lr': 0.0004889118744524038, 'samples': 715584, 'steps': 14907, 'loss/train': 2.081165075302124} +07/25/2024 12:35:45 - INFO - __main__ - Step 14909: {'lr': 0.0004889103251034217, 'samples': 715632, 'steps': 14908, 'loss/train': 2.0641205310821533} +07/25/2024 12:35:46 - INFO - __main__ - Step 14910: {'lr': 0.0004889087756486568, 'samples': 715680, 'steps': 14909, 'loss/train': 1.6470500230789185} +07/25/2024 12:35:46 - INFO - __main__ - Step 14911: {'lr': 0.0004889072260881097, 'samples': 715728, 'steps': 14910, 'loss/train': 1.5733264684677124} +07/25/2024 12:35:46 - INFO - __main__ - Step 14912: {'lr': 0.0004889056764217813, 'samples': 715776, 'steps': 14911, 'loss/train': 2.0227956771850586} +07/25/2024 12:35:47 - INFO - __main__ - Step 14913: {'lr': 0.0004889041266496722, 'samples': 715824, 'steps': 14912, 'loss/train': 1.8927638530731201} +07/25/2024 12:35:47 - INFO - __main__ - Step 14914: {'lr': 0.0004889025767717829, 'samples': 715872, 'steps': 14913, 'loss/train': 1.2657183408737183} +07/25/2024 12:35:47 - INFO - __main__ - Step 14915: {'lr': 0.0004889010267881143, 'samples': 715920, 'steps': 14914, 'loss/train': 1.1601688861846924} +07/25/2024 12:35:47 - INFO - __main__ - Step 14916: {'lr': 0.000488899476698667, 'samples': 715968, 'steps': 14915, 'loss/train': 3.4919824600219727} +07/25/2024 12:35:48 - INFO - __main__ - Step 14917: {'lr': 0.0004888979265034417, 'samples': 716016, 'steps': 14916, 'loss/train': 1.027809739112854} +07/25/2024 12:35:48 - INFO - __main__ - Step 14918: {'lr': 0.000488896376202439, 'samples': 716064, 'steps': 14917, 'loss/train': 1.9201436042785645} +07/25/2024 12:35:48 - INFO - __main__ - Step 14919: {'lr': 0.00048889482579566, 'samples': 716112, 'steps': 14918, 'loss/train': 2.6501665115356445} +07/25/2024 12:35:48 - INFO - __main__ - Step 14920: {'lr': 0.0004888932752831048, 'samples': 716160, 'steps': 14919, 'loss/train': 1.2928378582000732} +07/25/2024 12:35:49 - INFO - __main__ - Step 14921: {'lr': 0.0004888917246647743, 'samples': 716208, 'steps': 14920, 'loss/train': 1.9430683851242065} +07/25/2024 12:35:49 - INFO - __main__ - Step 14922: {'lr': 0.0004888901739406694, 'samples': 716256, 'steps': 14921, 'loss/train': 1.8690886497497559} +07/25/2024 12:35:49 - INFO - __main__ - Step 14923: {'lr': 0.0004888886231107907, 'samples': 716304, 'steps': 14922, 'loss/train': 1.1371115446090698} +07/25/2024 12:35:50 - INFO - __main__ - Step 14924: {'lr': 0.0004888870721751386, 'samples': 716352, 'steps': 14923, 'loss/train': 1.2718265056610107} +07/25/2024 12:35:50 - INFO - __main__ - Step 14925: {'lr': 0.0004888855211337141, 'samples': 716400, 'steps': 14924, 'loss/train': 2.035248041152954} +07/25/2024 12:35:50 - INFO - __main__ - Step 14926: {'lr': 0.0004888839699865177, 'samples': 716448, 'steps': 14925, 'loss/train': 0.12746578454971313} +07/25/2024 12:35:50 - INFO - __main__ - Step 14927: {'lr': 0.0004888824187335502, 'samples': 716496, 'steps': 14926, 'loss/train': 1.4948447942733765} +07/25/2024 12:35:51 - INFO - __main__ - Step 14928: {'lr': 0.0004888808673748124, 'samples': 716544, 'steps': 14927, 'loss/train': 1.669003963470459} +07/25/2024 12:35:51 - INFO - __main__ - Step 14929: {'lr': 0.0004888793159103046, 'samples': 716592, 'steps': 14928, 'loss/train': 1.6212780475616455} +07/25/2024 12:35:51 - INFO - __main__ - Step 14930: {'lr': 0.000488877764340028, 'samples': 716640, 'steps': 14929, 'loss/train': 1.6217745542526245} +07/25/2024 12:35:52 - INFO - __main__ - Step 14931: {'lr': 0.0004888762126639828, 'samples': 716688, 'steps': 14930, 'loss/train': 1.9618566036224365} +07/25/2024 12:35:52 - INFO - __main__ - Step 14932: {'lr': 0.00048887466088217, 'samples': 716736, 'steps': 14931, 'loss/train': 1.9344159364700317} +07/25/2024 12:35:52 - INFO - __main__ - Step 14933: {'lr': 0.0004888731089945903, 'samples': 716784, 'steps': 14932, 'loss/train': 1.8265939950942993} +07/25/2024 12:35:52 - INFO - __main__ - Step 14934: {'lr': 0.000488871557001244, 'samples': 716832, 'steps': 14933, 'loss/train': 2.127591609954834} +07/25/2024 12:35:53 - INFO - __main__ - Step 14935: {'lr': 0.0004888700049021323, 'samples': 716880, 'steps': 14934, 'loss/train': 2.1034276485443115} +07/25/2024 12:35:53 - INFO - __main__ - Step 14936: {'lr': 0.0004888684526972556, 'samples': 716928, 'steps': 14935, 'loss/train': 2.0113866329193115} +07/25/2024 12:35:53 - INFO - __main__ - Step 14937: {'lr': 0.0004888669003866147, 'samples': 716976, 'steps': 14936, 'loss/train': 1.9015591144561768} +07/25/2024 12:35:54 - INFO - __main__ - Step 14938: {'lr': 0.00048886534797021, 'samples': 717024, 'steps': 14937, 'loss/train': 1.7186458110809326} +07/25/2024 12:35:54 - INFO - __main__ - Step 14939: {'lr': 0.0004888637954480427, 'samples': 717072, 'steps': 14938, 'loss/train': 1.97562837600708} +07/25/2024 12:35:54 - INFO - __main__ - Step 14940: {'lr': 0.000488862242820113, 'samples': 717120, 'steps': 14939, 'loss/train': 2.41957688331604} +07/25/2024 12:35:54 - INFO - __main__ - Step 14941: {'lr': 0.000488860690086422, 'samples': 717168, 'steps': 14940, 'loss/train': 2.0551490783691406} +07/25/2024 12:35:55 - INFO - __main__ - Step 14942: {'lr': 0.00048885913724697, 'samples': 717216, 'steps': 14941, 'loss/train': 1.9384756088256836} +07/25/2024 12:35:55 - INFO - __main__ - Step 14943: {'lr': 0.0004888575843017579, 'samples': 717264, 'steps': 14942, 'loss/train': 2.2573487758636475} +07/25/2024 12:35:55 - INFO - __main__ - Step 14944: {'lr': 0.0004888560312507863, 'samples': 717312, 'steps': 14943, 'loss/train': 1.4248597621917725} +07/25/2024 12:35:56 - INFO - __main__ - Step 14945: {'lr': 0.0004888544780940561, 'samples': 717360, 'steps': 14944, 'loss/train': 2.1720263957977295} +07/25/2024 12:35:56 - INFO - __main__ - Step 14946: {'lr': 0.0004888529248315678, 'samples': 717408, 'steps': 14945, 'loss/train': 1.5208414793014526} +07/25/2024 12:35:56 - INFO - __main__ - Step 14947: {'lr': 0.000488851371463322, 'samples': 717456, 'steps': 14946, 'loss/train': 1.5278546810150146} +07/25/2024 12:35:56 - INFO - __main__ - Step 14948: {'lr': 0.0004888498179893196, 'samples': 717504, 'steps': 14947, 'loss/train': 1.7983945608139038} +07/25/2024 12:35:57 - INFO - __main__ - Step 14949: {'lr': 0.0004888482644095613, 'samples': 717552, 'steps': 14948, 'loss/train': 1.6942670345306396} +07/25/2024 12:35:57 - INFO - __main__ - Step 14950: {'lr': 0.0004888467107240474, 'samples': 717600, 'steps': 14949, 'loss/train': 0.1775784194469452} +07/25/2024 12:35:57 - INFO - __main__ - Step 14951: {'lr': 0.0004888451569327791, 'samples': 717648, 'steps': 14950, 'loss/train': 1.701954960823059} +07/25/2024 12:35:58 - INFO - __main__ - Step 14952: {'lr': 0.0004888436030357567, 'samples': 717696, 'steps': 14951, 'loss/train': 2.2308571338653564} +07/25/2024 12:35:58 - INFO - __main__ - Step 14953: {'lr': 0.0004888420490329811, 'samples': 717744, 'steps': 14952, 'loss/train': 2.1246347427368164} +07/25/2024 12:35:58 - INFO - __main__ - Step 14954: {'lr': 0.0004888404949244529, 'samples': 717792, 'steps': 14953, 'loss/train': 1.7836250066757202} +07/25/2024 12:35:58 - INFO - __main__ - Step 14955: {'lr': 0.000488838940710173, 'samples': 717840, 'steps': 14954, 'loss/train': 0.8969606161117554} +07/25/2024 12:35:59 - INFO - __main__ - Step 14956: {'lr': 0.0004888373863901418, 'samples': 717888, 'steps': 14955, 'loss/train': 1.786672592163086} +07/25/2024 12:35:59 - INFO - __main__ - Step 14957: {'lr': 0.00048883583196436, 'samples': 717936, 'steps': 14956, 'loss/train': 2.291158437728882} +07/25/2024 12:35:59 - INFO - __main__ - Step 14958: {'lr': 0.0004888342774328284, 'samples': 717984, 'steps': 14957, 'loss/train': 1.5981088876724243} +07/25/2024 12:36:00 - INFO - __main__ - Step 14959: {'lr': 0.0004888327227955478, 'samples': 718032, 'steps': 14958, 'loss/train': 2.160310983657837} +07/25/2024 12:36:00 - INFO - __main__ - Step 14960: {'lr': 0.0004888311680525187, 'samples': 718080, 'steps': 14959, 'loss/train': 2.251434564590454} +07/25/2024 12:36:00 - INFO - __main__ - Step 14961: {'lr': 0.0004888296132037419, 'samples': 718128, 'steps': 14960, 'loss/train': 1.7408720254898071} +07/25/2024 12:36:00 - INFO - __main__ - Step 14962: {'lr': 0.0004888280582492179, 'samples': 718176, 'steps': 14961, 'loss/train': 1.7250618934631348} +07/25/2024 12:36:01 - INFO - __main__ - Step 14963: {'lr': 0.0004888265031889476, 'samples': 718224, 'steps': 14962, 'loss/train': 2.1524133682250977} +07/25/2024 12:36:01 - INFO - __main__ - Step 14964: {'lr': 0.0004888249480229316, 'samples': 718272, 'steps': 14963, 'loss/train': 3.453383445739746} +07/25/2024 12:36:01 - INFO - __main__ - Step 14965: {'lr': 0.0004888233927511707, 'samples': 718320, 'steps': 14964, 'loss/train': 2.185145854949951} +07/25/2024 12:36:02 - INFO - __main__ - Step 14966: {'lr': 0.0004888218373736654, 'samples': 718368, 'steps': 14965, 'loss/train': 1.924613118171692} +07/25/2024 12:36:02 - INFO - __main__ - Step 14967: {'lr': 0.0004888202818904166, 'samples': 718416, 'steps': 14966, 'loss/train': 1.6286903619766235} +07/25/2024 12:36:02 - INFO - __main__ - Step 14968: {'lr': 0.0004888187263014247, 'samples': 718464, 'steps': 14967, 'loss/train': 1.795397400856018} +07/25/2024 12:36:02 - INFO - __main__ - Step 14969: {'lr': 0.0004888171706066907, 'samples': 718512, 'steps': 14968, 'loss/train': 2.019658088684082} +07/25/2024 12:36:03 - INFO - __main__ - Step 14970: {'lr': 0.000488815614806215, 'samples': 718560, 'steps': 14969, 'loss/train': 1.3682076930999756} +07/25/2024 12:36:03 - INFO - __main__ - Step 14971: {'lr': 0.0004888140588999986, 'samples': 718608, 'steps': 14970, 'loss/train': 2.2088706493377686} +07/25/2024 12:36:03 - INFO - __main__ - Step 14972: {'lr': 0.0004888125028880419, 'samples': 718656, 'steps': 14971, 'loss/train': 1.5266324281692505} +07/25/2024 12:36:04 - INFO - __main__ - Step 14973: {'lr': 0.0004888109467703457, 'samples': 718704, 'steps': 14972, 'loss/train': 1.4770032167434692} +07/25/2024 12:36:04 - INFO - __main__ - Step 14974: {'lr': 0.0004888093905469109, 'samples': 718752, 'steps': 14973, 'loss/train': 0.12746860086917877} +07/25/2024 12:36:04 - INFO - __main__ - Step 14975: {'lr': 0.0004888078342177378, 'samples': 718800, 'steps': 14974, 'loss/train': 1.5950161218643188} +07/25/2024 12:36:04 - INFO - __main__ - Step 14976: {'lr': 0.0004888062777828274, 'samples': 718848, 'steps': 14975, 'loss/train': 1.5008020401000977} +07/25/2024 12:36:05 - INFO - __main__ - Step 14977: {'lr': 0.0004888047212421803, 'samples': 718896, 'steps': 14976, 'loss/train': 0.9375318288803101} +07/25/2024 12:36:05 - INFO - __main__ - Step 14978: {'lr': 0.000488803164595797, 'samples': 718944, 'steps': 14977, 'loss/train': 1.9444342851638794} +07/25/2024 12:36:05 - INFO - __main__ - Step 14979: {'lr': 0.0004888016078436785, 'samples': 718992, 'steps': 14978, 'loss/train': 1.620240330696106} +07/25/2024 12:36:06 - INFO - __main__ - Step 14980: {'lr': 0.0004888000509858252, 'samples': 719040, 'steps': 14979, 'loss/train': 1.9268848896026611} +07/25/2024 12:36:06 - INFO - __main__ - Step 14981: {'lr': 0.000488798494022238, 'samples': 719088, 'steps': 14980, 'loss/train': 1.6129826307296753} +07/25/2024 12:36:06 - INFO - __main__ - Step 14982: {'lr': 0.0004887969369529175, 'samples': 719136, 'steps': 14981, 'loss/train': 0.9050710797309875} +07/25/2024 12:36:06 - INFO - __main__ - Step 14983: {'lr': 0.0004887953797778645, 'samples': 719184, 'steps': 14982, 'loss/train': 1.69768226146698} +07/25/2024 12:36:07 - INFO - __main__ - Step 14984: {'lr': 0.0004887938224970795, 'samples': 719232, 'steps': 14983, 'loss/train': 2.016042709350586} +07/25/2024 12:36:07 - INFO - __main__ - Step 14985: {'lr': 0.0004887922651105633, 'samples': 719280, 'steps': 14984, 'loss/train': 2.0440826416015625} +07/25/2024 12:36:07 - INFO - __main__ - Step 14986: {'lr': 0.0004887907076183166, 'samples': 719328, 'steps': 14985, 'loss/train': 2.305758476257324} +07/25/2024 12:36:08 - INFO - __main__ - Step 14987: {'lr': 0.00048878915002034, 'samples': 719376, 'steps': 14986, 'loss/train': 2.1887288093566895} +07/25/2024 12:36:08 - INFO - __main__ - Step 14988: {'lr': 0.0004887875923166343, 'samples': 719424, 'steps': 14987, 'loss/train': 2.3184945583343506} +07/25/2024 12:36:08 - INFO - __main__ - Step 14989: {'lr': 0.0004887860345072001, 'samples': 719472, 'steps': 14988, 'loss/train': 4.124790668487549} +07/25/2024 12:36:08 - INFO - __main__ - Step 14990: {'lr': 0.0004887844765920382, 'samples': 719520, 'steps': 14989, 'loss/train': 2.244513988494873} +07/25/2024 12:36:09 - INFO - __main__ - Step 14991: {'lr': 0.0004887829185711492, 'samples': 719568, 'steps': 14990, 'loss/train': 1.8684250116348267} +07/25/2024 12:36:09 - INFO - __main__ - Step 14992: {'lr': 0.0004887813604445339, 'samples': 719616, 'steps': 14991, 'loss/train': 2.469360113143921} +07/25/2024 12:36:09 - INFO - __main__ - Step 14993: {'lr': 0.0004887798022121928, 'samples': 719664, 'steps': 14992, 'loss/train': 1.8329384326934814} +07/25/2024 12:36:09 - INFO - __main__ - Step 14994: {'lr': 0.0004887782438741268, 'samples': 719712, 'steps': 14993, 'loss/train': 1.466169834136963} +07/25/2024 12:36:10 - INFO - __main__ - Step 14995: {'lr': 0.0004887766854303363, 'samples': 719760, 'steps': 14994, 'loss/train': 1.535654067993164} +07/25/2024 12:36:10 - INFO - __main__ - Step 14996: {'lr': 0.0004887751268808223, 'samples': 719808, 'steps': 14995, 'loss/train': 1.5997605323791504} +07/25/2024 12:36:10 - INFO - __main__ - Step 14997: {'lr': 0.0004887735682255854, 'samples': 719856, 'steps': 14996, 'loss/train': 1.491394281387329} +07/25/2024 12:36:11 - INFO - __main__ - Step 14998: {'lr': 0.0004887720094646261, 'samples': 719904, 'steps': 14997, 'loss/train': 1.5700575113296509} +07/25/2024 12:36:11 - INFO - __main__ - Step 14999: {'lr': 0.0004887704505979454, 'samples': 719952, 'steps': 14998, 'loss/train': 1.2988357543945312} +07/25/2024 12:36:11 - INFO - __main__ - Step 15000: {'lr': 0.0004887688916255437, 'samples': 720000, 'steps': 14999, 'loss/train': 1.8666151762008667} +07/25/2024 12:36:11 - INFO - __main__ - Evaluating and saving model checkpoint +07/25/2024 12:36:11 - DEBUG - datasets.iterable_dataset - dataloader worker#0, ': Starting to iterate over 1/1 shards. +07/25/2024 12:36:15 - INFO - __main__ - Step 15000: {'loss/eval': 1.937909483909607, 'perplexity': 6.944218635559082}